xref: /freebsd-src/sys/contrib/openzfs/cmd/ztest.c (revision 87bf66d4a7488c496af110d4d05cc0273d49f82e)
1716fd348SMartin Matuska /*
2716fd348SMartin Matuska  * CDDL HEADER START
3716fd348SMartin Matuska  *
4716fd348SMartin Matuska  * The contents of this file are subject to the terms of the
5716fd348SMartin Matuska  * Common Development and Distribution License (the "License").
6716fd348SMartin Matuska  * You may not use this file except in compliance with the License.
7716fd348SMartin Matuska  *
8716fd348SMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10716fd348SMartin Matuska  * See the License for the specific language governing permissions
11716fd348SMartin Matuska  * and limitations under the License.
12716fd348SMartin Matuska  *
13716fd348SMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14716fd348SMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15716fd348SMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16716fd348SMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17716fd348SMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18716fd348SMartin Matuska  *
19716fd348SMartin Matuska  * CDDL HEADER END
20716fd348SMartin Matuska  */
21716fd348SMartin Matuska /*
22716fd348SMartin Matuska  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23783d3ff6SMartin Matuska  * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
24716fd348SMartin Matuska  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
25716fd348SMartin Matuska  * Copyright (c) 2013 Steven Hartland. All rights reserved.
26716fd348SMartin Matuska  * Copyright (c) 2014 Integros [integros.com]
27716fd348SMartin Matuska  * Copyright 2017 Joyent, Inc.
28716fd348SMartin Matuska  * Copyright (c) 2017, Intel Corporation.
29ce4dcb97SMartin Matuska  * Copyright (c) 2023, Klara, Inc.
30716fd348SMartin Matuska  */
31716fd348SMartin Matuska 
32716fd348SMartin Matuska /*
33716fd348SMartin Matuska  * The objective of this program is to provide a DMU/ZAP/SPA stress test
34716fd348SMartin Matuska  * that runs entirely in userland, is easy to use, and easy to extend.
35716fd348SMartin Matuska  *
36716fd348SMartin Matuska  * The overall design of the ztest program is as follows:
37716fd348SMartin Matuska  *
38716fd348SMartin Matuska  * (1) For each major functional area (e.g. adding vdevs to a pool,
39716fd348SMartin Matuska  *     creating and destroying datasets, reading and writing objects, etc)
40716fd348SMartin Matuska  *     we have a simple routine to test that functionality.  These
41716fd348SMartin Matuska  *     individual routines do not have to do anything "stressful".
42716fd348SMartin Matuska  *
43716fd348SMartin Matuska  * (2) We turn these simple functionality tests into a stress test by
44716fd348SMartin Matuska  *     running them all in parallel, with as many threads as desired,
45716fd348SMartin Matuska  *     and spread across as many datasets, objects, and vdevs as desired.
46716fd348SMartin Matuska  *
47716fd348SMartin Matuska  * (3) While all this is happening, we inject faults into the pool to
48716fd348SMartin Matuska  *     verify that self-healing data really works.
49716fd348SMartin Matuska  *
50716fd348SMartin Matuska  * (4) Every time we open a dataset, we change its checksum and compression
51716fd348SMartin Matuska  *     functions.  Thus even individual objects vary from block to block
52716fd348SMartin Matuska  *     in which checksum they use and whether they're compressed.
53716fd348SMartin Matuska  *
54716fd348SMartin Matuska  * (5) To verify that we never lose on-disk consistency after a crash,
55716fd348SMartin Matuska  *     we run the entire test in a child of the main process.
56716fd348SMartin Matuska  *     At random times, the child self-immolates with a SIGKILL.
57716fd348SMartin Matuska  *     This is the software equivalent of pulling the power cord.
58716fd348SMartin Matuska  *     The parent then runs the test again, using the existing
59716fd348SMartin Matuska  *     storage pool, as many times as desired. If backwards compatibility
60716fd348SMartin Matuska  *     testing is enabled ztest will sometimes run the "older" version
61716fd348SMartin Matuska  *     of ztest after a SIGKILL.
62716fd348SMartin Matuska  *
63716fd348SMartin Matuska  * (6) To verify that we don't have future leaks or temporal incursions,
64716fd348SMartin Matuska  *     many of the functional tests record the transaction group number
65716fd348SMartin Matuska  *     as part of their data.  When reading old data, they verify that
66716fd348SMartin Matuska  *     the transaction group number is less than the current, open txg.
67716fd348SMartin Matuska  *     If you add a new test, please do this if applicable.
68716fd348SMartin Matuska  *
69716fd348SMartin Matuska  * (7) Threads are created with a reduced stack size, for sanity checking.
70716fd348SMartin Matuska  *     Therefore, it's important not to allocate huge buffers on the stack.
71716fd348SMartin Matuska  *
72716fd348SMartin Matuska  * When run with no arguments, ztest runs for about five minutes and
73716fd348SMartin Matuska  * produces no output if successful.  To get a little bit of information,
74716fd348SMartin Matuska  * specify -V.  To get more information, specify -VV, and so on.
75716fd348SMartin Matuska  *
76716fd348SMartin Matuska  * To turn this into an overnight stress test, use -T to specify run time.
77716fd348SMartin Matuska  *
78716fd348SMartin Matuska  * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
79716fd348SMartin Matuska  * to increase the pool capacity, fanout, and overall stress level.
80716fd348SMartin Matuska  *
81716fd348SMartin Matuska  * Use the -k option to set the desired frequency of kills.
82716fd348SMartin Matuska  *
83716fd348SMartin Matuska  * When ztest invokes itself it passes all relevant information through a
84716fd348SMartin Matuska  * temporary file which is mmap-ed in the child process. This allows shared
85716fd348SMartin Matuska  * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always
86716fd348SMartin Matuska  * stored at offset 0 of this file and contains information on the size and
87716fd348SMartin Matuska  * number of shared structures in the file. The information stored in this file
88716fd348SMartin Matuska  * must remain backwards compatible with older versions of ztest so that
89716fd348SMartin Matuska  * ztest can invoke them during backwards compatibility testing (-B).
90716fd348SMartin Matuska  */
91716fd348SMartin Matuska 
92716fd348SMartin Matuska #include <sys/zfs_context.h>
93716fd348SMartin Matuska #include <sys/spa.h>
94716fd348SMartin Matuska #include <sys/dmu.h>
95716fd348SMartin Matuska #include <sys/txg.h>
96716fd348SMartin Matuska #include <sys/dbuf.h>
97716fd348SMartin Matuska #include <sys/zap.h>
98716fd348SMartin Matuska #include <sys/dmu_objset.h>
99716fd348SMartin Matuska #include <sys/poll.h>
100716fd348SMartin Matuska #include <sys/stat.h>
101716fd348SMartin Matuska #include <sys/time.h>
102716fd348SMartin Matuska #include <sys/wait.h>
103716fd348SMartin Matuska #include <sys/mman.h>
104716fd348SMartin Matuska #include <sys/resource.h>
105716fd348SMartin Matuska #include <sys/zio.h>
106716fd348SMartin Matuska #include <sys/zil.h>
107716fd348SMartin Matuska #include <sys/zil_impl.h>
108716fd348SMartin Matuska #include <sys/vdev_draid.h>
109716fd348SMartin Matuska #include <sys/vdev_impl.h>
110716fd348SMartin Matuska #include <sys/vdev_file.h>
111716fd348SMartin Matuska #include <sys/vdev_initialize.h>
112716fd348SMartin Matuska #include <sys/vdev_raidz.h>
113716fd348SMartin Matuska #include <sys/vdev_trim.h>
114716fd348SMartin Matuska #include <sys/spa_impl.h>
115716fd348SMartin Matuska #include <sys/metaslab_impl.h>
116716fd348SMartin Matuska #include <sys/dsl_prop.h>
117716fd348SMartin Matuska #include <sys/dsl_dataset.h>
118716fd348SMartin Matuska #include <sys/dsl_destroy.h>
119716fd348SMartin Matuska #include <sys/dsl_scan.h>
120716fd348SMartin Matuska #include <sys/zio_checksum.h>
121716fd348SMartin Matuska #include <sys/zfs_refcount.h>
122716fd348SMartin Matuska #include <sys/zfeature.h>
123716fd348SMartin Matuska #include <sys/dsl_userhold.h>
124716fd348SMartin Matuska #include <sys/abd.h>
1251f1e2261SMartin Matuska #include <sys/blake3.h>
126716fd348SMartin Matuska #include <stdio.h>
127716fd348SMartin Matuska #include <stdlib.h>
128716fd348SMartin Matuska #include <unistd.h>
129716fd348SMartin Matuska #include <getopt.h>
130716fd348SMartin Matuska #include <signal.h>
131716fd348SMartin Matuska #include <umem.h>
132716fd348SMartin Matuska #include <ctype.h>
133716fd348SMartin Matuska #include <math.h>
134716fd348SMartin Matuska #include <sys/fs/zfs.h>
135716fd348SMartin Matuska #include <zfs_fletcher.h>
136716fd348SMartin Matuska #include <libnvpair.h>
137716fd348SMartin Matuska #include <libzutil.h>
138716fd348SMartin Matuska #include <sys/crypto/icp.h>
1392a58b312SMartin Matuska #include <sys/zfs_impl.h>
140aca928a5SMartin Matuska #include <sys/backtrace.h>
141716fd348SMartin Matuska 
/*
 * File descriptors used by ztest; both start out as -1.
 * NOTE(review): presumably -1 means "not opened yet", ztest_fd_data backs the
 * shared temporary file described in the overview comment, and ztest_fd_rand
 * is a randomness source -- confirm against the code that opens them.
 */
142716fd348SMartin Matuska static int ztest_fd_data = -1;
143716fd348SMartin Matuska static int ztest_fd_rand = -1;
144716fd348SMartin Matuska 
/*
 * Header of the file shared between the ztest parent and its children.
 * Per the overview comment at the top of this file, it is stored at offset 0
 * of that file and records the size and number of the shared structures in
 * it.  The layout must stay backwards compatible with older ztest binaries
 * (-B), so do not reorder or resize existing members.
 * NOTE(review): each zh_*_size / zh_*_count member presumably describes one
 * of the shared structures declared below -- confirm against the setup code.
 */
145716fd348SMartin Matuska typedef struct ztest_shared_hdr {
146716fd348SMartin Matuska 	uint64_t	zh_hdr_size;
147716fd348SMartin Matuska 	uint64_t	zh_opts_size;
148716fd348SMartin Matuska 	uint64_t	zh_size;
149716fd348SMartin Matuska 	uint64_t	zh_stats_size;
150716fd348SMartin Matuska 	uint64_t	zh_stats_count;
151716fd348SMartin Matuska 	uint64_t	zh_ds_size;
152716fd348SMartin Matuska 	uint64_t	zh_ds_count;
153e716630dSMartin Matuska 	uint64_t	zh_scratch_state_size;
154716fd348SMartin Matuska } ztest_shared_hdr_t;
155716fd348SMartin Matuska 
/* Pointer to the shared header; not assigned in this part of the file. */
156716fd348SMartin Matuska static ztest_shared_hdr_t *ztest_shared_hdr;
157716fd348SMartin Matuska 
/*
 * Setting stored in ztest_shared_opts_t.zo_special_vdevs; the default in
 * ztest_opts_defaults is ZTEST_VDEV_CLASS_RND.
 * NOTE(review): RND presumably means "decide randomly" -- confirm.
 */
158716fd348SMartin Matuska enum ztest_class_state {
159716fd348SMartin Matuska 	ZTEST_VDEV_CLASS_OFF,
160716fd348SMartin Matuska 	ZTEST_VDEV_CLASS_ON,
161716fd348SMartin Matuska 	ZTEST_VDEV_CLASS_RND
162716fd348SMartin Matuska };
163716fd348SMartin Matuska 
164e716630dSMartin Matuska /* Dedicated RAIDZ Expansion test states */
165e716630dSMartin Matuska typedef enum {
166e716630dSMartin Matuska 	RAIDZ_EXPAND_NONE,		/* Default is none, must opt-in	*/
167e716630dSMartin Matuska 	RAIDZ_EXPAND_REQUESTED,		/* The '-X' option was used	*/
168e716630dSMartin Matuska 	RAIDZ_EXPAND_STARTED,		/* Testing has commenced	*/
169e716630dSMartin Matuska 	RAIDZ_EXPAND_KILLED,		/* Reached the process kill	*/
170e716630dSMartin Matuska 	RAIDZ_EXPAND_CHECKED,		/* Pool scrub verification done	*/
171e716630dSMartin Matuska } raidz_expand_test_state_t;
172e716630dSMartin Matuska 
173e716630dSMartin Matuska 
/*
 * Dimensions of the zo_gvars table below: at most ZO_GVARS_MAX_COUNT entries,
 * each a char array of ZO_GVARS_MAX_ARGLEN bytes.
 */
174716fd348SMartin Matuska #define	ZO_GVARS_MAX_ARGLEN	((size_t)64)
175716fd348SMartin Matuska #define	ZO_GVARS_MAX_COUNT	((size_t)10)
176716fd348SMartin Matuska 
/*
 * Run-time options.  ztest_opts_defaults supplies the default values,
 * ztest_opts is a file-scope instance, and ztest_shared_opts points at
 * another instance.
 * NOTE(review): zh_opts_size in ztest_shared_hdr_t presumably records the
 * size of this struct, which would make its layout part of the -B
 * backwards-compatibility contract -- confirm before reordering members.
 */
177716fd348SMartin Matuska typedef struct ztest_shared_opts {
178716fd348SMartin Matuska 	char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
179716fd348SMartin Matuska 	char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
180716fd348SMartin Matuska 	char zo_alt_ztest[MAXNAMELEN];
181716fd348SMartin Matuska 	char zo_alt_libpath[MAXNAMELEN];
182716fd348SMartin Matuska 	uint64_t zo_vdevs;
	/* Referenced by address as the interval of three ztest_info[] entries. */
183716fd348SMartin Matuska 	uint64_t zo_vdevtime;
184716fd348SMartin Matuska 	size_t zo_vdev_size;
185716fd348SMartin Matuska 	int zo_ashift;
186716fd348SMartin Matuska 	int zo_mirrors;
187e716630dSMartin Matuska 	int zo_raid_do_expand;
188716fd348SMartin Matuska 	int zo_raid_children;
	/* Read by the MAXFAULTS() macro. */
189716fd348SMartin Matuska 	int zo_raid_parity;
190716fd348SMartin Matuska 	char zo_raid_type[8];
191716fd348SMartin Matuska 	int zo_draid_data;
192716fd348SMartin Matuska 	int zo_draid_spares;
193716fd348SMartin Matuska 	int zo_datasets;
194716fd348SMartin Matuska 	int zo_threads;
195716fd348SMartin Matuska 	uint64_t zo_passtime;
196716fd348SMartin Matuska 	uint64_t zo_killrate;
197716fd348SMartin Matuska 	int zo_verbose;
198716fd348SMartin Matuska 	int zo_init;
199716fd348SMartin Matuska 	uint64_t zo_time;
200716fd348SMartin Matuska 	uint64_t zo_maxloops;
201716fd348SMartin Matuska 	uint64_t zo_metaslab_force_ganging;
202e716630dSMartin Matuska 	raidz_expand_test_state_t zo_raidz_expand_test;
203716fd348SMartin Matuska 	int zo_mmp_test;
	/* Holds an enum ztest_class_state value (see ztest_opts_defaults). */
204716fd348SMartin Matuska 	int zo_special_vdevs;
205716fd348SMartin Matuska 	int zo_dump_dbgmsg;
	/* NOTE(review): presumably the number of used zo_gvars rows -- confirm. */
206716fd348SMartin Matuska 	int zo_gvars_count;
207716fd348SMartin Matuska 	char zo_gvars[ZO_GVARS_MAX_COUNT][ZO_GVARS_MAX_ARGLEN];
208716fd348SMartin Matuska } ztest_shared_opts_t;
209716fd348SMartin Matuska 
/*
 * Default values for command line options.
 *
 * Several numeric defaults have a hand-written *_STR twin.  The two forms are
 * not derived from each other, so change them together.
 * NOTE(review): the *_STR forms are presumably used for help output --
 * confirm against their users.
 */
210716fd348SMartin Matuska /* Default values for command line options. */
211716fd348SMartin Matuska #define	DEFAULT_POOL "ztest"
212716fd348SMartin Matuska #define	DEFAULT_VDEV_DIR "/tmp"
213716fd348SMartin Matuska #define	DEFAULT_VDEV_COUNT 5
214716fd348SMartin Matuska #define	DEFAULT_VDEV_SIZE (SPA_MINDEVSIZE * 4)	/* 256m default size */
215716fd348SMartin Matuska #define	DEFAULT_VDEV_SIZE_STR "256M"
216716fd348SMartin Matuska #define	DEFAULT_ASHIFT SPA_MINBLOCKSHIFT
217716fd348SMartin Matuska #define	DEFAULT_MIRRORS 2
218716fd348SMartin Matuska #define	DEFAULT_RAID_CHILDREN 4
219716fd348SMartin Matuska #define	DEFAULT_RAID_PARITY 1
220716fd348SMartin Matuska #define	DEFAULT_DRAID_DATA 4
221716fd348SMartin Matuska #define	DEFAULT_DRAID_SPARES 1
222716fd348SMartin Matuska #define	DEFAULT_DATASETS_COUNT 7
223716fd348SMartin Matuska #define	DEFAULT_THREADS 23
224716fd348SMartin Matuska #define	DEFAULT_RUN_TIME 300 /* 300 seconds */
225716fd348SMartin Matuska #define	DEFAULT_RUN_TIME_STR "300 sec"
226716fd348SMartin Matuska #define	DEFAULT_PASS_TIME 60 /* 60 seconds */
227716fd348SMartin Matuska #define	DEFAULT_PASS_TIME_STR "60 sec"
228716fd348SMartin Matuska #define	DEFAULT_KILL_RATE 70 /* 70% kill rate */
229716fd348SMartin Matuska #define	DEFAULT_KILLRATE_STR "70%"
230716fd348SMartin Matuska #define	DEFAULT_INITS 1
/*
 * NOTE(review): this value initializes zo_maxloops, which ztest_opts_defaults
 * describes as a loop count; the "5 minutes" remark below does not obviously
 * follow from 50 -- confirm or correct.
 */
231716fd348SMartin Matuska #define	DEFAULT_MAX_LOOPS 50 /* 5 minutes */
232716fd348SMartin Matuska #define	DEFAULT_FORCE_GANGING (64 << 10)
233716fd348SMartin Matuska #define	DEFAULT_FORCE_GANGING_STR "64K"
234716fd348SMartin Matuska 
235716fd348SMartin Matuska /* Simplifying assumption: -1 is not a valid default. */
236716fd348SMartin Matuska #define	NO_DEFAULT -1
237716fd348SMartin Matuska 
/*
 * Default option values.  Members that are not named here (zo_vdevtime,
 * zo_raid_do_expand, zo_dump_dbgmsg and zo_gvars) are implicitly
 * zero-initialized, as C specifies for designated initializers.
 */
238716fd348SMartin Matuska static const ztest_shared_opts_t ztest_opts_defaults = {
239716fd348SMartin Matuska 	.zo_pool = DEFAULT_POOL,
240716fd348SMartin Matuska 	.zo_dir = DEFAULT_VDEV_DIR,
241716fd348SMartin Matuska 	.zo_alt_ztest = { '\0' },
242716fd348SMartin Matuska 	.zo_alt_libpath = { '\0' },
243716fd348SMartin Matuska 	.zo_vdevs = DEFAULT_VDEV_COUNT,
244716fd348SMartin Matuska 	.zo_ashift = DEFAULT_ASHIFT,
245716fd348SMartin Matuska 	.zo_mirrors = DEFAULT_MIRRORS,
246716fd348SMartin Matuska 	.zo_raid_children = DEFAULT_RAID_CHILDREN,
247716fd348SMartin Matuska 	.zo_raid_parity = DEFAULT_RAID_PARITY,
248716fd348SMartin Matuska 	.zo_raid_type = VDEV_TYPE_RAIDZ,
249716fd348SMartin Matuska 	.zo_vdev_size = DEFAULT_VDEV_SIZE,
250716fd348SMartin Matuska 	.zo_draid_data = DEFAULT_DRAID_DATA,	/* data drives */
251716fd348SMartin Matuska 	.zo_draid_spares = DEFAULT_DRAID_SPARES, /* distributed spares */
252716fd348SMartin Matuska 	.zo_datasets = DEFAULT_DATASETS_COUNT,
253716fd348SMartin Matuska 	.zo_threads = DEFAULT_THREADS,
254716fd348SMartin Matuska 	.zo_passtime = DEFAULT_PASS_TIME,
255716fd348SMartin Matuska 	.zo_killrate = DEFAULT_KILL_RATE,
256716fd348SMartin Matuska 	.zo_verbose = 0,
257716fd348SMartin Matuska 	.zo_mmp_test = 0,
258716fd348SMartin Matuska 	.zo_init = DEFAULT_INITS,
259716fd348SMartin Matuska 	.zo_time = DEFAULT_RUN_TIME,
260716fd348SMartin Matuska 	.zo_maxloops = DEFAULT_MAX_LOOPS, /* max loops during spa_freeze() */
261716fd348SMartin Matuska 	.zo_metaslab_force_ganging = DEFAULT_FORCE_GANGING,
262716fd348SMartin Matuska 	.zo_special_vdevs = ZTEST_VDEV_CLASS_RND,
263716fd348SMartin Matuska 	.zo_gvars_count = 0,
264e716630dSMartin Matuska 	.zo_raidz_expand_test = RAIDZ_EXPAND_NONE,
265716fd348SMartin Matuska };
266716fd348SMartin Matuska 
/*
 * Variables defined outside this file and declared here so ztest can refer
 * to them directly.  Each type must match the defining declaration.
 * NOTE(review): these look like ZFS module tunables -- confirm where each one
 * is defined before changing a type here.
 */
267716fd348SMartin Matuska extern uint64_t metaslab_force_ganging;
268716fd348SMartin Matuska extern uint64_t metaslab_df_alloc_threshold;
269dbd5678dSMartin Matuska extern uint64_t zfs_deadman_synctime_ms;
270be181ee2SMartin Matuska extern uint_t metaslab_preload_limit;
271716fd348SMartin Matuska extern int zfs_compressed_arc_enabled;
272716fd348SMartin Matuska extern int zfs_abd_scatter_enabled;
273be181ee2SMartin Matuska extern uint_t dmu_object_alloc_chunk_shift;
274716fd348SMartin Matuska extern boolean_t zfs_force_some_double_word_sm_entries;
275716fd348SMartin Matuska extern unsigned long zio_decompress_fail_fraction;
276716fd348SMartin Matuska extern unsigned long zfs_reconstruct_indirect_damage_fraction;
277e716630dSMartin Matuska extern uint64_t raidz_expand_max_reflow_bytes;
278e716630dSMartin Matuska extern uint_t raidz_expand_pause_point;
279e2df9bb4SMartin Matuska extern boolean_t ddt_prune_artificial_age;
280e2df9bb4SMartin Matuska extern boolean_t ddt_dump_prune_histogram;
281716fd348SMartin Matuska 
282716fd348SMartin Matuska 
/*
 * ztest_shared_opts points at an options instance that is not allocated in
 * this part of the file; ztest_opts is a file-scope instance whose
 * zo_vdevtime member is referenced by address from ztest_info[].
 * NOTE(review): presumably ztest_shared_opts lives in the shared file and
 * ztest_opts is the per-process copy -- confirm.
 */
283716fd348SMartin Matuska static ztest_shared_opts_t *ztest_shared_opts;
284716fd348SMartin Matuska static ztest_shared_opts_t ztest_opts;
/*
 * Fixed 32-character string.
 * NOTE(review): the name suggests wrapping-key data for encrypted datasets --
 * confirm against its users.
 */
285a0b956f5SMartin Matuska static const char *const ztest_wkeydata = "abcdefghijklmnopqrstuvwxyz012345";
286716fd348SMartin Matuska 
/*
 * Per-dataset state kept in an array reachable through ztest_shared_ds.
 * NOTE(review): zh_ds_size / zh_ds_count in the shared header presumably
 * describe this array -- confirm.
 */
287716fd348SMartin Matuska typedef struct ztest_shared_ds {
288716fd348SMartin Matuska 	uint64_t	zd_seq;
289716fd348SMartin Matuska } ztest_shared_ds_t;
290716fd348SMartin Matuska 
291716fd348SMartin Matuska static ztest_shared_ds_t *ztest_shared_ds;
/* Address of element d of ztest_shared_ds; no bounds check is performed. */
292716fd348SMartin Matuska #define	ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d])
293716fd348SMartin Matuska 
/*
 * Scratch state reachable through ztest_scratch_state.
 * NOTE(review): zh_scratch_state_size in the shared header presumably records
 * the size of this struct, and the single member relates to the RAIDZ
 * expansion test -- confirm.
 */
294e716630dSMartin Matuska typedef struct ztest_scratch_state {
295e716630dSMartin Matuska 	uint64_t	zs_raidz_scratch_verify_pause;
296e716630dSMartin Matuska } ztest_shared_scratch_state_t;
297e716630dSMartin Matuska 
298e716630dSMartin Matuska static ztest_shared_scratch_state_t *ztest_scratch_state;
299e716630dSMartin Matuska 
/*
 * NOTE(review): presumably the expected value of ztest_block_tag_t.bt_magic
 * -- confirm.
 */
300716fd348SMartin Matuska #define	BT_MAGIC	0x123456789abcdefULL
/*
 * Evaluates to max(zs->zs_mirrors, 1) * (zo_raid_parity + 1) - 1, reading
 * zo_raid_parity from the file-scope ztest_opts.  zs must point to something
 * with a zs_mirrors member (ztest_shared_t has one).
 */
301716fd348SMartin Matuska #define	MAXFAULTS(zs) \
302716fd348SMartin Matuska 	(MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1)
303716fd348SMartin Matuska 
/*
 * Kinds of I/O.  ZTEST_IO_TYPES is deliberately last, so its value equals the
 * number of real I/O types before it; add new types above it.
 */
304716fd348SMartin Matuska enum ztest_io_type {
305716fd348SMartin Matuska 	ZTEST_IO_WRITE_TAG,
306716fd348SMartin Matuska 	ZTEST_IO_WRITE_PATTERN,
307716fd348SMartin Matuska 	ZTEST_IO_WRITE_ZEROES,
308716fd348SMartin Matuska 	ZTEST_IO_TRUNCATE,
309716fd348SMartin Matuska 	ZTEST_IO_SETATTR,
310716fd348SMartin Matuska 	ZTEST_IO_REWRITE,
311716fd348SMartin Matuska 	ZTEST_IO_TYPES
312716fd348SMartin Matuska };
313716fd348SMartin Matuska 
/*
 * Block tag made of eight 64-bit fields.
 * NOTE(review): presumably written into test data so that later reads can be
 * verified (see item (6) of the overview comment about recording the txg),
 * with bt_magic expected to equal BT_MAGIC -- confirm against the
 * read/verify code.
 */
314716fd348SMartin Matuska typedef struct ztest_block_tag {
315716fd348SMartin Matuska 	uint64_t	bt_magic;
316716fd348SMartin Matuska 	uint64_t	bt_objset;
317716fd348SMartin Matuska 	uint64_t	bt_object;
318716fd348SMartin Matuska 	uint64_t	bt_dnodesize;
319716fd348SMartin Matuska 	uint64_t	bt_offset;
320716fd348SMartin Matuska 	uint64_t	bt_gen;
321716fd348SMartin Matuska 	uint64_t	bt_txg;
322716fd348SMartin Matuska 	uint64_t	bt_crtxg;
323716fd348SMartin Matuska } ztest_block_tag_t;
324716fd348SMartin Matuska 
/*
 * Three 64-bit words: an index, a txg and a data word.
 * NOTE(review): presumably the unit of data written and checked by the DMU
 * read/write tests -- confirm against its users.
 */
325716fd348SMartin Matuska typedef struct bufwad {
326716fd348SMartin Matuska 	uint64_t	bw_index;
327716fd348SMartin Matuska 	uint64_t	bw_txg;
328716fd348SMartin Matuska 	uint64_t	bw_data;
329716fd348SMartin Matuska } bufwad_t;
330716fd348SMartin Matuska 
331716fd348SMartin Matuska /*
332716fd348SMartin Matuska  * It would be better to use a rangelock_t per object.  Unfortunately
333716fd348SMartin Matuska  * the rangelock_t is not a drop-in replacement for rl_t, because we
334716fd348SMartin Matuska  * still need to map from object ID to rangelock_t.
335716fd348SMartin Matuska  */
/* Lock modes for ztest's own lock types (rll_t / rl_t) declared below. */
336716fd348SMartin Matuska typedef enum {
337e716630dSMartin Matuska 	ZTRL_READER,
338e716630dSMartin Matuska 	ZTRL_WRITER,
339e716630dSMartin Matuska 	ZTRL_APPEND
340716fd348SMartin Matuska } rl_type_t;
341716fd348SMartin Matuska 
/*
 * A single lock: a writer slot, a reader count, and the mutex / condition
 * variable pair stored alongside them.
 * NOTE(review): presumably rll_writer identifies the current writer (NULL
 * when there is none) and rll_lock protects both fields -- confirm against
 * the lock/unlock routines.
 */
342716fd348SMartin Matuska typedef struct rll {
343716fd348SMartin Matuska 	void		*rll_writer;
344716fd348SMartin Matuska 	int		rll_readers;
345716fd348SMartin Matuska 	kmutex_t	rll_lock;
346716fd348SMartin Matuska 	kcondvar_t	rll_cv;
347716fd348SMartin Matuska } rll_t;
348716fd348SMartin Matuska 
/*
 * A range lock record: the object, offset and size it covers plus a pointer
 * to the rll_t it refers to.
 * NOTE(review): rl_lock presumably points into a ztest_ds_t zd_range_lock[]
 * slot chosen from the object/offset -- confirm.
 */
349716fd348SMartin Matuska typedef struct rl {
350716fd348SMartin Matuska 	uint64_t	rl_object;
351716fd348SMartin Matuska 	uint64_t	rl_offset;
352716fd348SMartin Matuska 	uint64_t	rl_size;
353716fd348SMartin Matuska 	rll_t		*rl_lock;
354716fd348SMartin Matuska } rl_t;
355716fd348SMartin Matuska 
/*
 * Array lengths of ztest_ds_t.zd_range_lock[] and zd_object_lock[]
 * respectively.
 */
356716fd348SMartin Matuska #define	ZTEST_RANGE_LOCKS	64
357716fd348SMartin Matuska #define	ZTEST_OBJECT_LOCKS	64
358716fd348SMartin Matuska 
359716fd348SMartin Matuska /*
360716fd348SMartin Matuska  * Object descriptor.  Used as a template for object lookup/create/remove.
 *
 * NOTE(review): the od_cr* members presumably hold the values requested at
 * creation time, and their non-"cr" counterparts the values currently
 * observed -- confirm against the lookup/create code.
361716fd348SMartin Matuska  */
362716fd348SMartin Matuska typedef struct ztest_od {
363716fd348SMartin Matuska 	uint64_t	od_dir;
364716fd348SMartin Matuska 	uint64_t	od_object;
365716fd348SMartin Matuska 	dmu_object_type_t od_type;
366716fd348SMartin Matuska 	dmu_object_type_t od_crtype;
367716fd348SMartin Matuska 	uint64_t	od_blocksize;
368716fd348SMartin Matuska 	uint64_t	od_crblocksize;
369716fd348SMartin Matuska 	uint64_t	od_crdnodesize;
370716fd348SMartin Matuska 	uint64_t	od_gen;
371716fd348SMartin Matuska 	uint64_t	od_crgen;
372716fd348SMartin Matuska 	char		od_name[ZFS_MAX_DATASET_NAME_LEN];
373716fd348SMartin Matuska } ztest_od_t;
374716fd348SMartin Matuska 
375716fd348SMartin Matuska /*
376716fd348SMartin Matuska  * Per-dataset state.
 *
 * Every test function (ztest_func_t) receives a pointer to one of these.
 * zd_object_lock has ZTEST_OBJECT_LOCKS entries and zd_range_lock has
 * ZTEST_RANGE_LOCKS entries.
377716fd348SMartin Matuska  */
378716fd348SMartin Matuska typedef struct ztest_ds {
379716fd348SMartin Matuska 	ztest_shared_ds_t *zd_shared;
380716fd348SMartin Matuska 	objset_t	*zd_os;
381716fd348SMartin Matuska 	pthread_rwlock_t zd_zilog_lock;
382716fd348SMartin Matuska 	zilog_t		*zd_zilog;
383716fd348SMartin Matuska 	ztest_od_t	*zd_od;		/* debugging aid */
384716fd348SMartin Matuska 	char		zd_name[ZFS_MAX_DATASET_NAME_LEN];
385716fd348SMartin Matuska 	kmutex_t	zd_dirobj_lock;
386716fd348SMartin Matuska 	rll_t		zd_object_lock[ZTEST_OBJECT_LOCKS];
387716fd348SMartin Matuska 	rll_t		zd_range_lock[ZTEST_RANGE_LOCKS];
388716fd348SMartin Matuska } ztest_ds_t;
389716fd348SMartin Matuska 
390716fd348SMartin Matuska /*
391716fd348SMartin Matuska  * Per-iteration state.
 *
 * ztest_func_t is the signature shared by all test functions.
 * ztest_info_t is one row of the ztest_info[] table, normally built with
 * ZTI_INIT().  zi_interval is a pointer rather than a value, which lets a
 * row refer to a variable such as ztest_opts.zo_vdevtime.
 * NOTE(review): the zopt_* intervals used in the table are scaled by
 * NANOSEC, so the "seconds" wording on zi_interval below looks inaccurate
 * -- confirm the unit.
392716fd348SMartin Matuska  */
393716fd348SMartin Matuska typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
394716fd348SMartin Matuska 
395716fd348SMartin Matuska typedef struct ztest_info {
396716fd348SMartin Matuska 	ztest_func_t	*zi_func;	/* test function */
397716fd348SMartin Matuska 	uint64_t	zi_iters;	/* iterations per execution */
398716fd348SMartin Matuska 	uint64_t	*zi_interval;	/* execute every <interval> seconds */
399716fd348SMartin Matuska 	const char	*zi_funcname;	/* name of test function */
400716fd348SMartin Matuska } ztest_info_t;
401716fd348SMartin Matuska 
/*
 * Call statistics kept in an array reachable through ztest_shared_callstate.
 * NOTE(review): presumably there is one element per ztest_info[] entry,
 * sized via zh_stats_size / zh_stats_count in the shared header -- confirm.
 */
402716fd348SMartin Matuska typedef struct ztest_shared_callstate {
403716fd348SMartin Matuska 	uint64_t	zc_count;	/* per-pass count */
404716fd348SMartin Matuska 	uint64_t	zc_time;	/* per-pass time */
405716fd348SMartin Matuska 	uint64_t	zc_next;	/* next time to call this function */
406716fd348SMartin Matuska } ztest_shared_callstate_t;
407716fd348SMartin Matuska 
408716fd348SMartin Matuska static ztest_shared_callstate_t *ztest_shared_callstate;
/* Address of element c of ztest_shared_callstate; no bounds check. */
409716fd348SMartin Matuska #define	ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])
410716fd348SMartin Matuska 
/*
 * Forward declarations of the test functions, all of type ztest_func_t, so
 * that the ztest_info[] table can refer to them before they are defined.
 * ztest_dmu_prealloc is declared here although its table entry is compiled
 * out with #if 0.
 */
411716fd348SMartin Matuska ztest_func_t ztest_dmu_read_write;
412716fd348SMartin Matuska ztest_func_t ztest_dmu_write_parallel;
413716fd348SMartin Matuska ztest_func_t ztest_dmu_object_alloc_free;
414716fd348SMartin Matuska ztest_func_t ztest_dmu_object_next_chunk;
415716fd348SMartin Matuska ztest_func_t ztest_dmu_commit_callbacks;
416716fd348SMartin Matuska ztest_func_t ztest_zap;
417716fd348SMartin Matuska ztest_func_t ztest_zap_parallel;
418716fd348SMartin Matuska ztest_func_t ztest_zil_commit;
419716fd348SMartin Matuska ztest_func_t ztest_zil_remount;
420716fd348SMartin Matuska ztest_func_t ztest_dmu_read_write_zcopy;
421716fd348SMartin Matuska ztest_func_t ztest_dmu_objset_create_destroy;
422716fd348SMartin Matuska ztest_func_t ztest_dmu_prealloc;
423716fd348SMartin Matuska ztest_func_t ztest_fzap;
424716fd348SMartin Matuska ztest_func_t ztest_dmu_snapshot_create_destroy;
425716fd348SMartin Matuska ztest_func_t ztest_dsl_prop_get_set;
426716fd348SMartin Matuska ztest_func_t ztest_spa_prop_get_set;
427716fd348SMartin Matuska ztest_func_t ztest_spa_create_destroy;
428716fd348SMartin Matuska ztest_func_t ztest_fault_inject;
429716fd348SMartin Matuska ztest_func_t ztest_dmu_snapshot_hold;
430716fd348SMartin Matuska ztest_func_t ztest_mmp_enable_disable;
431716fd348SMartin Matuska ztest_func_t ztest_scrub;
432716fd348SMartin Matuska ztest_func_t ztest_dsl_dataset_promote_busy;
433716fd348SMartin Matuska ztest_func_t ztest_vdev_attach_detach;
434e716630dSMartin Matuska ztest_func_t ztest_vdev_raidz_attach;
435716fd348SMartin Matuska ztest_func_t ztest_vdev_LUN_growth;
436716fd348SMartin Matuska ztest_func_t ztest_vdev_add_remove;
437716fd348SMartin Matuska ztest_func_t ztest_vdev_class_add;
438716fd348SMartin Matuska ztest_func_t ztest_vdev_aux_add_remove;
439716fd348SMartin Matuska ztest_func_t ztest_split_pool;
440716fd348SMartin Matuska ztest_func_t ztest_reguid;
441716fd348SMartin Matuska ztest_func_t ztest_spa_upgrade;
442716fd348SMartin Matuska ztest_func_t ztest_device_removal;
443716fd348SMartin Matuska ztest_func_t ztest_spa_checkpoint_create_discard;
444716fd348SMartin Matuska ztest_func_t ztest_initialize;
445716fd348SMartin Matuska ztest_func_t ztest_trim;
4461f1e2261SMartin Matuska ztest_func_t ztest_blake3;
447716fd348SMartin Matuska ztest_func_t ztest_fletcher;
448716fd348SMartin Matuska ztest_func_t ztest_fletcher_incr;
449716fd348SMartin Matuska ztest_func_t ztest_verify_dnode_bt;
450ce4dcb97SMartin Matuska ztest_func_t ztest_pool_prefetch_ddt;
451e2df9bb4SMartin Matuska ztest_func_t ztest_ddt_prune;
452716fd348SMartin Matuska 
/*
 * Named call intervals for the ztest_info[] table, expressed as multiples of
 * NANOSEC.  They are variables rather than constants because the table
 * stores their addresses in zi_interval.
 */
453dbd5678dSMartin Matuska static uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
454dbd5678dSMartin Matuska static uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
455dbd5678dSMartin Matuska static uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
456dbd5678dSMartin Matuska static uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
457dbd5678dSMartin Matuska static uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
458716fd348SMartin Matuska 
/*
 * Builds one ztest_info_t initializer.  func is used twice: once as the
 * function pointer and once stringified (# func) for zi_funcname, so the
 * recorded name is exactly the token passed in.  interval must be a pointer
 * to uint64_t.
 */
459716fd348SMartin Matuska #define	ZTI_INIT(func, iters, interval) \
460716fd348SMartin Matuska 	{   .zi_func = (func), \
461716fd348SMartin Matuska 	    .zi_iters = (iters), \
462716fd348SMartin Matuska 	    .zi_interval = (interval), \
463716fd348SMartin Matuska 	    .zi_funcname = # func }
464716fd348SMartin Matuska 
/*
 * Table of test functions: each row gives the function, its iterations per
 * execution and a pointer to its call interval.  Three vdev tests take their
 * interval from ztest_opts.zo_vdevtime instead of a fixed zopt_* value.
 * The ztest_dmu_prealloc row is disabled with #if 0.
 * NOTE(review): per-function shared state (ztest_shared_callstate) is
 * presumably indexed by position in this table, so reordering rows may
 * matter for -B compatibility -- confirm.
 */
465dbd5678dSMartin Matuska static ztest_info_t ztest_info[] = {
466716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always),
467716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always),
468716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always),
469716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes),
470716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always),
471716fd348SMartin Matuska 	ZTI_INIT(ztest_zap, 30, &zopt_always),
472716fd348SMartin Matuska 	ZTI_INIT(ztest_zap_parallel, 100, &zopt_always),
47315f0b8c3SMartin Matuska 	ZTI_INIT(ztest_split_pool, 1, &zopt_sometimes),
474716fd348SMartin Matuska 	ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant),
475716fd348SMartin Matuska 	ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes),
476716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often),
477716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often),
478716fd348SMartin Matuska 	ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often),
479716fd348SMartin Matuska 	ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes),
480716fd348SMartin Matuska #if 0
481716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes),
482716fd348SMartin Matuska #endif
483716fd348SMartin Matuska 	ZTI_INIT(ztest_fzap, 1, &zopt_sometimes),
484716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes),
485716fd348SMartin Matuska 	ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes),
486716fd348SMartin Matuska 	ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
487716fd348SMartin Matuska 	ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
488716fd348SMartin Matuska 	ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
489716fd348SMartin Matuska 	ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
490716fd348SMartin Matuska 	ZTI_INIT(ztest_scrub, 1, &zopt_rarely),
491716fd348SMartin Matuska 	ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely),
492716fd348SMartin Matuska 	ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely),
493716fd348SMartin Matuska 	ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes),
494e716630dSMartin Matuska 	ZTI_INIT(ztest_vdev_raidz_attach, 1, &zopt_sometimes),
495716fd348SMartin Matuska 	ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely),
496716fd348SMartin Matuska 	ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime),
497716fd348SMartin Matuska 	ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime),
498716fd348SMartin Matuska 	ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime),
499716fd348SMartin Matuska 	ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes),
500716fd348SMartin Matuska 	ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
501716fd348SMartin Matuska 	ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
502716fd348SMartin Matuska 	ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
5031f1e2261SMartin Matuska 	ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
504716fd348SMartin Matuska 	ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
505716fd348SMartin Matuska 	ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
506716fd348SMartin Matuska 	ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
507ce4dcb97SMartin Matuska 	ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
508e2df9bb4SMartin Matuska 	ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
509716fd348SMartin Matuska };
510716fd348SMartin Matuska 
/* Number of rows in ztest_info[], computed at compile time. */
511716fd348SMartin Matuska #define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
512716fd348SMartin Matuska 
513716fd348SMartin Matuska /*
514716fd348SMartin Matuska  * The following struct is used to hold a list of uncalled commit callbacks.
515716fd348SMartin Matuska  * The callbacks are ordered by txg number.
 *
 * NOTE(review): zcl_callbacks_lock presumably protects zcl_callbacks --
 * confirm against the commit-callback test.
516716fd348SMartin Matuska  */
517716fd348SMartin Matuska typedef struct ztest_cb_list {
518716fd348SMartin Matuska 	kmutex_t	zcl_callbacks_lock;
519716fd348SMartin Matuska 	list_t		zcl_callbacks;
520716fd348SMartin Matuska } ztest_cb_list_t;
521716fd348SMartin Matuska 
522716fd348SMartin Matuska /*
523716fd348SMartin Matuska  * Stuff we need to share writably between parent and child.
 *
 * zs_mirrors is read by the MAXFAULTS() macro.
 * NOTE(review): zh_size in the shared header presumably records the size of
 * this struct, which would make its layout part of the -B
 * backwards-compatibility contract -- confirm before reordering members.
524716fd348SMartin Matuska  */
525716fd348SMartin Matuska typedef struct ztest_shared {
526716fd348SMartin Matuska 	boolean_t	zs_do_init;
527716fd348SMartin Matuska 	hrtime_t	zs_proc_start;
528716fd348SMartin Matuska 	hrtime_t	zs_proc_stop;
529716fd348SMartin Matuska 	hrtime_t	zs_thread_start;
530716fd348SMartin Matuska 	hrtime_t	zs_thread_stop;
531716fd348SMartin Matuska 	hrtime_t	zs_thread_kill;
532716fd348SMartin Matuska 	uint64_t	zs_enospc_count;
533716fd348SMartin Matuska 	uint64_t	zs_vdev_next_leaf;
534716fd348SMartin Matuska 	uint64_t	zs_vdev_aux;
535716fd348SMartin Matuska 	uint64_t	zs_alloc;
536716fd348SMartin Matuska 	uint64_t	zs_space;
537716fd348SMartin Matuska 	uint64_t	zs_splits;
538716fd348SMartin Matuska 	uint64_t	zs_mirrors;
539716fd348SMartin Matuska 	uint64_t	zs_metaslab_sz;
540716fd348SMartin Matuska 	uint64_t	zs_metaslab_df_alloc_threshold;
541716fd348SMartin Matuska 	uint64_t	zs_guid;
542716fd348SMartin Matuska } ztest_shared_t;
543716fd348SMartin Matuska 
544716fd348SMartin Matuska #define	ID_PARALLEL	-1ULL
545716fd348SMartin Matuska 
546716fd348SMartin Matuska static char ztest_dev_template[] = "%s/%s.%llua";
547716fd348SMartin Matuska static char ztest_aux_template[] = "%s/%s.%s.%llu";
548dbd5678dSMartin Matuska static ztest_shared_t *ztest_shared;
549716fd348SMartin Matuska 
550716fd348SMartin Matuska static spa_t *ztest_spa = NULL;
551716fd348SMartin Matuska static ztest_ds_t *ztest_ds;
552716fd348SMartin Matuska 
553716fd348SMartin Matuska static kmutex_t ztest_vdev_lock;
554716fd348SMartin Matuska static boolean_t ztest_device_removal_active = B_FALSE;
555716fd348SMartin Matuska static boolean_t ztest_pool_scrubbed = B_FALSE;
556716fd348SMartin Matuska static kmutex_t ztest_checkpoint_lock;
557716fd348SMartin Matuska 
558716fd348SMartin Matuska /*
559716fd348SMartin Matuska  * The ztest_name_lock protects the pool and dataset namespace used by
560716fd348SMartin Matuska  * the individual tests. To modify the namespace, consumers must grab
561716fd348SMartin Matuska  * this lock as writer. Grabbing the lock as reader will ensure that the
562716fd348SMartin Matuska  * namespace does not change while the lock is held.
563716fd348SMartin Matuska  */
564716fd348SMartin Matuska static pthread_rwlock_t ztest_name_lock;
565716fd348SMartin Matuska 
566716fd348SMartin Matuska static boolean_t ztest_dump_core = B_TRUE;
567716fd348SMartin Matuska static boolean_t ztest_exiting;
568716fd348SMartin Matuska 
569716fd348SMartin Matuska /* Global commit callback list */
570716fd348SMartin Matuska static ztest_cb_list_t zcl;
571716fd348SMartin Matuska /* Commit cb delay */
572716fd348SMartin Matuska static uint64_t zc_min_txg_delay = UINT64_MAX;
573716fd348SMartin Matuska static int zc_cb_counter = 0;
574716fd348SMartin Matuska 
575716fd348SMartin Matuska /*
576716fd348SMartin Matuska  * Minimum number of commit callbacks that need to be registered for us to check
577716fd348SMartin Matuska  * whether the minimum txg delay is acceptable.
578716fd348SMartin Matuska  */
579716fd348SMartin Matuska #define	ZTEST_COMMIT_CB_MIN_REG	100
580716fd348SMartin Matuska 
581716fd348SMartin Matuska /*
582716fd348SMartin Matuska  * If a number of txgs equal to this threshold have been created after a commit
583716fd348SMartin Matuska  * callback has been registered but not called, then we assume there is an
584716fd348SMartin Matuska  * implementation bug.
585716fd348SMartin Matuska  */
586716fd348SMartin Matuska #define	ZTEST_COMMIT_CB_THRESH	(TXG_CONCURRENT_STATES + 1000)
587716fd348SMartin Matuska 
enum ztest_object {
	ZTEST_META_DNODE = 0,
	ZTEST_DIROBJ,
	/* Last enumerator; NOTE(review): presumably used as a count. */
	ZTEST_OBJECTS
};
593716fd348SMartin Matuska 
594716fd348SMartin Matuska static __attribute__((noreturn)) void usage(boolean_t requested);
595716fd348SMartin Matuska static int ztest_scrub_impl(spa_t *spa);
596716fd348SMartin Matuska 
597716fd348SMartin Matuska /*
598716fd348SMartin Matuska  * These libumem hooks provide a reasonable set of defaults for the allocator's
599716fd348SMartin Matuska  * debugging facilities.
600716fd348SMartin Matuska  */
const char *
_umem_debug_init(void)
{
	/* Value libumem should use as its $UMEM_DEBUG setting. */
	static const char umem_debug_setting[] = "default,verbose";

	return (umem_debug_setting);
}
606716fd348SMartin Matuska 
const char *
_umem_logging_init(void)
{
	/* Value libumem should use as its $UMEM_LOGGING setting. */
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
612716fd348SMartin Matuska 
613716fd348SMartin Matuska static void
614716fd348SMartin Matuska dump_debug_buffer(void)
615716fd348SMartin Matuska {
616716fd348SMartin Matuska 	ssize_t ret __attribute__((unused));
617716fd348SMartin Matuska 
618716fd348SMartin Matuska 	if (!ztest_opts.zo_dump_dbgmsg)
619716fd348SMartin Matuska 		return;
620716fd348SMartin Matuska 
621716fd348SMartin Matuska 	/*
622716fd348SMartin Matuska 	 * We use write() instead of printf() so that this function
623716fd348SMartin Matuska 	 * is safe to call from a signal handler.
624716fd348SMartin Matuska 	 */
625aca928a5SMartin Matuska 	ret = write(STDERR_FILENO, "\n", 1);
626aca928a5SMartin Matuska 	zfs_dbgmsg_print(STDERR_FILENO, "ztest");
627716fd348SMartin Matuska }
628716fd348SMartin Matuska 
629716fd348SMartin Matuska static void sig_handler(int signo)
630716fd348SMartin Matuska {
631716fd348SMartin Matuska 	struct sigaction action;
632716fd348SMartin Matuska 
633aca928a5SMartin Matuska 	libspl_backtrace(STDERR_FILENO);
634716fd348SMartin Matuska 	dump_debug_buffer();
635716fd348SMartin Matuska 
636716fd348SMartin Matuska 	/*
637716fd348SMartin Matuska 	 * Restore default action and re-raise signal so SIGSEGV and
638716fd348SMartin Matuska 	 * SIGABRT can trigger a core dump.
639716fd348SMartin Matuska 	 */
640716fd348SMartin Matuska 	action.sa_handler = SIG_DFL;
641716fd348SMartin Matuska 	sigemptyset(&action.sa_mask);
642716fd348SMartin Matuska 	action.sa_flags = 0;
643716fd348SMartin Matuska 	(void) sigaction(signo, &action, NULL);
644716fd348SMartin Matuska 	raise(signo);
645716fd348SMartin Matuska }
646716fd348SMartin Matuska 
647716fd348SMartin Matuska #define	FATAL_MSG_SZ	1024
648716fd348SMartin Matuska 
649a0b956f5SMartin Matuska static const char *fatal_msg;
650716fd348SMartin Matuska 
651716fd348SMartin Matuska static __attribute__((format(printf, 2, 3))) __attribute__((noreturn)) void
652a0b956f5SMartin Matuska fatal(int do_perror, const char *message, ...)
653716fd348SMartin Matuska {
654716fd348SMartin Matuska 	va_list args;
655716fd348SMartin Matuska 	int save_errno = errno;
656716fd348SMartin Matuska 	char *buf;
657716fd348SMartin Matuska 
658716fd348SMartin Matuska 	(void) fflush(stdout);
659716fd348SMartin Matuska 	buf = umem_alloc(FATAL_MSG_SZ, UMEM_NOFAIL);
660716fd348SMartin Matuska 	if (buf == NULL)
661716fd348SMartin Matuska 		goto out;
662716fd348SMartin Matuska 
663716fd348SMartin Matuska 	va_start(args, message);
664716fd348SMartin Matuska 	(void) sprintf(buf, "ztest: ");
665716fd348SMartin Matuska 	/* LINTED */
666716fd348SMartin Matuska 	(void) vsprintf(buf + strlen(buf), message, args);
667716fd348SMartin Matuska 	va_end(args);
668716fd348SMartin Matuska 	if (do_perror) {
669716fd348SMartin Matuska 		(void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf),
670716fd348SMartin Matuska 		    ": %s", strerror(save_errno));
671716fd348SMartin Matuska 	}
672716fd348SMartin Matuska 	(void) fprintf(stderr, "%s\n", buf);
673716fd348SMartin Matuska 	fatal_msg = buf;			/* to ease debugging */
674716fd348SMartin Matuska 
675716fd348SMartin Matuska out:
676716fd348SMartin Matuska 	if (ztest_dump_core)
677716fd348SMartin Matuska 		abort();
678716fd348SMartin Matuska 	else
679716fd348SMartin Matuska 		dump_debug_buffer();
680716fd348SMartin Matuska 
681716fd348SMartin Matuska 	exit(3);
682716fd348SMartin Matuska }
683716fd348SMartin Matuska 
684716fd348SMartin Matuska static int
685716fd348SMartin Matuska str2shift(const char *buf)
686716fd348SMartin Matuska {
687716fd348SMartin Matuska 	const char *ends = "BKMGTPEZ";
6887a7741afSMartin Matuska 	int i, len;
689716fd348SMartin Matuska 
690716fd348SMartin Matuska 	if (buf[0] == '\0')
691716fd348SMartin Matuska 		return (0);
6927a7741afSMartin Matuska 
6937a7741afSMartin Matuska 	len = strlen(ends);
6947a7741afSMartin Matuska 	for (i = 0; i < len; i++) {
695716fd348SMartin Matuska 		if (toupper(buf[0]) == ends[i])
696716fd348SMartin Matuska 			break;
697716fd348SMartin Matuska 	}
6987a7741afSMartin Matuska 	if (i == len) {
699716fd348SMartin Matuska 		(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
700716fd348SMartin Matuska 		    buf);
701716fd348SMartin Matuska 		usage(B_FALSE);
702716fd348SMartin Matuska 	}
703716fd348SMartin Matuska 	if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) {
704716fd348SMartin Matuska 		return (10*i);
705716fd348SMartin Matuska 	}
706716fd348SMartin Matuska 	(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
707716fd348SMartin Matuska 	usage(B_FALSE);
708716fd348SMartin Matuska }
709716fd348SMartin Matuska 
710716fd348SMartin Matuska static uint64_t
711716fd348SMartin Matuska nicenumtoull(const char *buf)
712716fd348SMartin Matuska {
713716fd348SMartin Matuska 	char *end;
714716fd348SMartin Matuska 	uint64_t val;
715716fd348SMartin Matuska 
716716fd348SMartin Matuska 	val = strtoull(buf, &end, 0);
717716fd348SMartin Matuska 	if (end == buf) {
718716fd348SMartin Matuska 		(void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
719716fd348SMartin Matuska 		usage(B_FALSE);
720716fd348SMartin Matuska 	} else if (end[0] == '.') {
721716fd348SMartin Matuska 		double fval = strtod(buf, &end);
722716fd348SMartin Matuska 		fval *= pow(2, str2shift(end));
723716fd348SMartin Matuska 		/*
724716fd348SMartin Matuska 		 * UINT64_MAX is not exactly representable as a double.
725716fd348SMartin Matuska 		 * The closest representation is UINT64_MAX + 1, so we
726716fd348SMartin Matuska 		 * use a >= comparison instead of > for the bounds check.
727716fd348SMartin Matuska 		 */
728716fd348SMartin Matuska 		if (fval >= (double)UINT64_MAX) {
729716fd348SMartin Matuska 			(void) fprintf(stderr, "ztest: value too large: %s\n",
730716fd348SMartin Matuska 			    buf);
731716fd348SMartin Matuska 			usage(B_FALSE);
732716fd348SMartin Matuska 		}
733716fd348SMartin Matuska 		val = (uint64_t)fval;
734716fd348SMartin Matuska 	} else {
735716fd348SMartin Matuska 		int shift = str2shift(end);
736716fd348SMartin Matuska 		if (shift >= 64 || (val << shift) >> shift != val) {
737716fd348SMartin Matuska 			(void) fprintf(stderr, "ztest: value too large: %s\n",
738716fd348SMartin Matuska 			    buf);
739716fd348SMartin Matuska 			usage(B_FALSE);
740716fd348SMartin Matuska 		}
741716fd348SMartin Matuska 		val <<= shift;
742716fd348SMartin Matuska 	}
743716fd348SMartin Matuska 	return (val);
744716fd348SMartin Matuska }
745716fd348SMartin Matuska 
typedef struct ztest_option {
	/* Single-character flag; 0 marks the end of option_table. */
	const char	short_opt;
	/* Name of the --long form of the option. */
	const char	*long_opt;
	/* Argument placeholder shown by usage(); NULL means no argument. */
	const char	*long_opt_param;
	/* One-line description printed by usage(). */
	const char	*comment;
	/* Numeric default printed by usage() unless it is NO_DEFAULT. */
	unsigned int	default_int;
	/* String default printed by usage(); wins over default_int. */
	const char	*default_str;
} ztest_option_t;
754716fd348SMartin Matuska 
755716fd348SMartin Matuska /*
756716fd348SMartin Matuska  * The following option_table is used for generating the usage info as well as
757716fd348SMartin Matuska  * the long and short option information for calling getopt_long().
758716fd348SMartin Matuska  */
static ztest_option_t option_table[] = {
	{ 'v',	"vdevs", "INTEGER", "Number of vdevs", DEFAULT_VDEV_COUNT,
	    NULL},
	{ 's',	"vdev-size", "INTEGER", "Size of each vdev",
	    NO_DEFAULT, DEFAULT_VDEV_SIZE_STR},
	{ 'a',	"alignment-shift", "INTEGER",
	    "Alignment shift; use 0 for random", DEFAULT_ASHIFT, NULL},
	{ 'm',	"mirror-copies", "INTEGER", "Number of mirror copies",
	    DEFAULT_MIRRORS, NULL},
	{ 'r',	"raid-disks", "INTEGER", "Number of raidz/draid disks",
	    DEFAULT_RAID_CHILDREN, NULL},
	{ 'R',	"raid-parity", "INTEGER", "Raid parity",
	    DEFAULT_RAID_PARITY, NULL},
	{ 'K',  "raid-kind", "raidz|eraidz|draid|random", "Raid kind",
	    NO_DEFAULT, "random"},
	{ 'D',	"draid-data", "INTEGER", "Number of draid data drives",
	    DEFAULT_DRAID_DATA, NULL},
	{ 'S',	"draid-spares", "INTEGER", "Number of draid spares",
	    DEFAULT_DRAID_SPARES, NULL},
	{ 'd',	"datasets", "INTEGER", "Number of datasets",
	    DEFAULT_DATASETS_COUNT, NULL},
	{ 't',	"threads", "INTEGER", "Number of ztest threads",
	    DEFAULT_THREADS, NULL},
	{ 'g',	"gang-block-threshold", "INTEGER",
	    "Metaslab gang block threshold",
	    NO_DEFAULT, DEFAULT_FORCE_GANGING_STR},
	{ 'i',	"init-count", "INTEGER", "Number of times to initialize pool",
	    DEFAULT_INITS, NULL},
	{ 'k',	"kill-percentage", "INTEGER", "Kill percentage",
	    NO_DEFAULT, DEFAULT_KILLRATE_STR},
	{ 'p',	"pool-name", "STRING", "Pool name",
	    NO_DEFAULT, DEFAULT_POOL},
	{ 'f',	"vdev-file-directory", "PATH", "File directory for vdev files",
	    NO_DEFAULT, DEFAULT_VDEV_DIR},
	{ 'M',	"multi-host", NULL,
	    "Multi-host; simulate pool imported on remote host",
	    NO_DEFAULT, NULL},
	{ 'E',	"use-existing-pool", NULL,
	    "Use existing pool instead of creating new one", NO_DEFAULT, NULL},
	{ 'T',	"run-time", "INTEGER", "Total run time",
	    NO_DEFAULT, DEFAULT_RUN_TIME_STR},
	{ 'P',	"pass-time", "INTEGER", "Time per pass",
	    NO_DEFAULT, DEFAULT_PASS_TIME_STR},
	{ 'F',	"freeze-loops", "INTEGER", "Max loops in spa_freeze()",
	    DEFAULT_MAX_LOOPS, NULL},
	{ 'B',	"alt-ztest", "PATH", "Alternate ztest path",
	    NO_DEFAULT, NULL},
	{ 'C',	"vdev-class-state", "on|off|random", "vdev class state",
	    NO_DEFAULT, "random"},
	{ 'X', "raidz-expansion", NULL,
	    "Perform a dedicated raidz expansion test",
	    NO_DEFAULT, NULL},
	{ 'o',	"option", "\"OPTION=INTEGER\"",
	    "Set global variable to an unsigned 32-bit integer value",
	    NO_DEFAULT, NULL},
	{ 'G',	"dump-debug-msg", NULL,
	    "Dump zfs_dbgmsg buffer before exiting due to an error",
	    NO_DEFAULT, NULL},
	{ 'V',	"verbose", NULL,
	    "Verbose (use multiple times for ever more verbosity)",
	    NO_DEFAULT, NULL},
	{ 'h',	"help",	NULL, "Show this help",
	    NO_DEFAULT, NULL},
	/*
	 * All-zero terminator.  usage() stops at short_opt == 0, and
	 * init_options() copies this entry too, which terminates both the
	 * long-option array and the short-option string.
	 */
	{0, 0, 0, 0, 0, 0}
};
824716fd348SMartin Matuska 
825716fd348SMartin Matuska static struct option *long_opts = NULL;
826716fd348SMartin Matuska static char *short_opts = NULL;
827716fd348SMartin Matuska 
828716fd348SMartin Matuska static void
829716fd348SMartin Matuska init_options(void)
830716fd348SMartin Matuska {
831716fd348SMartin Matuska 	ASSERT3P(long_opts, ==, NULL);
832716fd348SMartin Matuska 	ASSERT3P(short_opts, ==, NULL);
833716fd348SMartin Matuska 
834716fd348SMartin Matuska 	int count = sizeof (option_table) / sizeof (option_table[0]);
835716fd348SMartin Matuska 	long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL);
836716fd348SMartin Matuska 
837716fd348SMartin Matuska 	short_opts = umem_alloc(sizeof (char) * 2 * count, UMEM_NOFAIL);
838716fd348SMartin Matuska 	int short_opt_index = 0;
839716fd348SMartin Matuska 
840716fd348SMartin Matuska 	for (int i = 0; i < count; i++) {
841716fd348SMartin Matuska 		long_opts[i].val = option_table[i].short_opt;
842716fd348SMartin Matuska 		long_opts[i].name = option_table[i].long_opt;
843716fd348SMartin Matuska 		long_opts[i].has_arg = option_table[i].long_opt_param != NULL
844716fd348SMartin Matuska 		    ? required_argument : no_argument;
845716fd348SMartin Matuska 		long_opts[i].flag = NULL;
846716fd348SMartin Matuska 		short_opts[short_opt_index++] = option_table[i].short_opt;
847716fd348SMartin Matuska 		if (option_table[i].long_opt_param != NULL) {
848716fd348SMartin Matuska 			short_opts[short_opt_index++] = ':';
849716fd348SMartin Matuska 		}
850716fd348SMartin Matuska 	}
851716fd348SMartin Matuska }
852716fd348SMartin Matuska 
853716fd348SMartin Matuska static void
854716fd348SMartin Matuska fini_options(void)
855716fd348SMartin Matuska {
856716fd348SMartin Matuska 	int count = sizeof (option_table) / sizeof (option_table[0]);
857716fd348SMartin Matuska 
858716fd348SMartin Matuska 	umem_free(long_opts, sizeof (struct option) * count);
859716fd348SMartin Matuska 	umem_free(short_opts, sizeof (char) * 2 * count);
860716fd348SMartin Matuska 
861716fd348SMartin Matuska 	long_opts = NULL;
862716fd348SMartin Matuska 	short_opts = NULL;
863716fd348SMartin Matuska }
864716fd348SMartin Matuska 
865716fd348SMartin Matuska static __attribute__((noreturn)) void
866716fd348SMartin Matuska usage(boolean_t requested)
867716fd348SMartin Matuska {
868716fd348SMartin Matuska 	char option[80];
869716fd348SMartin Matuska 	FILE *fp = requested ? stdout : stderr;
870716fd348SMartin Matuska 
871716fd348SMartin Matuska 	(void) fprintf(fp, "Usage: %s [OPTIONS...]\n", DEFAULT_POOL);
872716fd348SMartin Matuska 	for (int i = 0; option_table[i].short_opt != 0; i++) {
873716fd348SMartin Matuska 		if (option_table[i].long_opt_param != NULL) {
874716fd348SMartin Matuska 			(void) sprintf(option, "  -%c --%s=%s",
875716fd348SMartin Matuska 			    option_table[i].short_opt,
876716fd348SMartin Matuska 			    option_table[i].long_opt,
877716fd348SMartin Matuska 			    option_table[i].long_opt_param);
878716fd348SMartin Matuska 		} else {
879716fd348SMartin Matuska 			(void) sprintf(option, "  -%c --%s",
880716fd348SMartin Matuska 			    option_table[i].short_opt,
881716fd348SMartin Matuska 			    option_table[i].long_opt);
882716fd348SMartin Matuska 		}
883e716630dSMartin Matuska 		(void) fprintf(fp, "  %-43s%s", option,
884716fd348SMartin Matuska 		    option_table[i].comment);
885716fd348SMartin Matuska 
886716fd348SMartin Matuska 		if (option_table[i].long_opt_param != NULL) {
887716fd348SMartin Matuska 			if (option_table[i].default_str != NULL) {
888716fd348SMartin Matuska 				(void) fprintf(fp, " (default: %s)",
889716fd348SMartin Matuska 				    option_table[i].default_str);
890716fd348SMartin Matuska 			} else if (option_table[i].default_int != NO_DEFAULT) {
891716fd348SMartin Matuska 				(void) fprintf(fp, " (default: %u)",
892716fd348SMartin Matuska 				    option_table[i].default_int);
893716fd348SMartin Matuska 			}
894716fd348SMartin Matuska 		}
895716fd348SMartin Matuska 		(void) fprintf(fp, "\n");
896716fd348SMartin Matuska 	}
897716fd348SMartin Matuska 	exit(requested ? 0 : 1);
898716fd348SMartin Matuska }
899716fd348SMartin Matuska 
900716fd348SMartin Matuska static uint64_t
901716fd348SMartin Matuska ztest_random(uint64_t range)
902716fd348SMartin Matuska {
903716fd348SMartin Matuska 	uint64_t r;
904716fd348SMartin Matuska 
905716fd348SMartin Matuska 	ASSERT3S(ztest_fd_rand, >=, 0);
906716fd348SMartin Matuska 
907716fd348SMartin Matuska 	if (range == 0)
908716fd348SMartin Matuska 		return (0);
909716fd348SMartin Matuska 
910716fd348SMartin Matuska 	if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
911716fd348SMartin Matuska 		fatal(B_TRUE, "short read from /dev/urandom");
912716fd348SMartin Matuska 
913716fd348SMartin Matuska 	return (r % range);
914716fd348SMartin Matuska }
915716fd348SMartin Matuska 
916716fd348SMartin Matuska static void
917716fd348SMartin Matuska ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo)
918716fd348SMartin Matuska {
919716fd348SMartin Matuska 	char name[32];
920716fd348SMartin Matuska 	char *value;
921716fd348SMartin Matuska 	int state = ZTEST_VDEV_CLASS_RND;
922716fd348SMartin Matuska 
923716fd348SMartin Matuska 	(void) strlcpy(name, input, sizeof (name));
924716fd348SMartin Matuska 
925716fd348SMartin Matuska 	value = strchr(name, '=');
926716fd348SMartin Matuska 	if (value == NULL) {
927716fd348SMartin Matuska 		(void) fprintf(stderr, "missing value in property=value "
928716fd348SMartin Matuska 		    "'-C' argument (%s)\n", input);
929716fd348SMartin Matuska 		usage(B_FALSE);
930716fd348SMartin Matuska 	}
931716fd348SMartin Matuska 	*(value) = '\0';
932716fd348SMartin Matuska 	value++;
933716fd348SMartin Matuska 
934716fd348SMartin Matuska 	if (strcmp(value, "on") == 0) {
935716fd348SMartin Matuska 		state = ZTEST_VDEV_CLASS_ON;
936716fd348SMartin Matuska 	} else if (strcmp(value, "off") == 0) {
937716fd348SMartin Matuska 		state = ZTEST_VDEV_CLASS_OFF;
938716fd348SMartin Matuska 	} else if (strcmp(value, "random") == 0) {
939716fd348SMartin Matuska 		state = ZTEST_VDEV_CLASS_RND;
940716fd348SMartin Matuska 	} else {
941716fd348SMartin Matuska 		(void) fprintf(stderr, "invalid property value '%s'\n", value);
942716fd348SMartin Matuska 		usage(B_FALSE);
943716fd348SMartin Matuska 	}
944716fd348SMartin Matuska 
945716fd348SMartin Matuska 	if (strcmp(name, "special") == 0) {
946716fd348SMartin Matuska 		zo->zo_special_vdevs = state;
947716fd348SMartin Matuska 	} else {
948716fd348SMartin Matuska 		(void) fprintf(stderr, "invalid property name '%s'\n", name);
949716fd348SMartin Matuska 		usage(B_FALSE);
950716fd348SMartin Matuska 	}
951716fd348SMartin Matuska 	if (zo->zo_verbose >= 3)
952716fd348SMartin Matuska 		(void) printf("%s vdev state is '%s'\n", name, value);
953716fd348SMartin Matuska }
954716fd348SMartin Matuska 
955716fd348SMartin Matuska static void
956716fd348SMartin Matuska process_options(int argc, char **argv)
957716fd348SMartin Matuska {
958716fd348SMartin Matuska 	char *path;
959716fd348SMartin Matuska 	ztest_shared_opts_t *zo = &ztest_opts;
960716fd348SMartin Matuska 
961716fd348SMartin Matuska 	int opt;
962716fd348SMartin Matuska 	uint64_t value;
963716fd348SMartin Matuska 	const char *raid_kind = "random";
964716fd348SMartin Matuska 
965716fd348SMartin Matuska 	memcpy(zo, &ztest_opts_defaults, sizeof (*zo));
966716fd348SMartin Matuska 
967716fd348SMartin Matuska 	init_options();
968716fd348SMartin Matuska 
969716fd348SMartin Matuska 	while ((opt = getopt_long(argc, argv, short_opts, long_opts,
970716fd348SMartin Matuska 	    NULL)) != EOF) {
971716fd348SMartin Matuska 		value = 0;
972716fd348SMartin Matuska 		switch (opt) {
973716fd348SMartin Matuska 		case 'v':
974716fd348SMartin Matuska 		case 's':
975716fd348SMartin Matuska 		case 'a':
976716fd348SMartin Matuska 		case 'm':
977716fd348SMartin Matuska 		case 'r':
978716fd348SMartin Matuska 		case 'R':
979716fd348SMartin Matuska 		case 'D':
980716fd348SMartin Matuska 		case 'S':
981716fd348SMartin Matuska 		case 'd':
982716fd348SMartin Matuska 		case 't':
983716fd348SMartin Matuska 		case 'g':
984716fd348SMartin Matuska 		case 'i':
985716fd348SMartin Matuska 		case 'k':
986716fd348SMartin Matuska 		case 'T':
987716fd348SMartin Matuska 		case 'P':
988716fd348SMartin Matuska 		case 'F':
989716fd348SMartin Matuska 			value = nicenumtoull(optarg);
990716fd348SMartin Matuska 		}
991716fd348SMartin Matuska 		switch (opt) {
992716fd348SMartin Matuska 		case 'v':
993716fd348SMartin Matuska 			zo->zo_vdevs = value;
994716fd348SMartin Matuska 			break;
995716fd348SMartin Matuska 		case 's':
996716fd348SMartin Matuska 			zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value);
997716fd348SMartin Matuska 			break;
998716fd348SMartin Matuska 		case 'a':
999716fd348SMartin Matuska 			zo->zo_ashift = value;
1000716fd348SMartin Matuska 			break;
1001716fd348SMartin Matuska 		case 'm':
1002716fd348SMartin Matuska 			zo->zo_mirrors = value;
1003716fd348SMartin Matuska 			break;
1004716fd348SMartin Matuska 		case 'r':
1005716fd348SMartin Matuska 			zo->zo_raid_children = MAX(1, value);
1006716fd348SMartin Matuska 			break;
1007716fd348SMartin Matuska 		case 'R':
1008716fd348SMartin Matuska 			zo->zo_raid_parity = MIN(MAX(value, 1), 3);
1009716fd348SMartin Matuska 			break;
1010716fd348SMartin Matuska 		case 'K':
1011716fd348SMartin Matuska 			raid_kind = optarg;
1012716fd348SMartin Matuska 			break;
1013716fd348SMartin Matuska 		case 'D':
1014716fd348SMartin Matuska 			zo->zo_draid_data = MAX(1, value);
1015716fd348SMartin Matuska 			break;
1016716fd348SMartin Matuska 		case 'S':
1017716fd348SMartin Matuska 			zo->zo_draid_spares = MAX(1, value);
1018716fd348SMartin Matuska 			break;
1019716fd348SMartin Matuska 		case 'd':
1020716fd348SMartin Matuska 			zo->zo_datasets = MAX(1, value);
1021716fd348SMartin Matuska 			break;
1022716fd348SMartin Matuska 		case 't':
1023716fd348SMartin Matuska 			zo->zo_threads = MAX(1, value);
1024716fd348SMartin Matuska 			break;
1025716fd348SMartin Matuska 		case 'g':
1026716fd348SMartin Matuska 			zo->zo_metaslab_force_ganging =
1027716fd348SMartin Matuska 			    MAX(SPA_MINBLOCKSIZE << 1, value);
1028716fd348SMartin Matuska 			break;
1029716fd348SMartin Matuska 		case 'i':
1030716fd348SMartin Matuska 			zo->zo_init = value;
1031716fd348SMartin Matuska 			break;
1032716fd348SMartin Matuska 		case 'k':
1033716fd348SMartin Matuska 			zo->zo_killrate = value;
1034716fd348SMartin Matuska 			break;
1035716fd348SMartin Matuska 		case 'p':
1036716fd348SMartin Matuska 			(void) strlcpy(zo->zo_pool, optarg,
1037716fd348SMartin Matuska 			    sizeof (zo->zo_pool));
1038716fd348SMartin Matuska 			break;
1039716fd348SMartin Matuska 		case 'f':
1040716fd348SMartin Matuska 			path = realpath(optarg, NULL);
1041716fd348SMartin Matuska 			if (path == NULL) {
1042716fd348SMartin Matuska 				(void) fprintf(stderr, "error: %s: %s\n",
1043716fd348SMartin Matuska 				    optarg, strerror(errno));
1044716fd348SMartin Matuska 				usage(B_FALSE);
1045716fd348SMartin Matuska 			} else {
1046716fd348SMartin Matuska 				(void) strlcpy(zo->zo_dir, path,
1047716fd348SMartin Matuska 				    sizeof (zo->zo_dir));
1048716fd348SMartin Matuska 				free(path);
1049716fd348SMartin Matuska 			}
1050716fd348SMartin Matuska 			break;
1051716fd348SMartin Matuska 		case 'M':
1052716fd348SMartin Matuska 			zo->zo_mmp_test = 1;
1053716fd348SMartin Matuska 			break;
1054716fd348SMartin Matuska 		case 'V':
1055716fd348SMartin Matuska 			zo->zo_verbose++;
1056716fd348SMartin Matuska 			break;
1057e716630dSMartin Matuska 		case 'X':
1058e716630dSMartin Matuska 			zo->zo_raidz_expand_test = RAIDZ_EXPAND_REQUESTED;
1059e716630dSMartin Matuska 			break;
1060716fd348SMartin Matuska 		case 'E':
1061716fd348SMartin Matuska 			zo->zo_init = 0;
1062716fd348SMartin Matuska 			break;
1063716fd348SMartin Matuska 		case 'T':
1064716fd348SMartin Matuska 			zo->zo_time = value;
1065716fd348SMartin Matuska 			break;
1066716fd348SMartin Matuska 		case 'P':
1067716fd348SMartin Matuska 			zo->zo_passtime = MAX(1, value);
1068716fd348SMartin Matuska 			break;
1069716fd348SMartin Matuska 		case 'F':
1070716fd348SMartin Matuska 			zo->zo_maxloops = MAX(1, value);
1071716fd348SMartin Matuska 			break;
1072716fd348SMartin Matuska 		case 'B':
1073716fd348SMartin Matuska 			(void) strlcpy(zo->zo_alt_ztest, optarg,
1074716fd348SMartin Matuska 			    sizeof (zo->zo_alt_ztest));
1075716fd348SMartin Matuska 			break;
1076716fd348SMartin Matuska 		case 'C':
1077716fd348SMartin Matuska 			ztest_parse_name_value(optarg, zo);
1078716fd348SMartin Matuska 			break;
1079716fd348SMartin Matuska 		case 'o':
1080716fd348SMartin Matuska 			if (zo->zo_gvars_count >= ZO_GVARS_MAX_COUNT) {
1081716fd348SMartin Matuska 				(void) fprintf(stderr,
1082716fd348SMartin Matuska 				    "max global var count (%zu) exceeded\n",
1083716fd348SMartin Matuska 				    ZO_GVARS_MAX_COUNT);
1084716fd348SMartin Matuska 				usage(B_FALSE);
1085716fd348SMartin Matuska 			}
1086716fd348SMartin Matuska 			char *v = zo->zo_gvars[zo->zo_gvars_count];
1087716fd348SMartin Matuska 			if (strlcpy(v, optarg, ZO_GVARS_MAX_ARGLEN) >=
1088716fd348SMartin Matuska 			    ZO_GVARS_MAX_ARGLEN) {
1089716fd348SMartin Matuska 				(void) fprintf(stderr,
1090716fd348SMartin Matuska 				    "global var option '%s' is too long\n",
1091716fd348SMartin Matuska 				    optarg);
1092716fd348SMartin Matuska 				usage(B_FALSE);
1093716fd348SMartin Matuska 			}
1094716fd348SMartin Matuska 			zo->zo_gvars_count++;
1095716fd348SMartin Matuska 			break;
1096716fd348SMartin Matuska 		case 'G':
1097716fd348SMartin Matuska 			zo->zo_dump_dbgmsg = 1;
1098716fd348SMartin Matuska 			break;
1099716fd348SMartin Matuska 		case 'h':
1100716fd348SMartin Matuska 			usage(B_TRUE);
1101716fd348SMartin Matuska 			break;
1102716fd348SMartin Matuska 		case '?':
1103716fd348SMartin Matuska 		default:
1104716fd348SMartin Matuska 			usage(B_FALSE);
1105716fd348SMartin Matuska 			break;
1106716fd348SMartin Matuska 		}
1107716fd348SMartin Matuska 	}
1108716fd348SMartin Matuska 
1109716fd348SMartin Matuska 	fini_options();
1110716fd348SMartin Matuska 
1111e716630dSMartin Matuska 	/* Force compatible options for raidz expansion run */
1112e716630dSMartin Matuska 	if (zo->zo_raidz_expand_test == RAIDZ_EXPAND_REQUESTED) {
1113e716630dSMartin Matuska 		zo->zo_mmp_test = 0;
1114e716630dSMartin Matuska 		zo->zo_mirrors = 0;
1115e716630dSMartin Matuska 		zo->zo_vdevs = 1;
1116e716630dSMartin Matuska 		zo->zo_vdev_size = DEFAULT_VDEV_SIZE * 2;
1117e716630dSMartin Matuska 		zo->zo_raid_do_expand = B_FALSE;
1118e716630dSMartin Matuska 		raid_kind = "raidz";
1119e716630dSMartin Matuska 	}
1120e716630dSMartin Matuska 
1121716fd348SMartin Matuska 	if (strcmp(raid_kind, "random") == 0) {
1122e716630dSMartin Matuska 		switch (ztest_random(3)) {
1123e716630dSMartin Matuska 		case 0:
1124e716630dSMartin Matuska 			raid_kind = "raidz";
1125e716630dSMartin Matuska 			break;
1126e716630dSMartin Matuska 		case 1:
1127e716630dSMartin Matuska 			raid_kind = "eraidz";
1128e716630dSMartin Matuska 			break;
1129e716630dSMartin Matuska 		case 2:
1130e716630dSMartin Matuska 			raid_kind = "draid";
1131e716630dSMartin Matuska 			break;
1132e716630dSMartin Matuska 		}
1133716fd348SMartin Matuska 
1134716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 3)
1135716fd348SMartin Matuska 			(void) printf("choosing RAID type '%s'\n", raid_kind);
1136716fd348SMartin Matuska 	}
1137716fd348SMartin Matuska 
1138716fd348SMartin Matuska 	if (strcmp(raid_kind, "draid") == 0) {
1139716fd348SMartin Matuska 		uint64_t min_devsize;
1140716fd348SMartin Matuska 
1141716fd348SMartin Matuska 		/* With fewer disk use 256M, otherwise 128M is OK */
1142716fd348SMartin Matuska 		min_devsize = (ztest_opts.zo_raid_children < 16) ?
1143716fd348SMartin Matuska 		    (256ULL << 20) : (128ULL << 20);
1144716fd348SMartin Matuska 
1145716fd348SMartin Matuska 		/* No top-level mirrors with dRAID for now */
1146716fd348SMartin Matuska 		zo->zo_mirrors = 0;
1147716fd348SMartin Matuska 
1148716fd348SMartin Matuska 		/* Use more appropriate defaults for dRAID */
1149716fd348SMartin Matuska 		if (zo->zo_vdevs == ztest_opts_defaults.zo_vdevs)
1150716fd348SMartin Matuska 			zo->zo_vdevs = 1;
1151716fd348SMartin Matuska 		if (zo->zo_raid_children ==
1152716fd348SMartin Matuska 		    ztest_opts_defaults.zo_raid_children)
1153716fd348SMartin Matuska 			zo->zo_raid_children = 16;
1154716fd348SMartin Matuska 		if (zo->zo_ashift < 12)
1155716fd348SMartin Matuska 			zo->zo_ashift = 12;
1156716fd348SMartin Matuska 		if (zo->zo_vdev_size < min_devsize)
1157716fd348SMartin Matuska 			zo->zo_vdev_size = min_devsize;
1158716fd348SMartin Matuska 
1159716fd348SMartin Matuska 		if (zo->zo_draid_data + zo->zo_raid_parity >
1160716fd348SMartin Matuska 		    zo->zo_raid_children - zo->zo_draid_spares) {
1161716fd348SMartin Matuska 			(void) fprintf(stderr, "error: too few draid "
1162716fd348SMartin Matuska 			    "children (%d) for stripe width (%d)\n",
1163716fd348SMartin Matuska 			    zo->zo_raid_children,
1164716fd348SMartin Matuska 			    zo->zo_draid_data + zo->zo_raid_parity);
1165716fd348SMartin Matuska 			usage(B_FALSE);
1166716fd348SMartin Matuska 		}
1167716fd348SMartin Matuska 
1168716fd348SMartin Matuska 		(void) strlcpy(zo->zo_raid_type, VDEV_TYPE_DRAID,
1169716fd348SMartin Matuska 		    sizeof (zo->zo_raid_type));
1170716fd348SMartin Matuska 
1171e716630dSMartin Matuska 	} else if (strcmp(raid_kind, "eraidz") == 0) {
1172e716630dSMartin Matuska 		/* using eraidz (expandable raidz) */
1173e716630dSMartin Matuska 		zo->zo_raid_do_expand = B_TRUE;
1174e716630dSMartin Matuska 
1175e716630dSMartin Matuska 		/* tests expect top-level to be raidz */
1176e716630dSMartin Matuska 		zo->zo_mirrors = 0;
1177e716630dSMartin Matuska 		zo->zo_vdevs = 1;
1178e716630dSMartin Matuska 
1179e716630dSMartin Matuska 		/* Make sure parity is less than data columns */
1180e716630dSMartin Matuska 		zo->zo_raid_parity = MIN(zo->zo_raid_parity,
1181e716630dSMartin Matuska 		    zo->zo_raid_children - 1);
1182e716630dSMartin Matuska 
1183716fd348SMartin Matuska 	} else /* using raidz */ {
1184716fd348SMartin Matuska 		ASSERT0(strcmp(raid_kind, "raidz"));
1185716fd348SMartin Matuska 
1186716fd348SMartin Matuska 		zo->zo_raid_parity = MIN(zo->zo_raid_parity,
1187716fd348SMartin Matuska 		    zo->zo_raid_children - 1);
1188716fd348SMartin Matuska 	}
1189716fd348SMartin Matuska 
1190716fd348SMartin Matuska 	zo->zo_vdevtime =
1191716fd348SMartin Matuska 	    (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
1192716fd348SMartin Matuska 	    UINT64_MAX >> 2);
1193716fd348SMartin Matuska 
1194716fd348SMartin Matuska 	if (*zo->zo_alt_ztest) {
1195716fd348SMartin Matuska 		const char *invalid_what = "ztest";
1196716fd348SMartin Matuska 		char *val = zo->zo_alt_ztest;
1197716fd348SMartin Matuska 		if (0 != access(val, X_OK) ||
119815f0b8c3SMartin Matuska 		    (strrchr(val, '/') == NULL && (errno == EINVAL)))
1199716fd348SMartin Matuska 			goto invalid;
1200716fd348SMartin Matuska 
1201716fd348SMartin Matuska 		int dirlen = strrchr(val, '/') - val;
1202be181ee2SMartin Matuska 		strlcpy(zo->zo_alt_libpath, val,
1203be181ee2SMartin Matuska 		    MIN(sizeof (zo->zo_alt_libpath), dirlen + 1));
1204716fd348SMartin Matuska 		invalid_what = "library path", val = zo->zo_alt_libpath;
120515f0b8c3SMartin Matuska 		if (strrchr(val, '/') == NULL && (errno == EINVAL))
1206716fd348SMartin Matuska 			goto invalid;
1207716fd348SMartin Matuska 		*strrchr(val, '/') = '\0';
1208716fd348SMartin Matuska 		strlcat(val, "/lib", sizeof (zo->zo_alt_libpath));
1209716fd348SMartin Matuska 
1210716fd348SMartin Matuska 		if (0 != access(zo->zo_alt_libpath, X_OK))
1211716fd348SMartin Matuska 			goto invalid;
1212716fd348SMartin Matuska 		return;
1213716fd348SMartin Matuska 
1214716fd348SMartin Matuska invalid:
1215716fd348SMartin Matuska 		ztest_dump_core = B_FALSE;
1216716fd348SMartin Matuska 		fatal(B_TRUE, "invalid alternate %s %s", invalid_what, val);
1217716fd348SMartin Matuska 	}
1218716fd348SMartin Matuska }
1219716fd348SMartin Matuska 
1220716fd348SMartin Matuska static void
1221716fd348SMartin Matuska ztest_kill(ztest_shared_t *zs)
1222716fd348SMartin Matuska {
1223716fd348SMartin Matuska 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
1224716fd348SMartin Matuska 	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
1225716fd348SMartin Matuska 
1226716fd348SMartin Matuska 	/*
1227716fd348SMartin Matuska 	 * Before we kill ourselves, make sure that the config is updated.
1228716fd348SMartin Matuska 	 * See comment above spa_write_cachefile().
1229716fd348SMartin Matuska 	 */
1230e716630dSMartin Matuska 	if (raidz_expand_pause_point != RAIDZ_EXPAND_PAUSE_NONE) {
1231e716630dSMartin Matuska 		if (mutex_tryenter(&spa_namespace_lock)) {
1232e716630dSMartin Matuska 			spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE,
1233e716630dSMartin Matuska 			    B_FALSE);
1234e716630dSMartin Matuska 			mutex_exit(&spa_namespace_lock);
1235e716630dSMartin Matuska 
1236e716630dSMartin Matuska 			ztest_scratch_state->zs_raidz_scratch_verify_pause =
1237e716630dSMartin Matuska 			    raidz_expand_pause_point;
1238e716630dSMartin Matuska 		} else {
1239e716630dSMartin Matuska 			/*
1240e716630dSMartin Matuska 			 * Do not verify scratch object in case if
1241e716630dSMartin Matuska 			 * spa_namespace_lock cannot be acquired,
1242e716630dSMartin Matuska 			 * it can cause deadlock in spa_config_update().
1243e716630dSMartin Matuska 			 */
1244e716630dSMartin Matuska 			raidz_expand_pause_point = RAIDZ_EXPAND_PAUSE_NONE;
1245e716630dSMartin Matuska 
1246e716630dSMartin Matuska 			return;
1247e716630dSMartin Matuska 		}
1248e716630dSMartin Matuska 	} else {
1249716fd348SMartin Matuska 		mutex_enter(&spa_namespace_lock);
1250be181ee2SMartin Matuska 		spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE);
1251716fd348SMartin Matuska 		mutex_exit(&spa_namespace_lock);
1252e716630dSMartin Matuska 	}
1253716fd348SMartin Matuska 
1254716fd348SMartin Matuska 	(void) raise(SIGKILL);
1255716fd348SMartin Matuska }
1256716fd348SMartin Matuska 
1257716fd348SMartin Matuska static void
1258716fd348SMartin Matuska ztest_record_enospc(const char *s)
1259716fd348SMartin Matuska {
1260716fd348SMartin Matuska 	(void) s;
1261716fd348SMartin Matuska 	ztest_shared->zs_enospc_count++;
1262716fd348SMartin Matuska }
1263716fd348SMartin Matuska 
1264716fd348SMartin Matuska static uint64_t
1265716fd348SMartin Matuska ztest_get_ashift(void)
1266716fd348SMartin Matuska {
1267716fd348SMartin Matuska 	if (ztest_opts.zo_ashift == 0)
1268716fd348SMartin Matuska 		return (SPA_MINBLOCKSHIFT + ztest_random(5));
1269716fd348SMartin Matuska 	return (ztest_opts.zo_ashift);
1270716fd348SMartin Matuska }
1271716fd348SMartin Matuska 
1272716fd348SMartin Matuska static boolean_t
1273716fd348SMartin Matuska ztest_is_draid_spare(const char *name)
1274716fd348SMartin Matuska {
1275716fd348SMartin Matuska 	uint64_t spare_id = 0, parity = 0, vdev_id = 0;
1276716fd348SMartin Matuska 
1277716fd348SMartin Matuska 	if (sscanf(name, VDEV_TYPE_DRAID "%"PRIu64"-%"PRIu64"-%"PRIu64"",
1278716fd348SMartin Matuska 	    &parity, &vdev_id, &spare_id) == 3) {
1279716fd348SMartin Matuska 		return (B_TRUE);
1280716fd348SMartin Matuska 	}
1281716fd348SMartin Matuska 
1282716fd348SMartin Matuska 	return (B_FALSE);
1283716fd348SMartin Matuska }
1284716fd348SMartin Matuska 
1285716fd348SMartin Matuska static nvlist_t *
1286a0b956f5SMartin Matuska make_vdev_file(const char *path, const char *aux, const char *pool,
1287a0b956f5SMartin Matuska     size_t size, uint64_t ashift)
1288716fd348SMartin Matuska {
1289a0b956f5SMartin Matuska 	char *pathbuf = NULL;
1290716fd348SMartin Matuska 	uint64_t vdev;
1291716fd348SMartin Matuska 	nvlist_t *file;
1292716fd348SMartin Matuska 	boolean_t draid_spare = B_FALSE;
1293716fd348SMartin Matuska 
1294716fd348SMartin Matuska 
1295716fd348SMartin Matuska 	if (ashift == 0)
1296716fd348SMartin Matuska 		ashift = ztest_get_ashift();
1297716fd348SMartin Matuska 
1298716fd348SMartin Matuska 	if (path == NULL) {
1299a0b956f5SMartin Matuska 		pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
1300716fd348SMartin Matuska 		path = pathbuf;
1301716fd348SMartin Matuska 
1302716fd348SMartin Matuska 		if (aux != NULL) {
1303716fd348SMartin Matuska 			vdev = ztest_shared->zs_vdev_aux;
1304a0b956f5SMartin Matuska 			(void) snprintf(pathbuf, MAXPATHLEN,
1305716fd348SMartin Matuska 			    ztest_aux_template, ztest_opts.zo_dir,
1306716fd348SMartin Matuska 			    pool == NULL ? ztest_opts.zo_pool : pool,
1307716fd348SMartin Matuska 			    aux, vdev);
1308716fd348SMartin Matuska 		} else {
1309716fd348SMartin Matuska 			vdev = ztest_shared->zs_vdev_next_leaf++;
1310a0b956f5SMartin Matuska 			(void) snprintf(pathbuf, MAXPATHLEN,
1311716fd348SMartin Matuska 			    ztest_dev_template, ztest_opts.zo_dir,
1312716fd348SMartin Matuska 			    pool == NULL ? ztest_opts.zo_pool : pool, vdev);
1313716fd348SMartin Matuska 		}
1314716fd348SMartin Matuska 	} else {
1315716fd348SMartin Matuska 		draid_spare = ztest_is_draid_spare(path);
1316716fd348SMartin Matuska 	}
1317716fd348SMartin Matuska 
1318716fd348SMartin Matuska 	if (size != 0 && !draid_spare) {
1319716fd348SMartin Matuska 		int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
1320716fd348SMartin Matuska 		if (fd == -1)
1321716fd348SMartin Matuska 			fatal(B_TRUE, "can't open %s", path);
1322716fd348SMartin Matuska 		if (ftruncate(fd, size) != 0)
1323716fd348SMartin Matuska 			fatal(B_TRUE, "can't ftruncate %s", path);
1324716fd348SMartin Matuska 		(void) close(fd);
1325716fd348SMartin Matuska 	}
1326716fd348SMartin Matuska 
1327716fd348SMartin Matuska 	file = fnvlist_alloc();
1328716fd348SMartin Matuska 	fnvlist_add_string(file, ZPOOL_CONFIG_TYPE,
1329716fd348SMartin Matuska 	    draid_spare ? VDEV_TYPE_DRAID_SPARE : VDEV_TYPE_FILE);
1330716fd348SMartin Matuska 	fnvlist_add_string(file, ZPOOL_CONFIG_PATH, path);
1331716fd348SMartin Matuska 	fnvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift);
1332716fd348SMartin Matuska 	umem_free(pathbuf, MAXPATHLEN);
1333716fd348SMartin Matuska 
1334716fd348SMartin Matuska 	return (file);
1335716fd348SMartin Matuska }
1336716fd348SMartin Matuska 
1337716fd348SMartin Matuska static nvlist_t *
1338a0b956f5SMartin Matuska make_vdev_raid(const char *path, const char *aux, const char *pool, size_t size,
1339716fd348SMartin Matuska     uint64_t ashift, int r)
1340716fd348SMartin Matuska {
1341716fd348SMartin Matuska 	nvlist_t *raid, **child;
1342716fd348SMartin Matuska 	int c;
1343716fd348SMartin Matuska 
1344716fd348SMartin Matuska 	if (r < 2)
1345716fd348SMartin Matuska 		return (make_vdev_file(path, aux, pool, size, ashift));
1346716fd348SMartin Matuska 	child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
1347716fd348SMartin Matuska 
1348716fd348SMartin Matuska 	for (c = 0; c < r; c++)
1349716fd348SMartin Matuska 		child[c] = make_vdev_file(path, aux, pool, size, ashift);
1350716fd348SMartin Matuska 
1351716fd348SMartin Matuska 	raid = fnvlist_alloc();
1352716fd348SMartin Matuska 	fnvlist_add_string(raid, ZPOOL_CONFIG_TYPE,
1353716fd348SMartin Matuska 	    ztest_opts.zo_raid_type);
1354716fd348SMartin Matuska 	fnvlist_add_uint64(raid, ZPOOL_CONFIG_NPARITY,
1355716fd348SMartin Matuska 	    ztest_opts.zo_raid_parity);
1356716fd348SMartin Matuska 	fnvlist_add_nvlist_array(raid, ZPOOL_CONFIG_CHILDREN,
1357716fd348SMartin Matuska 	    (const nvlist_t **)child, r);
1358716fd348SMartin Matuska 
1359716fd348SMartin Matuska 	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) {
1360716fd348SMartin Matuska 		uint64_t ndata = ztest_opts.zo_draid_data;
1361716fd348SMartin Matuska 		uint64_t nparity = ztest_opts.zo_raid_parity;
1362716fd348SMartin Matuska 		uint64_t nspares = ztest_opts.zo_draid_spares;
1363716fd348SMartin Matuska 		uint64_t children = ztest_opts.zo_raid_children;
1364716fd348SMartin Matuska 		uint64_t ngroups = 1;
1365716fd348SMartin Matuska 
1366716fd348SMartin Matuska 		/*
1367716fd348SMartin Matuska 		 * Calculate the minimum number of groups required to fill a
1368716fd348SMartin Matuska 		 * slice. This is the LCM of the stripe width (data + parity)
1369716fd348SMartin Matuska 		 * and the number of data drives (children - spares).
1370716fd348SMartin Matuska 		 */
1371716fd348SMartin Matuska 		while (ngroups * (ndata + nparity) % (children - nspares) != 0)
1372716fd348SMartin Matuska 			ngroups++;
1373716fd348SMartin Matuska 
1374716fd348SMartin Matuska 		/* Store the basic dRAID configuration. */
1375716fd348SMartin Matuska 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata);
1376716fd348SMartin Matuska 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
1377716fd348SMartin Matuska 		fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
1378716fd348SMartin Matuska 	}
1379716fd348SMartin Matuska 
1380716fd348SMartin Matuska 	for (c = 0; c < r; c++)
1381716fd348SMartin Matuska 		fnvlist_free(child[c]);
1382716fd348SMartin Matuska 
1383716fd348SMartin Matuska 	umem_free(child, r * sizeof (nvlist_t *));
1384716fd348SMartin Matuska 
1385716fd348SMartin Matuska 	return (raid);
1386716fd348SMartin Matuska }
1387716fd348SMartin Matuska 
1388716fd348SMartin Matuska static nvlist_t *
1389a0b956f5SMartin Matuska make_vdev_mirror(const char *path, const char *aux, const char *pool,
1390a0b956f5SMartin Matuska     size_t size, uint64_t ashift, int r, int m)
1391716fd348SMartin Matuska {
1392716fd348SMartin Matuska 	nvlist_t *mirror, **child;
1393716fd348SMartin Matuska 	int c;
1394716fd348SMartin Matuska 
1395716fd348SMartin Matuska 	if (m < 1)
1396716fd348SMartin Matuska 		return (make_vdev_raid(path, aux, pool, size, ashift, r));
1397716fd348SMartin Matuska 
1398716fd348SMartin Matuska 	child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
1399716fd348SMartin Matuska 
1400716fd348SMartin Matuska 	for (c = 0; c < m; c++)
1401716fd348SMartin Matuska 		child[c] = make_vdev_raid(path, aux, pool, size, ashift, r);
1402716fd348SMartin Matuska 
1403716fd348SMartin Matuska 	mirror = fnvlist_alloc();
1404716fd348SMartin Matuska 	fnvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, VDEV_TYPE_MIRROR);
1405716fd348SMartin Matuska 	fnvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
1406716fd348SMartin Matuska 	    (const nvlist_t **)child, m);
1407716fd348SMartin Matuska 
1408716fd348SMartin Matuska 	for (c = 0; c < m; c++)
1409716fd348SMartin Matuska 		fnvlist_free(child[c]);
1410716fd348SMartin Matuska 
1411716fd348SMartin Matuska 	umem_free(child, m * sizeof (nvlist_t *));
1412716fd348SMartin Matuska 
1413716fd348SMartin Matuska 	return (mirror);
1414716fd348SMartin Matuska }
1415716fd348SMartin Matuska 
1416716fd348SMartin Matuska static nvlist_t *
1417a0b956f5SMartin Matuska make_vdev_root(const char *path, const char *aux, const char *pool, size_t size,
1418a0b956f5SMartin Matuska     uint64_t ashift, const char *class, int r, int m, int t)
1419716fd348SMartin Matuska {
1420716fd348SMartin Matuska 	nvlist_t *root, **child;
1421716fd348SMartin Matuska 	int c;
1422716fd348SMartin Matuska 	boolean_t log;
1423716fd348SMartin Matuska 
1424716fd348SMartin Matuska 	ASSERT3S(t, >, 0);
1425716fd348SMartin Matuska 
1426716fd348SMartin Matuska 	log = (class != NULL && strcmp(class, "log") == 0);
1427716fd348SMartin Matuska 
1428716fd348SMartin Matuska 	child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
1429716fd348SMartin Matuska 
1430716fd348SMartin Matuska 	for (c = 0; c < t; c++) {
1431716fd348SMartin Matuska 		child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
1432716fd348SMartin Matuska 		    r, m);
1433716fd348SMartin Matuska 		fnvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, log);
1434716fd348SMartin Matuska 
1435716fd348SMartin Matuska 		if (class != NULL && class[0] != '\0') {
1436716fd348SMartin Matuska 			ASSERT(m > 1 || log);   /* expecting a mirror */
1437716fd348SMartin Matuska 			fnvlist_add_string(child[c],
1438716fd348SMartin Matuska 			    ZPOOL_CONFIG_ALLOCATION_BIAS, class);
1439716fd348SMartin Matuska 		}
1440716fd348SMartin Matuska 	}
1441716fd348SMartin Matuska 
1442716fd348SMartin Matuska 	root = fnvlist_alloc();
1443716fd348SMartin Matuska 	fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
1444716fd348SMartin Matuska 	fnvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
1445716fd348SMartin Matuska 	    (const nvlist_t **)child, t);
1446716fd348SMartin Matuska 
1447716fd348SMartin Matuska 	for (c = 0; c < t; c++)
1448716fd348SMartin Matuska 		fnvlist_free(child[c]);
1449716fd348SMartin Matuska 
1450716fd348SMartin Matuska 	umem_free(child, t * sizeof (nvlist_t *));
1451716fd348SMartin Matuska 
1452716fd348SMartin Matuska 	return (root);
1453716fd348SMartin Matuska }
1454716fd348SMartin Matuska 
1455716fd348SMartin Matuska /*
1456716fd348SMartin Matuska  * Find a random spa version. Returns back a random spa version in the
1457716fd348SMartin Matuska  * range [initial_version, SPA_VERSION_FEATURES].
1458716fd348SMartin Matuska  */
1459716fd348SMartin Matuska static uint64_t
1460716fd348SMartin Matuska ztest_random_spa_version(uint64_t initial_version)
1461716fd348SMartin Matuska {
1462716fd348SMartin Matuska 	uint64_t version = initial_version;
1463716fd348SMartin Matuska 
1464716fd348SMartin Matuska 	if (version <= SPA_VERSION_BEFORE_FEATURES) {
1465716fd348SMartin Matuska 		version = version +
1466716fd348SMartin Matuska 		    ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1);
1467716fd348SMartin Matuska 	}
1468716fd348SMartin Matuska 
1469716fd348SMartin Matuska 	if (version > SPA_VERSION_BEFORE_FEATURES)
1470716fd348SMartin Matuska 		version = SPA_VERSION_FEATURES;
1471716fd348SMartin Matuska 
1472716fd348SMartin Matuska 	ASSERT(SPA_VERSION_IS_SUPPORTED(version));
1473716fd348SMartin Matuska 	return (version);
1474716fd348SMartin Matuska }
1475716fd348SMartin Matuska 
1476716fd348SMartin Matuska static int
1477716fd348SMartin Matuska ztest_random_blocksize(void)
1478716fd348SMartin Matuska {
1479716fd348SMartin Matuska 	ASSERT3U(ztest_spa->spa_max_ashift, !=, 0);
1480716fd348SMartin Matuska 
1481716fd348SMartin Matuska 	/*
1482716fd348SMartin Matuska 	 * Choose a block size >= the ashift.
1483716fd348SMartin Matuska 	 * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
1484716fd348SMartin Matuska 	 */
1485716fd348SMartin Matuska 	int maxbs = SPA_OLD_MAXBLOCKSHIFT;
1486716fd348SMartin Matuska 	if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
1487716fd348SMartin Matuska 		maxbs = 20;
1488716fd348SMartin Matuska 	uint64_t block_shift =
1489716fd348SMartin Matuska 	    ztest_random(maxbs - ztest_spa->spa_max_ashift + 1);
1490716fd348SMartin Matuska 	return (1 << (SPA_MINBLOCKSHIFT + block_shift));
1491716fd348SMartin Matuska }
1492716fd348SMartin Matuska 
1493716fd348SMartin Matuska static int
1494716fd348SMartin Matuska ztest_random_dnodesize(void)
1495716fd348SMartin Matuska {
1496716fd348SMartin Matuska 	int slots;
1497716fd348SMartin Matuska 	int max_slots = spa_maxdnodesize(ztest_spa) >> DNODE_SHIFT;
1498716fd348SMartin Matuska 
1499716fd348SMartin Matuska 	if (max_slots == DNODE_MIN_SLOTS)
1500716fd348SMartin Matuska 		return (DNODE_MIN_SIZE);
1501716fd348SMartin Matuska 
1502716fd348SMartin Matuska 	/*
1503716fd348SMartin Matuska 	 * Weight the random distribution more heavily toward smaller
1504716fd348SMartin Matuska 	 * dnode sizes since that is more likely to reflect real-world
1505716fd348SMartin Matuska 	 * usage.
1506716fd348SMartin Matuska 	 */
1507716fd348SMartin Matuska 	ASSERT3U(max_slots, >, 4);
1508716fd348SMartin Matuska 	switch (ztest_random(10)) {
1509716fd348SMartin Matuska 	case 0:
1510716fd348SMartin Matuska 		slots = 5 + ztest_random(max_slots - 4);
1511716fd348SMartin Matuska 		break;
1512716fd348SMartin Matuska 	case 1 ... 4:
1513716fd348SMartin Matuska 		slots = 2 + ztest_random(3);
1514716fd348SMartin Matuska 		break;
1515716fd348SMartin Matuska 	default:
1516716fd348SMartin Matuska 		slots = 1;
1517716fd348SMartin Matuska 		break;
1518716fd348SMartin Matuska 	}
1519716fd348SMartin Matuska 
1520716fd348SMartin Matuska 	return (slots << DNODE_SHIFT);
1521716fd348SMartin Matuska }
1522716fd348SMartin Matuska 
1523716fd348SMartin Matuska static int
1524716fd348SMartin Matuska ztest_random_ibshift(void)
1525716fd348SMartin Matuska {
1526716fd348SMartin Matuska 	return (DN_MIN_INDBLKSHIFT +
1527716fd348SMartin Matuska 	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
1528716fd348SMartin Matuska }
1529716fd348SMartin Matuska 
1530716fd348SMartin Matuska static uint64_t
1531716fd348SMartin Matuska ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
1532716fd348SMartin Matuska {
1533716fd348SMartin Matuska 	uint64_t top;
1534716fd348SMartin Matuska 	vdev_t *rvd = spa->spa_root_vdev;
1535716fd348SMartin Matuska 	vdev_t *tvd;
1536716fd348SMartin Matuska 
1537716fd348SMartin Matuska 	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_READER), !=, 0);
1538716fd348SMartin Matuska 
1539716fd348SMartin Matuska 	do {
1540716fd348SMartin Matuska 		top = ztest_random(rvd->vdev_children);
1541716fd348SMartin Matuska 		tvd = rvd->vdev_child[top];
1542716fd348SMartin Matuska 	} while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) ||
1543716fd348SMartin Matuska 	    tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
1544716fd348SMartin Matuska 
1545716fd348SMartin Matuska 	return (top);
1546716fd348SMartin Matuska }
1547716fd348SMartin Matuska 
1548716fd348SMartin Matuska static uint64_t
1549716fd348SMartin Matuska ztest_random_dsl_prop(zfs_prop_t prop)
1550716fd348SMartin Matuska {
1551716fd348SMartin Matuska 	uint64_t value;
1552716fd348SMartin Matuska 
1553716fd348SMartin Matuska 	do {
1554716fd348SMartin Matuska 		value = zfs_prop_random_value(prop, ztest_random(-1ULL));
1555716fd348SMartin Matuska 	} while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
1556716fd348SMartin Matuska 
1557716fd348SMartin Matuska 	return (value);
1558716fd348SMartin Matuska }
1559716fd348SMartin Matuska 
1560716fd348SMartin Matuska static int
1561716fd348SMartin Matuska ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
1562716fd348SMartin Matuska     boolean_t inherit)
1563716fd348SMartin Matuska {
1564716fd348SMartin Matuska 	const char *propname = zfs_prop_to_name(prop);
1565716fd348SMartin Matuska 	const char *valname;
1566716fd348SMartin Matuska 	char *setpoint;
1567716fd348SMartin Matuska 	uint64_t curval;
1568716fd348SMartin Matuska 	int error;
1569716fd348SMartin Matuska 
1570716fd348SMartin Matuska 	error = dsl_prop_set_int(osname, propname,
1571716fd348SMartin Matuska 	    (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
1572716fd348SMartin Matuska 
1573716fd348SMartin Matuska 	if (error == ENOSPC) {
1574716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
1575716fd348SMartin Matuska 		return (error);
1576716fd348SMartin Matuska 	}
1577716fd348SMartin Matuska 	ASSERT0(error);
1578716fd348SMartin Matuska 
1579716fd348SMartin Matuska 	setpoint = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
1580716fd348SMartin Matuska 	VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
1581716fd348SMartin Matuska 
1582716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6) {
1583716fd348SMartin Matuska 		int err;
1584716fd348SMartin Matuska 
1585716fd348SMartin Matuska 		err = zfs_prop_index_to_string(prop, curval, &valname);
1586716fd348SMartin Matuska 		if (err)
1587716fd348SMartin Matuska 			(void) printf("%s %s = %llu at '%s'\n", osname,
1588716fd348SMartin Matuska 			    propname, (unsigned long long)curval, setpoint);
1589716fd348SMartin Matuska 		else
1590716fd348SMartin Matuska 			(void) printf("%s %s = %s at '%s'\n",
1591716fd348SMartin Matuska 			    osname, propname, valname, setpoint);
1592716fd348SMartin Matuska 	}
1593716fd348SMartin Matuska 	umem_free(setpoint, MAXPATHLEN);
1594716fd348SMartin Matuska 
1595716fd348SMartin Matuska 	return (error);
1596716fd348SMartin Matuska }
1597716fd348SMartin Matuska 
1598716fd348SMartin Matuska static int
1599716fd348SMartin Matuska ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
1600716fd348SMartin Matuska {
1601716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
1602716fd348SMartin Matuska 	nvlist_t *props = NULL;
1603716fd348SMartin Matuska 	int error;
1604716fd348SMartin Matuska 
1605716fd348SMartin Matuska 	props = fnvlist_alloc();
1606716fd348SMartin Matuska 	fnvlist_add_uint64(props, zpool_prop_to_name(prop), value);
1607716fd348SMartin Matuska 
1608716fd348SMartin Matuska 	error = spa_prop_set(spa, props);
1609716fd348SMartin Matuska 
1610716fd348SMartin Matuska 	fnvlist_free(props);
1611716fd348SMartin Matuska 
1612716fd348SMartin Matuska 	if (error == ENOSPC) {
1613716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
1614716fd348SMartin Matuska 		return (error);
1615716fd348SMartin Matuska 	}
1616716fd348SMartin Matuska 	ASSERT0(error);
1617716fd348SMartin Matuska 
1618716fd348SMartin Matuska 	return (error);
1619716fd348SMartin Matuska }
1620716fd348SMartin Matuska 
1621716fd348SMartin Matuska static int
1622716fd348SMartin Matuska ztest_dmu_objset_own(const char *name, dmu_objset_type_t type,
1623a0b956f5SMartin Matuska     boolean_t readonly, boolean_t decrypt, const void *tag, objset_t **osp)
1624716fd348SMartin Matuska {
1625716fd348SMartin Matuska 	int err;
1626716fd348SMartin Matuska 	char *cp = NULL;
1627716fd348SMartin Matuska 	char ddname[ZFS_MAX_DATASET_NAME_LEN];
1628716fd348SMartin Matuska 
1629be181ee2SMartin Matuska 	strlcpy(ddname, name, sizeof (ddname));
1630716fd348SMartin Matuska 	cp = strchr(ddname, '@');
1631716fd348SMartin Matuska 	if (cp != NULL)
1632716fd348SMartin Matuska 		*cp = '\0';
1633716fd348SMartin Matuska 
1634716fd348SMartin Matuska 	err = dmu_objset_own(name, type, readonly, decrypt, tag, osp);
1635716fd348SMartin Matuska 	while (decrypt && err == EACCES) {
1636716fd348SMartin Matuska 		dsl_crypto_params_t *dcp;
1637716fd348SMartin Matuska 		nvlist_t *crypto_args = fnvlist_alloc();
1638716fd348SMartin Matuska 
1639716fd348SMartin Matuska 		fnvlist_add_uint8_array(crypto_args, "wkeydata",
1640716fd348SMartin Matuska 		    (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN);
1641716fd348SMartin Matuska 		VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
1642716fd348SMartin Matuska 		    crypto_args, &dcp));
1643716fd348SMartin Matuska 		err = spa_keystore_load_wkey(ddname, dcp, B_FALSE);
1644716fd348SMartin Matuska 		/*
1645716fd348SMartin Matuska 		 * Note: if there was an error loading, the wkey was not
1646716fd348SMartin Matuska 		 * consumed, and needs to be freed.
1647716fd348SMartin Matuska 		 */
1648716fd348SMartin Matuska 		dsl_crypto_params_free(dcp, (err != 0));
1649716fd348SMartin Matuska 		fnvlist_free(crypto_args);
1650716fd348SMartin Matuska 
1651716fd348SMartin Matuska 		if (err == EINVAL) {
1652716fd348SMartin Matuska 			/*
1653716fd348SMartin Matuska 			 * We couldn't load a key for this dataset so try
1654716fd348SMartin Matuska 			 * the parent. This loop will eventually hit the
1655716fd348SMartin Matuska 			 * encryption root since ztest only makes clones
1656716fd348SMartin Matuska 			 * as children of their origin datasets.
1657716fd348SMartin Matuska 			 */
1658716fd348SMartin Matuska 			cp = strrchr(ddname, '/');
1659716fd348SMartin Matuska 			if (cp == NULL)
1660716fd348SMartin Matuska 				return (err);
1661716fd348SMartin Matuska 
1662716fd348SMartin Matuska 			*cp = '\0';
1663716fd348SMartin Matuska 			err = EACCES;
1664716fd348SMartin Matuska 			continue;
1665716fd348SMartin Matuska 		} else if (err != 0) {
1666716fd348SMartin Matuska 			break;
1667716fd348SMartin Matuska 		}
1668716fd348SMartin Matuska 
1669716fd348SMartin Matuska 		err = dmu_objset_own(name, type, readonly, decrypt, tag, osp);
1670716fd348SMartin Matuska 		break;
1671716fd348SMartin Matuska 	}
1672716fd348SMartin Matuska 
1673716fd348SMartin Matuska 	return (err);
1674716fd348SMartin Matuska }
1675716fd348SMartin Matuska 
1676716fd348SMartin Matuska static void
1677716fd348SMartin Matuska ztest_rll_init(rll_t *rll)
1678716fd348SMartin Matuska {
1679716fd348SMartin Matuska 	rll->rll_writer = NULL;
1680716fd348SMartin Matuska 	rll->rll_readers = 0;
1681716fd348SMartin Matuska 	mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL);
1682716fd348SMartin Matuska 	cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL);
1683716fd348SMartin Matuska }
1684716fd348SMartin Matuska 
1685716fd348SMartin Matuska static void
1686716fd348SMartin Matuska ztest_rll_destroy(rll_t *rll)
1687716fd348SMartin Matuska {
1688716fd348SMartin Matuska 	ASSERT3P(rll->rll_writer, ==, NULL);
1689716fd348SMartin Matuska 	ASSERT0(rll->rll_readers);
1690716fd348SMartin Matuska 	mutex_destroy(&rll->rll_lock);
1691716fd348SMartin Matuska 	cv_destroy(&rll->rll_cv);
1692716fd348SMartin Matuska }
1693716fd348SMartin Matuska 
1694716fd348SMartin Matuska static void
1695716fd348SMartin Matuska ztest_rll_lock(rll_t *rll, rl_type_t type)
1696716fd348SMartin Matuska {
1697716fd348SMartin Matuska 	mutex_enter(&rll->rll_lock);
1698716fd348SMartin Matuska 
1699e716630dSMartin Matuska 	if (type == ZTRL_READER) {
1700716fd348SMartin Matuska 		while (rll->rll_writer != NULL)
1701716fd348SMartin Matuska 			(void) cv_wait(&rll->rll_cv, &rll->rll_lock);
1702716fd348SMartin Matuska 		rll->rll_readers++;
1703716fd348SMartin Matuska 	} else {
1704716fd348SMartin Matuska 		while (rll->rll_writer != NULL || rll->rll_readers)
1705716fd348SMartin Matuska 			(void) cv_wait(&rll->rll_cv, &rll->rll_lock);
1706716fd348SMartin Matuska 		rll->rll_writer = curthread;
1707716fd348SMartin Matuska 	}
1708716fd348SMartin Matuska 
1709716fd348SMartin Matuska 	mutex_exit(&rll->rll_lock);
1710716fd348SMartin Matuska }
1711716fd348SMartin Matuska 
1712716fd348SMartin Matuska static void
1713716fd348SMartin Matuska ztest_rll_unlock(rll_t *rll)
1714716fd348SMartin Matuska {
1715716fd348SMartin Matuska 	mutex_enter(&rll->rll_lock);
1716716fd348SMartin Matuska 
1717716fd348SMartin Matuska 	if (rll->rll_writer) {
1718716fd348SMartin Matuska 		ASSERT0(rll->rll_readers);
1719716fd348SMartin Matuska 		rll->rll_writer = NULL;
1720716fd348SMartin Matuska 	} else {
1721716fd348SMartin Matuska 		ASSERT3S(rll->rll_readers, >, 0);
1722716fd348SMartin Matuska 		ASSERT3P(rll->rll_writer, ==, NULL);
1723716fd348SMartin Matuska 		rll->rll_readers--;
1724716fd348SMartin Matuska 	}
1725716fd348SMartin Matuska 
1726716fd348SMartin Matuska 	if (rll->rll_writer == NULL && rll->rll_readers == 0)
1727716fd348SMartin Matuska 		cv_broadcast(&rll->rll_cv);
1728716fd348SMartin Matuska 
1729716fd348SMartin Matuska 	mutex_exit(&rll->rll_lock);
1730716fd348SMartin Matuska }
1731716fd348SMartin Matuska 
1732716fd348SMartin Matuska static void
1733716fd348SMartin Matuska ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
1734716fd348SMartin Matuska {
1735716fd348SMartin Matuska 	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1736716fd348SMartin Matuska 
1737716fd348SMartin Matuska 	ztest_rll_lock(rll, type);
1738716fd348SMartin Matuska }
1739716fd348SMartin Matuska 
1740716fd348SMartin Matuska static void
1741716fd348SMartin Matuska ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
1742716fd348SMartin Matuska {
1743716fd348SMartin Matuska 	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1744716fd348SMartin Matuska 
1745716fd348SMartin Matuska 	ztest_rll_unlock(rll);
1746716fd348SMartin Matuska }
1747716fd348SMartin Matuska 
1748716fd348SMartin Matuska static rl_t *
1749716fd348SMartin Matuska ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
1750716fd348SMartin Matuska     uint64_t size, rl_type_t type)
1751716fd348SMartin Matuska {
1752716fd348SMartin Matuska 	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
1753716fd348SMartin Matuska 	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
1754716fd348SMartin Matuska 	rl_t *rl;
1755716fd348SMartin Matuska 
1756716fd348SMartin Matuska 	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
1757716fd348SMartin Matuska 	rl->rl_object = object;
1758716fd348SMartin Matuska 	rl->rl_offset = offset;
1759716fd348SMartin Matuska 	rl->rl_size = size;
1760716fd348SMartin Matuska 	rl->rl_lock = rll;
1761716fd348SMartin Matuska 
1762716fd348SMartin Matuska 	ztest_rll_lock(rll, type);
1763716fd348SMartin Matuska 
1764716fd348SMartin Matuska 	return (rl);
1765716fd348SMartin Matuska }
1766716fd348SMartin Matuska 
1767716fd348SMartin Matuska static void
1768716fd348SMartin Matuska ztest_range_unlock(rl_t *rl)
1769716fd348SMartin Matuska {
1770716fd348SMartin Matuska 	rll_t *rll = rl->rl_lock;
1771716fd348SMartin Matuska 
1772716fd348SMartin Matuska 	ztest_rll_unlock(rll);
1773716fd348SMartin Matuska 
1774716fd348SMartin Matuska 	umem_free(rl, sizeof (*rl));
1775716fd348SMartin Matuska }
1776716fd348SMartin Matuska 
1777716fd348SMartin Matuska static void
1778716fd348SMartin Matuska ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
1779716fd348SMartin Matuska {
1780716fd348SMartin Matuska 	zd->zd_os = os;
1781716fd348SMartin Matuska 	zd->zd_zilog = dmu_objset_zil(os);
1782716fd348SMartin Matuska 	zd->zd_shared = szd;
1783716fd348SMartin Matuska 	dmu_objset_name(os, zd->zd_name);
1784716fd348SMartin Matuska 	int l;
1785716fd348SMartin Matuska 
1786716fd348SMartin Matuska 	if (zd->zd_shared != NULL)
1787716fd348SMartin Matuska 		zd->zd_shared->zd_seq = 0;
1788716fd348SMartin Matuska 
1789716fd348SMartin Matuska 	VERIFY0(pthread_rwlock_init(&zd->zd_zilog_lock, NULL));
1790716fd348SMartin Matuska 	mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
1791716fd348SMartin Matuska 
1792716fd348SMartin Matuska 	for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1793716fd348SMartin Matuska 		ztest_rll_init(&zd->zd_object_lock[l]);
1794716fd348SMartin Matuska 
1795716fd348SMartin Matuska 	for (l = 0; l < ZTEST_RANGE_LOCKS; l++)
1796716fd348SMartin Matuska 		ztest_rll_init(&zd->zd_range_lock[l]);
1797716fd348SMartin Matuska }
1798716fd348SMartin Matuska 
1799716fd348SMartin Matuska static void
1800716fd348SMartin Matuska ztest_zd_fini(ztest_ds_t *zd)
1801716fd348SMartin Matuska {
1802716fd348SMartin Matuska 	int l;
1803716fd348SMartin Matuska 
1804716fd348SMartin Matuska 	mutex_destroy(&zd->zd_dirobj_lock);
1805716fd348SMartin Matuska 	(void) pthread_rwlock_destroy(&zd->zd_zilog_lock);
1806716fd348SMartin Matuska 
1807716fd348SMartin Matuska 	for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1808716fd348SMartin Matuska 		ztest_rll_destroy(&zd->zd_object_lock[l]);
1809716fd348SMartin Matuska 
1810716fd348SMartin Matuska 	for (l = 0; l < ZTEST_RANGE_LOCKS; l++)
1811716fd348SMartin Matuska 		ztest_rll_destroy(&zd->zd_range_lock[l]);
1812716fd348SMartin Matuska }
1813716fd348SMartin Matuska 
1814716fd348SMartin Matuska #define	TXG_MIGHTWAIT	(ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
1815716fd348SMartin Matuska 
1816716fd348SMartin Matuska static uint64_t
1817716fd348SMartin Matuska ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
1818716fd348SMartin Matuska {
1819716fd348SMartin Matuska 	uint64_t txg;
1820716fd348SMartin Matuska 	int error;
1821716fd348SMartin Matuska 
1822716fd348SMartin Matuska 	/*
1823716fd348SMartin Matuska 	 * Attempt to assign tx to some transaction group.
1824716fd348SMartin Matuska 	 */
1825716fd348SMartin Matuska 	error = dmu_tx_assign(tx, txg_how);
1826716fd348SMartin Matuska 	if (error) {
1827716fd348SMartin Matuska 		if (error == ERESTART) {
1828716fd348SMartin Matuska 			ASSERT3U(txg_how, ==, TXG_NOWAIT);
1829716fd348SMartin Matuska 			dmu_tx_wait(tx);
1830716fd348SMartin Matuska 		} else {
1831716fd348SMartin Matuska 			ASSERT3U(error, ==, ENOSPC);
1832716fd348SMartin Matuska 			ztest_record_enospc(tag);
1833716fd348SMartin Matuska 		}
1834716fd348SMartin Matuska 		dmu_tx_abort(tx);
1835716fd348SMartin Matuska 		return (0);
1836716fd348SMartin Matuska 	}
1837716fd348SMartin Matuska 	txg = dmu_tx_get_txg(tx);
1838716fd348SMartin Matuska 	ASSERT3U(txg, !=, 0);
1839716fd348SMartin Matuska 	return (txg);
1840716fd348SMartin Matuska }
1841716fd348SMartin Matuska 
1842716fd348SMartin Matuska static void
1843716fd348SMartin Matuska ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
1844716fd348SMartin Matuska     uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
1845716fd348SMartin Matuska     uint64_t crtxg)
1846716fd348SMartin Matuska {
1847716fd348SMartin Matuska 	bt->bt_magic = BT_MAGIC;
1848716fd348SMartin Matuska 	bt->bt_objset = dmu_objset_id(os);
1849716fd348SMartin Matuska 	bt->bt_object = object;
1850716fd348SMartin Matuska 	bt->bt_dnodesize = dnodesize;
1851716fd348SMartin Matuska 	bt->bt_offset = offset;
1852716fd348SMartin Matuska 	bt->bt_gen = gen;
1853716fd348SMartin Matuska 	bt->bt_txg = txg;
1854716fd348SMartin Matuska 	bt->bt_crtxg = crtxg;
1855716fd348SMartin Matuska }
1856716fd348SMartin Matuska 
/*
 * Assert that a block tag is consistent with what the caller expects.
 *
 * The magic, objset id (taken from 'os'), object, dnodesize, offset and
 * creation txg must match exactly.  The tag's generation and txg are only
 * required to be no greater than the 'gen' and 'txg' passed in.
 */
1857716fd348SMartin Matuska static void
1858716fd348SMartin Matuska ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
1859716fd348SMartin Matuska     uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
1860716fd348SMartin Matuska     uint64_t crtxg)
1861716fd348SMartin Matuska {
1862716fd348SMartin Matuska 	ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
1863716fd348SMartin Matuska 	ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
1864716fd348SMartin Matuska 	ASSERT3U(bt->bt_object, ==, object);
1865716fd348SMartin Matuska 	ASSERT3U(bt->bt_dnodesize, ==, dnodesize);
1866716fd348SMartin Matuska 	ASSERT3U(bt->bt_offset, ==, offset);
	/* gen and txg are upper bounds, not exact values. */
1867716fd348SMartin Matuska 	ASSERT3U(bt->bt_gen, <=, gen);
1868716fd348SMartin Matuska 	ASSERT3U(bt->bt_txg, <=, txg);
1869716fd348SMartin Matuska 	ASSERT3U(bt->bt_crtxg, ==, crtxg);
1870716fd348SMartin Matuska }
1871716fd348SMartin Matuska 
1872716fd348SMartin Matuska static ztest_block_tag_t *
1873716fd348SMartin Matuska ztest_bt_bonus(dmu_buf_t *db)
1874716fd348SMartin Matuska {
1875716fd348SMartin Matuska 	dmu_object_info_t doi;
1876716fd348SMartin Matuska 	ztest_block_tag_t *bt;
1877716fd348SMartin Matuska 
1878716fd348SMartin Matuska 	dmu_object_info_from_db(db, &doi);
1879716fd348SMartin Matuska 	ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
1880716fd348SMartin Matuska 	ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
1881716fd348SMartin Matuska 	bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
1882716fd348SMartin Matuska 
1883716fd348SMartin Matuska 	return (bt);
1884716fd348SMartin Matuska }
1885716fd348SMartin Matuska 
1886716fd348SMartin Matuska /*
1887716fd348SMartin Matuska  * Generate a token to fill up unused bonus buffer space.  Try to make
1888716fd348SMartin Matuska  * it unique to the object, generation, and offset to verify that data
1889716fd348SMartin Matuska  * is not getting overwritten by data from other dnodes.
1890716fd348SMartin Matuska  */
1891716fd348SMartin Matuska #define	ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
1892716fd348SMartin Matuska 	(((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset))
1893716fd348SMartin Matuska 
1894716fd348SMartin Matuska /*
1895716fd348SMartin Matuska  * Fill up the unused bonus buffer region before the block tag with a
1896716fd348SMartin Matuska  * verifiable pattern. Filling the whole bonus area with non-zero data
1897716fd348SMartin Matuska  * helps ensure that all dnode traversal code properly skips the
1898716fd348SMartin Matuska  * interior regions of large dnodes.
1899716fd348SMartin Matuska  */
1900716fd348SMartin Matuska static void
1901716fd348SMartin Matuska ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
1902716fd348SMartin Matuska     objset_t *os, uint64_t gen)
1903716fd348SMartin Matuska {
1904716fd348SMartin Matuska 	uint64_t *bonusp;
1905716fd348SMartin Matuska 
1906716fd348SMartin Matuska 	ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8));
1907716fd348SMartin Matuska 
1908716fd348SMartin Matuska 	for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
1909716fd348SMartin Matuska 		uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
1910716fd348SMartin Matuska 		    gen, bonusp - (uint64_t *)db->db_data);
1911716fd348SMartin Matuska 		*bonusp = token;
1912716fd348SMartin Matuska 	}
1913716fd348SMartin Matuska }
1914716fd348SMartin Matuska 
1915716fd348SMartin Matuska /*
1916716fd348SMartin Matuska  * Verify that the unused area of a bonus buffer is filled with the
1917716fd348SMartin Matuska  * expected tokens.
1918716fd348SMartin Matuska  */
1919716fd348SMartin Matuska static void
1920716fd348SMartin Matuska ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
1921716fd348SMartin Matuska     objset_t *os, uint64_t gen)
1922716fd348SMartin Matuska {
1923716fd348SMartin Matuska 	uint64_t *bonusp;
1924716fd348SMartin Matuska 
1925716fd348SMartin Matuska 	for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
1926716fd348SMartin Matuska 		uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
1927716fd348SMartin Matuska 		    gen, bonusp - (uint64_t *)db->db_data);
1928716fd348SMartin Matuska 		VERIFY3U(*bonusp, ==, token);
1929716fd348SMartin Matuska 	}
1930716fd348SMartin Matuska }
1931716fd348SMartin Matuska 
1932716fd348SMartin Matuska /*
1933716fd348SMartin Matuska  * ZIL logging ops
1934716fd348SMartin Matuska  */
1935716fd348SMartin Matuska 
1936716fd348SMartin Matuska #define	lrz_type	lr_mode
1937716fd348SMartin Matuska #define	lrz_blocksize	lr_uid
1938716fd348SMartin Matuska #define	lrz_ibshift	lr_gid
1939716fd348SMartin Matuska #define	lrz_bonustype	lr_rdev
1940716fd348SMartin Matuska #define	lrz_dnodesize	lr_crtime[1]
1941716fd348SMartin Matuska 
1942716fd348SMartin Matuska static void
1943716fd348SMartin Matuska ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
1944716fd348SMartin Matuska {
19457a7741afSMartin Matuska 	char *name = (char *)&lr->lr_data[0];		/* name follows lr */
1946716fd348SMartin Matuska 	size_t namesize = strlen(name) + 1;
1947716fd348SMartin Matuska 	itx_t *itx;
1948716fd348SMartin Matuska 
1949716fd348SMartin Matuska 	if (zil_replaying(zd->zd_zilog, tx))
1950716fd348SMartin Matuska 		return;
1951716fd348SMartin Matuska 
1952716fd348SMartin Matuska 	itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize);
19537a7741afSMartin Matuska 	memcpy(&itx->itx_lr + 1, &lr->lr_create.lr_common + 1,
1954716fd348SMartin Matuska 	    sizeof (*lr) + namesize - sizeof (lr_t));
1955716fd348SMartin Matuska 
1956716fd348SMartin Matuska 	zil_itx_assign(zd->zd_zilog, itx, tx);
1957716fd348SMartin Matuska }
1958716fd348SMartin Matuska 
1959716fd348SMartin Matuska static void
1960716fd348SMartin Matuska ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
1961716fd348SMartin Matuska {
19627a7741afSMartin Matuska 	char *name = (char *)&lr->lr_data[0];		/* name follows lr */
1963716fd348SMartin Matuska 	size_t namesize = strlen(name) + 1;
1964716fd348SMartin Matuska 	itx_t *itx;
1965716fd348SMartin Matuska 
1966716fd348SMartin Matuska 	if (zil_replaying(zd->zd_zilog, tx))
1967716fd348SMartin Matuska 		return;
1968716fd348SMartin Matuska 
1969716fd348SMartin Matuska 	itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize);
1970716fd348SMartin Matuska 	memcpy(&itx->itx_lr + 1, &lr->lr_common + 1,
1971716fd348SMartin Matuska 	    sizeof (*lr) + namesize - sizeof (lr_t));
1972716fd348SMartin Matuska 
1973716fd348SMartin Matuska 	itx->itx_oid = object;
1974716fd348SMartin Matuska 	zil_itx_assign(zd->zd_zilog, itx, tx);
1975716fd348SMartin Matuska }
1976716fd348SMartin Matuska 
1977716fd348SMartin Matuska static void
1978716fd348SMartin Matuska ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
1979716fd348SMartin Matuska {
1980716fd348SMartin Matuska 	itx_t *itx;
1981716fd348SMartin Matuska 	itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
1982716fd348SMartin Matuska 
1983716fd348SMartin Matuska 	if (zil_replaying(zd->zd_zilog, tx))
1984716fd348SMartin Matuska 		return;
1985716fd348SMartin Matuska 
19862a58b312SMartin Matuska 	if (lr->lr_length > zil_max_log_data(zd->zd_zilog, sizeof (lr_write_t)))
1987716fd348SMartin Matuska 		write_state = WR_INDIRECT;
1988716fd348SMartin Matuska 
1989716fd348SMartin Matuska 	itx = zil_itx_create(TX_WRITE,
1990716fd348SMartin Matuska 	    sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0));
1991716fd348SMartin Matuska 
1992716fd348SMartin Matuska 	if (write_state == WR_COPIED &&
1993716fd348SMartin Matuska 	    dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
1994716fd348SMartin Matuska 	    ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
1995716fd348SMartin Matuska 		zil_itx_destroy(itx);
1996716fd348SMartin Matuska 		itx = zil_itx_create(TX_WRITE, sizeof (*lr));
1997716fd348SMartin Matuska 		write_state = WR_NEED_COPY;
1998716fd348SMartin Matuska 	}
1999716fd348SMartin Matuska 	itx->itx_private = zd;
2000716fd348SMartin Matuska 	itx->itx_wr_state = write_state;
2001716fd348SMartin Matuska 	itx->itx_sync = (ztest_random(8) == 0);
2002716fd348SMartin Matuska 
2003716fd348SMartin Matuska 	memcpy(&itx->itx_lr + 1, &lr->lr_common + 1,
2004716fd348SMartin Matuska 	    sizeof (*lr) - sizeof (lr_t));
2005716fd348SMartin Matuska 
2006716fd348SMartin Matuska 	zil_itx_assign(zd->zd_zilog, itx, tx);
2007716fd348SMartin Matuska }
2008716fd348SMartin Matuska 
2009716fd348SMartin Matuska static void
2010716fd348SMartin Matuska ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
2011716fd348SMartin Matuska {
2012716fd348SMartin Matuska 	itx_t *itx;
2013716fd348SMartin Matuska 
2014716fd348SMartin Matuska 	if (zil_replaying(zd->zd_zilog, tx))
2015716fd348SMartin Matuska 		return;
2016716fd348SMartin Matuska 
2017716fd348SMartin Matuska 	itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
2018716fd348SMartin Matuska 	memcpy(&itx->itx_lr + 1, &lr->lr_common + 1,
2019716fd348SMartin Matuska 	    sizeof (*lr) - sizeof (lr_t));
2020716fd348SMartin Matuska 
2021716fd348SMartin Matuska 	itx->itx_sync = B_FALSE;
2022716fd348SMartin Matuska 	zil_itx_assign(zd->zd_zilog, itx, tx);
2023716fd348SMartin Matuska }
2024716fd348SMartin Matuska 
2025716fd348SMartin Matuska static void
2026716fd348SMartin Matuska ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
2027716fd348SMartin Matuska {
2028716fd348SMartin Matuska 	itx_t *itx;
2029716fd348SMartin Matuska 
2030716fd348SMartin Matuska 	if (zil_replaying(zd->zd_zilog, tx))
2031716fd348SMartin Matuska 		return;
2032716fd348SMartin Matuska 
2033716fd348SMartin Matuska 	itx = zil_itx_create(TX_SETATTR, sizeof (*lr));
2034716fd348SMartin Matuska 	memcpy(&itx->itx_lr + 1, &lr->lr_common + 1,
2035716fd348SMartin Matuska 	    sizeof (*lr) - sizeof (lr_t));
2036716fd348SMartin Matuska 
2037716fd348SMartin Matuska 	itx->itx_sync = B_FALSE;
2038716fd348SMartin Matuska 	zil_itx_assign(zd->zd_zilog, itx, tx);
2039716fd348SMartin Matuska }
2040716fd348SMartin Matuska 
2041716fd348SMartin Matuska /*
2042716fd348SMartin Matuska  * ZIL replay ops
2043716fd348SMartin Matuska  */
2044716fd348SMartin Matuska static int
2045716fd348SMartin Matuska ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap)
2046716fd348SMartin Matuska {
2047716fd348SMartin Matuska 	ztest_ds_t *zd = arg1;
20487a7741afSMartin Matuska 	lr_create_t *lrc = arg2;
20497a7741afSMartin Matuska 	_lr_create_t *lr = &lrc->lr_create;
20507a7741afSMartin Matuska 	char *name = (char *)&lrc->lr_data[0];		/* name follows lr */
2051716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2052716fd348SMartin Matuska 	ztest_block_tag_t *bbt;
2053716fd348SMartin Matuska 	dmu_buf_t *db;
2054716fd348SMartin Matuska 	dmu_tx_t *tx;
2055716fd348SMartin Matuska 	uint64_t txg;
2056716fd348SMartin Matuska 	int error = 0;
2057716fd348SMartin Matuska 	int bonuslen;
2058716fd348SMartin Matuska 
2059716fd348SMartin Matuska 	if (byteswap)
2060716fd348SMartin Matuska 		byteswap_uint64_array(lr, sizeof (*lr));
2061716fd348SMartin Matuska 
2062716fd348SMartin Matuska 	ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ);
2063716fd348SMartin Matuska 	ASSERT3S(name[0], !=, '\0');
2064716fd348SMartin Matuska 
2065716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2066716fd348SMartin Matuska 
2067716fd348SMartin Matuska 	dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name);
2068716fd348SMartin Matuska 
2069716fd348SMartin Matuska 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
2070716fd348SMartin Matuska 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
2071716fd348SMartin Matuska 	} else {
2072716fd348SMartin Matuska 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
2073716fd348SMartin Matuska 	}
2074716fd348SMartin Matuska 
2075716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2076716fd348SMartin Matuska 	if (txg == 0)
2077716fd348SMartin Matuska 		return (ENOSPC);
2078716fd348SMartin Matuska 
2079716fd348SMartin Matuska 	ASSERT3U(dmu_objset_zil(os)->zl_replay, ==, !!lr->lr_foid);
2080716fd348SMartin Matuska 	bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize);
2081716fd348SMartin Matuska 
2082716fd348SMartin Matuska 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
2083716fd348SMartin Matuska 		if (lr->lr_foid == 0) {
2084716fd348SMartin Matuska 			lr->lr_foid = zap_create_dnsize(os,
2085716fd348SMartin Matuska 			    lr->lrz_type, lr->lrz_bonustype,
2086716fd348SMartin Matuska 			    bonuslen, lr->lrz_dnodesize, tx);
2087716fd348SMartin Matuska 		} else {
2088716fd348SMartin Matuska 			error = zap_create_claim_dnsize(os, lr->lr_foid,
2089716fd348SMartin Matuska 			    lr->lrz_type, lr->lrz_bonustype,
2090716fd348SMartin Matuska 			    bonuslen, lr->lrz_dnodesize, tx);
2091716fd348SMartin Matuska 		}
2092716fd348SMartin Matuska 	} else {
2093716fd348SMartin Matuska 		if (lr->lr_foid == 0) {
2094716fd348SMartin Matuska 			lr->lr_foid = dmu_object_alloc_dnsize(os,
2095716fd348SMartin Matuska 			    lr->lrz_type, 0, lr->lrz_bonustype,
2096716fd348SMartin Matuska 			    bonuslen, lr->lrz_dnodesize, tx);
2097716fd348SMartin Matuska 		} else {
2098716fd348SMartin Matuska 			error = dmu_object_claim_dnsize(os, lr->lr_foid,
2099716fd348SMartin Matuska 			    lr->lrz_type, 0, lr->lrz_bonustype,
2100716fd348SMartin Matuska 			    bonuslen, lr->lrz_dnodesize, tx);
2101716fd348SMartin Matuska 		}
2102716fd348SMartin Matuska 	}
2103716fd348SMartin Matuska 
2104716fd348SMartin Matuska 	if (error) {
2105716fd348SMartin Matuska 		ASSERT3U(error, ==, EEXIST);
2106716fd348SMartin Matuska 		ASSERT(zd->zd_zilog->zl_replay);
2107716fd348SMartin Matuska 		dmu_tx_commit(tx);
2108716fd348SMartin Matuska 		return (error);
2109716fd348SMartin Matuska 	}
2110716fd348SMartin Matuska 
2111716fd348SMartin Matuska 	ASSERT3U(lr->lr_foid, !=, 0);
2112716fd348SMartin Matuska 
2113716fd348SMartin Matuska 	if (lr->lrz_type != DMU_OT_ZAP_OTHER)
2114716fd348SMartin Matuska 		VERIFY0(dmu_object_set_blocksize(os, lr->lr_foid,
2115716fd348SMartin Matuska 		    lr->lrz_blocksize, lr->lrz_ibshift, tx));
2116716fd348SMartin Matuska 
2117716fd348SMartin Matuska 	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
2118716fd348SMartin Matuska 	bbt = ztest_bt_bonus(db);
2119716fd348SMartin Matuska 	dmu_buf_will_dirty(db, tx);
2120716fd348SMartin Matuska 	ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL,
2121716fd348SMartin Matuska 	    lr->lr_gen, txg, txg);
2122716fd348SMartin Matuska 	ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen);
2123716fd348SMartin Matuska 	dmu_buf_rele(db, FTAG);
2124716fd348SMartin Matuska 
2125716fd348SMartin Matuska 	VERIFY0(zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
2126716fd348SMartin Matuska 	    &lr->lr_foid, tx));
2127716fd348SMartin Matuska 
21287a7741afSMartin Matuska 	(void) ztest_log_create(zd, tx, lrc);
2129716fd348SMartin Matuska 
2130716fd348SMartin Matuska 	dmu_tx_commit(tx);
2131716fd348SMartin Matuska 
2132716fd348SMartin Matuska 	return (0);
2133716fd348SMartin Matuska }
2134716fd348SMartin Matuska 
2135716fd348SMartin Matuska static int
2136716fd348SMartin Matuska ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
2137716fd348SMartin Matuska {
2138716fd348SMartin Matuska 	ztest_ds_t *zd = arg1;
2139716fd348SMartin Matuska 	lr_remove_t *lr = arg2;
21407a7741afSMartin Matuska 	char *name = (char *)&lr->lr_data[0];		/* name follows lr */
2141716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2142716fd348SMartin Matuska 	dmu_object_info_t doi;
2143716fd348SMartin Matuska 	dmu_tx_t *tx;
2144716fd348SMartin Matuska 	uint64_t object, txg;
2145716fd348SMartin Matuska 
2146716fd348SMartin Matuska 	if (byteswap)
2147716fd348SMartin Matuska 		byteswap_uint64_array(lr, sizeof (*lr));
2148716fd348SMartin Matuska 
2149716fd348SMartin Matuska 	ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ);
2150716fd348SMartin Matuska 	ASSERT3S(name[0], !=, '\0');
2151716fd348SMartin Matuska 
2152716fd348SMartin Matuska 	VERIFY0(
2153716fd348SMartin Matuska 	    zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
2154716fd348SMartin Matuska 	ASSERT3U(object, !=, 0);
2155716fd348SMartin Matuska 
2156e716630dSMartin Matuska 	ztest_object_lock(zd, object, ZTRL_WRITER);
2157716fd348SMartin Matuska 
2158716fd348SMartin Matuska 	VERIFY0(dmu_object_info(os, object, &doi));
2159716fd348SMartin Matuska 
2160716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2161716fd348SMartin Matuska 
2162716fd348SMartin Matuska 	dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
2163716fd348SMartin Matuska 	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
2164716fd348SMartin Matuska 
2165716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2166716fd348SMartin Matuska 	if (txg == 0) {
2167716fd348SMartin Matuska 		ztest_object_unlock(zd, object);
2168716fd348SMartin Matuska 		return (ENOSPC);
2169716fd348SMartin Matuska 	}
2170716fd348SMartin Matuska 
2171716fd348SMartin Matuska 	if (doi.doi_type == DMU_OT_ZAP_OTHER) {
2172716fd348SMartin Matuska 		VERIFY0(zap_destroy(os, object, tx));
2173716fd348SMartin Matuska 	} else {
2174716fd348SMartin Matuska 		VERIFY0(dmu_object_free(os, object, tx));
2175716fd348SMartin Matuska 	}
2176716fd348SMartin Matuska 
2177716fd348SMartin Matuska 	VERIFY0(zap_remove(os, lr->lr_doid, name, tx));
2178716fd348SMartin Matuska 
2179716fd348SMartin Matuska 	(void) ztest_log_remove(zd, tx, lr, object);
2180716fd348SMartin Matuska 
2181716fd348SMartin Matuska 	dmu_tx_commit(tx);
2182716fd348SMartin Matuska 
2183716fd348SMartin Matuska 	ztest_object_unlock(zd, object);
2184716fd348SMartin Matuska 
2185716fd348SMartin Matuska 	return (0);
2186716fd348SMartin Matuska }
2187716fd348SMartin Matuska 
/*
 * Apply a TX_WRITE record: write the data that follows the record into
 * object lr_foid, then log the write through ztest_log_write().
 *
 * arg1 is the ztest_ds_t and arg2 the lr_write_t.  When the data begins
 * with a ztest block tag (BT_MAGIC in either byte order) the tag is
 * verified and regenerated before the data is written; otherwise the
 * data is written as-is.
 *
 * Returns 0 on success, or ENOSPC if ztest_tx_assign() could not assign
 * the tx, in which case every hold and lock taken here is released.
 */
2188716fd348SMartin Matuska static int
2189716fd348SMartin Matuska ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
2190716fd348SMartin Matuska {
2191716fd348SMartin Matuska 	ztest_ds_t *zd = arg1;
2192716fd348SMartin Matuska 	lr_write_t *lr = arg2;
2193716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
21947a7741afSMartin Matuska 	uint8_t *data = &lr->lr_data[0];		/* data follows lr */
2195716fd348SMartin Matuska 	uint64_t offset, length;
21967a7741afSMartin Matuska 	ztest_block_tag_t *bt = (ztest_block_tag_t *)data;
2197716fd348SMartin Matuska 	ztest_block_tag_t *bbt;
2198716fd348SMartin Matuska 	uint64_t gen, txg, lrtxg, crtxg;
2199716fd348SMartin Matuska 	dmu_object_info_t doi;
2200716fd348SMartin Matuska 	dmu_tx_t *tx;
2201716fd348SMartin Matuska 	dmu_buf_t *db;
2202716fd348SMartin Matuska 	arc_buf_t *abuf = NULL;
2203716fd348SMartin Matuska 	rl_t *rl;
2204716fd348SMartin Matuska 
2205716fd348SMartin Matuska 	if (byteswap)
2206716fd348SMartin Matuska 		byteswap_uint64_array(lr, sizeof (*lr));
2207716fd348SMartin Matuska 
2208716fd348SMartin Matuska 	offset = lr->lr_offset;
2209716fd348SMartin Matuska 	length = lr->lr_length;
2210716fd348SMartin Matuska 
2211716fd348SMartin Matuska 	/* If it's a dmu_sync() block, write the whole block */
2212716fd348SMartin Matuska 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
2213716fd348SMartin Matuska 		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
2214716fd348SMartin Matuska 		if (length < blocksize) {
			/* Widen the write to the block containing offset. */
2215716fd348SMartin Matuska 			offset -= offset % blocksize;
2216716fd348SMartin Matuska 			length = blocksize;
2217716fd348SMartin Matuska 		}
2218716fd348SMartin Matuska 	}
2219716fd348SMartin Matuska 
	/*
	 * The block tag inside the data is not covered by the record
	 * byteswap above, so it is detected and swapped by its own magic.
	 * A NULL bt from here on means "the data carries no block tag".
	 */
2220716fd348SMartin Matuska 	if (bt->bt_magic == BSWAP_64(BT_MAGIC))
2221716fd348SMartin Matuska 		byteswap_uint64_array(bt, sizeof (*bt));
2222716fd348SMartin Matuska 
2223716fd348SMartin Matuska 	if (bt->bt_magic != BT_MAGIC)
2224716fd348SMartin Matuska 		bt = NULL;
2225716fd348SMartin Matuska 
	/* Object lock as reader, then the covering range lock as writer. */
2226e716630dSMartin Matuska 	ztest_object_lock(zd, lr->lr_foid, ZTRL_READER);
2227e716630dSMartin Matuska 	rl = ztest_range_lock(zd, lr->lr_foid, offset, length, ZTRL_WRITER);
2228716fd348SMartin Matuska 
2229716fd348SMartin Matuska 	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
2230716fd348SMartin Matuska 
2231716fd348SMartin Matuska 	dmu_object_info_from_db(db, &doi);
2232716fd348SMartin Matuska 
	/* The bonus buffer's tag supplies the generation and creation txg. */
2233716fd348SMartin Matuska 	bbt = ztest_bt_bonus(db);
2234716fd348SMartin Matuska 	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
2235716fd348SMartin Matuska 	gen = bbt->bt_gen;
2236716fd348SMartin Matuska 	crtxg = bbt->bt_crtxg;
2237716fd348SMartin Matuska 	lrtxg = lr->lr_common.lrc_txg;
2238716fd348SMartin Matuska 
2239716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2240716fd348SMartin Matuska 
2241716fd348SMartin Matuska 	dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
2242716fd348SMartin Matuska 
	/*
	 * Sometimes (when ztest_random(8) returns 0) request an ARC buffer
	 * for the write instead of using dmu_write(); this is only tried
	 * for a write of exactly one data block at a block-aligned offset.
	 */
2243716fd348SMartin Matuska 	if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
2244716fd348SMartin Matuska 	    P2PHASE(offset, length) == 0)
2245716fd348SMartin Matuska 		abuf = dmu_request_arcbuf(db, length);
2246716fd348SMartin Matuska 
2247716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2248716fd348SMartin Matuska 	if (txg == 0) {
		/* Assignment failed: undo everything acquired above. */
2249716fd348SMartin Matuska 		if (abuf != NULL)
2250716fd348SMartin Matuska 			dmu_return_arcbuf(abuf);
2251716fd348SMartin Matuska 		dmu_buf_rele(db, FTAG);
2252716fd348SMartin Matuska 		ztest_range_unlock(rl);
2253716fd348SMartin Matuska 		ztest_object_unlock(zd, lr->lr_foid);
2254716fd348SMartin Matuska 		return (ENOSPC);
2255716fd348SMartin Matuska 	}
2256716fd348SMartin Matuska 
2257716fd348SMartin Matuska 	if (bt != NULL) {
2258716fd348SMartin Matuska 		/*
2259716fd348SMartin Matuska 		 * Usually, verify the old data before writing new data --
2260716fd348SMartin Matuska 		 * but not always, because we also want to verify correct
2261716fd348SMartin Matuska 		 * behavior when the data was not recently read into cache.
2262716fd348SMartin Matuska 		 */
2263dbd5678dSMartin Matuska 		ASSERT(doi.doi_data_block_size);
2264716fd348SMartin Matuska 		ASSERT0(offset % doi.doi_data_block_size);
2265716fd348SMartin Matuska 		if (ztest_random(4) != 0) {
2266716fd348SMartin Matuska 			int prefetch = ztest_random(2) ?
2267716fd348SMartin Matuska 			    DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
22687a7741afSMartin Matuska 
22697a7741afSMartin Matuska 			/*
22707a7741afSMartin Matuska 			 * We will randomly set when to do O_DIRECT on a read.
22717a7741afSMartin Matuska 			 */
22727a7741afSMartin Matuska 			if (ztest_random(4) == 0)
22737a7741afSMartin Matuska 				prefetch |= DMU_DIRECTIO;
22747a7741afSMartin Matuska 
2275716fd348SMartin Matuska 			ztest_block_tag_t rbt;
2276716fd348SMartin Matuska 
			/* Only a tag's worth of the old data is read back. */
2277716fd348SMartin Matuska 			VERIFY(dmu_read(os, lr->lr_foid, offset,
2278716fd348SMartin Matuska 			    sizeof (rbt), &rbt, prefetch) == 0);
2279716fd348SMartin Matuska 			if (rbt.bt_magic == BT_MAGIC) {
2280716fd348SMartin Matuska 				ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
2281716fd348SMartin Matuska 				    offset, gen, txg, crtxg);
2282716fd348SMartin Matuska 			}
2283716fd348SMartin Matuska 		}
2284716fd348SMartin Matuska 
2285716fd348SMartin Matuska 		/*
2286716fd348SMartin Matuska 		 * Writes can appear to be newer than the bonus buffer because
2287716fd348SMartin Matuska 		 * the ztest_get_data() callback does a dmu_read() of the
2288716fd348SMartin Matuska 		 * open-context data, which may be different than the data
2289716fd348SMartin Matuska 		 * as it was when the write was generated.
2290716fd348SMartin Matuska 		 */
2291716fd348SMartin Matuska 		if (zd->zd_zilog->zl_replay) {
2292716fd348SMartin Matuska 			ztest_bt_verify(bt, os, lr->lr_foid, 0, offset,
2293716fd348SMartin Matuska 			    MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
2294716fd348SMartin Matuska 			    bt->bt_crtxg);
2295716fd348SMartin Matuska 		}
2296716fd348SMartin Matuska 
2297716fd348SMartin Matuska 		/*
2298716fd348SMartin Matuska 		 * Set the bt's gen/txg to the bonus buffer's gen/txg
2299716fd348SMartin Matuska 		 * so that all of the usual ASSERTs will work.
2300716fd348SMartin Matuska 		 */
2301716fd348SMartin Matuska 		ztest_bt_generate(bt, os, lr->lr_foid, 0, offset, gen, txg,
2302716fd348SMartin Matuska 		    crtxg);
2303716fd348SMartin Matuska 	}
2304716fd348SMartin Matuska 
2305716fd348SMartin Matuska 	if (abuf == NULL) {
2306716fd348SMartin Matuska 		dmu_write(os, lr->lr_foid, offset, length, data, tx);
2307716fd348SMartin Matuska 	} else {
		/* Fill the requested ARC buffer and assign it to the dbuf. */
2308716fd348SMartin Matuska 		memcpy(abuf->b_data, data, length);
2309be181ee2SMartin Matuska 		VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx));
2310716fd348SMartin Matuska 	}
2311716fd348SMartin Matuska 
2312716fd348SMartin Matuska 	(void) ztest_log_write(zd, tx, lr);
2313716fd348SMartin Matuska 
2314716fd348SMartin Matuska 	dmu_buf_rele(db, FTAG);
2315716fd348SMartin Matuska 
2316716fd348SMartin Matuska 	dmu_tx_commit(tx);
2317716fd348SMartin Matuska 
2318716fd348SMartin Matuska 	ztest_range_unlock(rl);
2319716fd348SMartin Matuska 	ztest_object_unlock(zd, lr->lr_foid);
2320716fd348SMartin Matuska 
2321716fd348SMartin Matuska 	return (0);
2322716fd348SMartin Matuska }
2323716fd348SMartin Matuska 
2324716fd348SMartin Matuska static int
2325716fd348SMartin Matuska ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
2326716fd348SMartin Matuska {
2327716fd348SMartin Matuska 	ztest_ds_t *zd = arg1;
2328716fd348SMartin Matuska 	lr_truncate_t *lr = arg2;
2329716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2330716fd348SMartin Matuska 	dmu_tx_t *tx;
2331716fd348SMartin Matuska 	uint64_t txg;
2332716fd348SMartin Matuska 	rl_t *rl;
2333716fd348SMartin Matuska 
2334716fd348SMartin Matuska 	if (byteswap)
2335716fd348SMartin Matuska 		byteswap_uint64_array(lr, sizeof (*lr));
2336716fd348SMartin Matuska 
2337e716630dSMartin Matuska 	ztest_object_lock(zd, lr->lr_foid, ZTRL_READER);
2338716fd348SMartin Matuska 	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
2339e716630dSMartin Matuska 	    ZTRL_WRITER);
2340716fd348SMartin Matuska 
2341716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2342716fd348SMartin Matuska 
2343716fd348SMartin Matuska 	dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
2344716fd348SMartin Matuska 
2345716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2346716fd348SMartin Matuska 	if (txg == 0) {
2347716fd348SMartin Matuska 		ztest_range_unlock(rl);
2348716fd348SMartin Matuska 		ztest_object_unlock(zd, lr->lr_foid);
2349716fd348SMartin Matuska 		return (ENOSPC);
2350716fd348SMartin Matuska 	}
2351716fd348SMartin Matuska 
2352716fd348SMartin Matuska 	VERIFY0(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
2353716fd348SMartin Matuska 	    lr->lr_length, tx));
2354716fd348SMartin Matuska 
2355716fd348SMartin Matuska 	(void) ztest_log_truncate(zd, tx, lr);
2356716fd348SMartin Matuska 
2357716fd348SMartin Matuska 	dmu_tx_commit(tx);
2358716fd348SMartin Matuska 
2359716fd348SMartin Matuska 	ztest_range_unlock(rl);
2360716fd348SMartin Matuska 	ztest_object_unlock(zd, lr->lr_foid);
2361716fd348SMartin Matuska 
2362716fd348SMartin Matuska 	return (0);
2363716fd348SMartin Matuska }
2364716fd348SMartin Matuska 
2365716fd348SMartin Matuska static int
2366716fd348SMartin Matuska ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
2367716fd348SMartin Matuska {
2368716fd348SMartin Matuska 	ztest_ds_t *zd = arg1;
2369716fd348SMartin Matuska 	lr_setattr_t *lr = arg2;
2370716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2371716fd348SMartin Matuska 	dmu_tx_t *tx;
2372716fd348SMartin Matuska 	dmu_buf_t *db;
2373716fd348SMartin Matuska 	ztest_block_tag_t *bbt;
2374716fd348SMartin Matuska 	uint64_t txg, lrtxg, crtxg, dnodesize;
2375716fd348SMartin Matuska 
2376716fd348SMartin Matuska 	if (byteswap)
2377716fd348SMartin Matuska 		byteswap_uint64_array(lr, sizeof (*lr));
2378716fd348SMartin Matuska 
2379e716630dSMartin Matuska 	ztest_object_lock(zd, lr->lr_foid, ZTRL_WRITER);
2380716fd348SMartin Matuska 
2381716fd348SMartin Matuska 	VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
2382716fd348SMartin Matuska 
2383716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2384716fd348SMartin Matuska 	dmu_tx_hold_bonus(tx, lr->lr_foid);
2385716fd348SMartin Matuska 
2386716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2387716fd348SMartin Matuska 	if (txg == 0) {
2388716fd348SMartin Matuska 		dmu_buf_rele(db, FTAG);
2389716fd348SMartin Matuska 		ztest_object_unlock(zd, lr->lr_foid);
2390716fd348SMartin Matuska 		return (ENOSPC);
2391716fd348SMartin Matuska 	}
2392716fd348SMartin Matuska 
2393716fd348SMartin Matuska 	bbt = ztest_bt_bonus(db);
2394716fd348SMartin Matuska 	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
2395716fd348SMartin Matuska 	crtxg = bbt->bt_crtxg;
2396716fd348SMartin Matuska 	lrtxg = lr->lr_common.lrc_txg;
2397716fd348SMartin Matuska 	dnodesize = bbt->bt_dnodesize;
2398716fd348SMartin Matuska 
2399716fd348SMartin Matuska 	if (zd->zd_zilog->zl_replay) {
2400716fd348SMartin Matuska 		ASSERT3U(lr->lr_size, !=, 0);
2401716fd348SMartin Matuska 		ASSERT3U(lr->lr_mode, !=, 0);
2402716fd348SMartin Matuska 		ASSERT3U(lrtxg, !=, 0);
2403716fd348SMartin Matuska 	} else {
2404716fd348SMartin Matuska 		/*
2405716fd348SMartin Matuska 		 * Randomly change the size and increment the generation.
2406716fd348SMartin Matuska 		 */
2407716fd348SMartin Matuska 		lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
2408716fd348SMartin Matuska 		    sizeof (*bbt);
2409716fd348SMartin Matuska 		lr->lr_mode = bbt->bt_gen + 1;
2410716fd348SMartin Matuska 		ASSERT0(lrtxg);
2411716fd348SMartin Matuska 	}
2412716fd348SMartin Matuska 
2413716fd348SMartin Matuska 	/*
2414716fd348SMartin Matuska 	 * Verify that the current bonus buffer is not newer than our txg.
2415716fd348SMartin Matuska 	 */
2416716fd348SMartin Matuska 	ztest_bt_verify(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
2417716fd348SMartin Matuska 	    MAX(txg, lrtxg), crtxg);
2418716fd348SMartin Matuska 
2419716fd348SMartin Matuska 	dmu_buf_will_dirty(db, tx);
2420716fd348SMartin Matuska 
2421716fd348SMartin Matuska 	ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
2422716fd348SMartin Matuska 	ASSERT3U(lr->lr_size, <=, db->db_size);
2423716fd348SMartin Matuska 	VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
2424716fd348SMartin Matuska 	bbt = ztest_bt_bonus(db);
2425716fd348SMartin Matuska 
2426716fd348SMartin Matuska 	ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
2427716fd348SMartin Matuska 	    txg, crtxg);
2428716fd348SMartin Matuska 	ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen);
2429716fd348SMartin Matuska 	dmu_buf_rele(db, FTAG);
2430716fd348SMartin Matuska 
2431716fd348SMartin Matuska 	(void) ztest_log_setattr(zd, tx, lr);
2432716fd348SMartin Matuska 
2433716fd348SMartin Matuska 	dmu_tx_commit(tx);
2434716fd348SMartin Matuska 
2435716fd348SMartin Matuska 	ztest_object_unlock(zd, lr->lr_foid);
2436716fd348SMartin Matuska 
2437716fd348SMartin Matuska 	return (0);
2438716fd348SMartin Matuska }
2439716fd348SMartin Matuska 
/*
 * Replay dispatch table handed to zil_replay() by ztest_zil_remount().
 * Only create, remove, write, truncate and setattr have handlers; every
 * other listed slot is NULL.  The array is sized TX_MAX_TYPE, so any
 * trailing slots without an initializer are implicitly NULL as well.
 *
 * NOTE(review): the initializers are positional and the per-slot labels
 * are comments only -- confirm they still line up with the TX_* values
 * in the ZIL header.
 */
2440dbd5678dSMartin Matuska static zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
2441716fd348SMartin Matuska 	NULL,			/* 0 no such transaction type */
2442716fd348SMartin Matuska 	ztest_replay_create,	/* TX_CREATE */
2443716fd348SMartin Matuska 	NULL,			/* TX_MKDIR */
2444716fd348SMartin Matuska 	NULL,			/* TX_MKXATTR */
2445716fd348SMartin Matuska 	NULL,			/* TX_SYMLINK */
2446716fd348SMartin Matuska 	ztest_replay_remove,	/* TX_REMOVE */
2447716fd348SMartin Matuska 	NULL,			/* TX_RMDIR */
2448716fd348SMartin Matuska 	NULL,			/* TX_LINK */
2449716fd348SMartin Matuska 	NULL,			/* TX_RENAME */
2450716fd348SMartin Matuska 	ztest_replay_write,	/* TX_WRITE */
2451716fd348SMartin Matuska 	ztest_replay_truncate,	/* TX_TRUNCATE */
2452716fd348SMartin Matuska 	ztest_replay_setattr,	/* TX_SETATTR */
2453716fd348SMartin Matuska 	NULL,			/* TX_ACL */
2454716fd348SMartin Matuska 	NULL,			/* TX_CREATE_ACL */
2455716fd348SMartin Matuska 	NULL,			/* TX_CREATE_ATTR */
2456716fd348SMartin Matuska 	NULL,			/* TX_CREATE_ACL_ATTR */
2457716fd348SMartin Matuska 	NULL,			/* TX_MKDIR_ACL */
2458716fd348SMartin Matuska 	NULL,			/* TX_MKDIR_ATTR */
2459716fd348SMartin Matuska 	NULL,			/* TX_MKDIR_ACL_ATTR */
2460716fd348SMartin Matuska 	NULL,			/* TX_WRITE2 */
2461716fd348SMartin Matuska 	NULL,			/* TX_SETSAXATTR */
2462dbd5678dSMartin Matuska 	NULL,			/* TX_RENAME_EXCHANGE */
2463dbd5678dSMartin Matuska 	NULL,			/* TX_RENAME_WHITEOUT */
2464716fd348SMartin Matuska };
2465716fd348SMartin Matuska 
2466716fd348SMartin Matuska /*
2467716fd348SMartin Matuska  * ZIL get_data callbacks
2468716fd348SMartin Matuska  */
2469716fd348SMartin Matuska 
2470716fd348SMartin Matuska static void
2471716fd348SMartin Matuska ztest_get_done(zgd_t *zgd, int error)
2472716fd348SMartin Matuska {
2473716fd348SMartin Matuska 	(void) error;
2474716fd348SMartin Matuska 	ztest_ds_t *zd = zgd->zgd_private;
2475716fd348SMartin Matuska 	uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object;
2476716fd348SMartin Matuska 
2477716fd348SMartin Matuska 	if (zgd->zgd_db)
2478716fd348SMartin Matuska 		dmu_buf_rele(zgd->zgd_db, zgd);
2479716fd348SMartin Matuska 
2480716fd348SMartin Matuska 	ztest_range_unlock((rl_t *)zgd->zgd_lr);
2481716fd348SMartin Matuska 	ztest_object_unlock(zd, object);
2482716fd348SMartin Matuska 
2483716fd348SMartin Matuska 	umem_free(zgd, sizeof (*zgd));
2484716fd348SMartin Matuska }
2485716fd348SMartin Matuska 
2486716fd348SMartin Matuska static int
2487716fd348SMartin Matuska ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
2488716fd348SMartin Matuska     struct lwb *lwb, zio_t *zio)
2489716fd348SMartin Matuska {
2490716fd348SMartin Matuska 	(void) arg2;
2491716fd348SMartin Matuska 	ztest_ds_t *zd = arg;
2492716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2493716fd348SMartin Matuska 	uint64_t object = lr->lr_foid;
2494716fd348SMartin Matuska 	uint64_t offset = lr->lr_offset;
2495716fd348SMartin Matuska 	uint64_t size = lr->lr_length;
2496716fd348SMartin Matuska 	uint64_t txg = lr->lr_common.lrc_txg;
2497716fd348SMartin Matuska 	uint64_t crtxg;
2498716fd348SMartin Matuska 	dmu_object_info_t doi;
2499716fd348SMartin Matuska 	dmu_buf_t *db;
2500716fd348SMartin Matuska 	zgd_t *zgd;
2501716fd348SMartin Matuska 	int error;
2502716fd348SMartin Matuska 
2503716fd348SMartin Matuska 	ASSERT3P(lwb, !=, NULL);
2504716fd348SMartin Matuska 	ASSERT3U(size, !=, 0);
2505716fd348SMartin Matuska 
2506e716630dSMartin Matuska 	ztest_object_lock(zd, object, ZTRL_READER);
2507716fd348SMartin Matuska 	error = dmu_bonus_hold(os, object, FTAG, &db);
2508716fd348SMartin Matuska 	if (error) {
2509716fd348SMartin Matuska 		ztest_object_unlock(zd, object);
2510716fd348SMartin Matuska 		return (error);
2511716fd348SMartin Matuska 	}
2512716fd348SMartin Matuska 
2513716fd348SMartin Matuska 	crtxg = ztest_bt_bonus(db)->bt_crtxg;
2514716fd348SMartin Matuska 
2515716fd348SMartin Matuska 	if (crtxg == 0 || crtxg > txg) {
2516716fd348SMartin Matuska 		dmu_buf_rele(db, FTAG);
2517716fd348SMartin Matuska 		ztest_object_unlock(zd, object);
2518716fd348SMartin Matuska 		return (ENOENT);
2519716fd348SMartin Matuska 	}
2520716fd348SMartin Matuska 
2521716fd348SMartin Matuska 	dmu_object_info_from_db(db, &doi);
2522716fd348SMartin Matuska 	dmu_buf_rele(db, FTAG);
2523716fd348SMartin Matuska 	db = NULL;
2524716fd348SMartin Matuska 
2525716fd348SMartin Matuska 	zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
2526716fd348SMartin Matuska 	zgd->zgd_lwb = lwb;
2527716fd348SMartin Matuska 	zgd->zgd_private = zd;
2528716fd348SMartin Matuska 
2529716fd348SMartin Matuska 	if (buf != NULL) {	/* immediate write */
2530716fd348SMartin Matuska 		zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd,
2531e716630dSMartin Matuska 		    object, offset, size, ZTRL_READER);
2532716fd348SMartin Matuska 
2533716fd348SMartin Matuska 		error = dmu_read(os, object, offset, size, buf,
2534716fd348SMartin Matuska 		    DMU_READ_NO_PREFETCH);
2535716fd348SMartin Matuska 		ASSERT0(error);
2536716fd348SMartin Matuska 	} else {
2537315ee00fSMartin Matuska 		ASSERT3P(zio, !=, NULL);
2538716fd348SMartin Matuska 		size = doi.doi_data_block_size;
2539716fd348SMartin Matuska 		if (ISP2(size)) {
2540aca928a5SMartin Matuska 			offset = P2ALIGN_TYPED(offset, size, uint64_t);
2541716fd348SMartin Matuska 		} else {
2542716fd348SMartin Matuska 			ASSERT3U(offset, <, size);
2543716fd348SMartin Matuska 			offset = 0;
2544716fd348SMartin Matuska 		}
2545716fd348SMartin Matuska 
2546716fd348SMartin Matuska 		zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd,
2547e716630dSMartin Matuska 		    object, offset, size, ZTRL_READER);
2548716fd348SMartin Matuska 
2549abcdc1b9SMartin Matuska 		error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
2550716fd348SMartin Matuska 
2551716fd348SMartin Matuska 		if (error == 0) {
2552716fd348SMartin Matuska 			blkptr_t *bp = &lr->lr_blkptr;
2553716fd348SMartin Matuska 
2554716fd348SMartin Matuska 			zgd->zgd_db = db;
2555716fd348SMartin Matuska 			zgd->zgd_bp = bp;
2556716fd348SMartin Matuska 
2557716fd348SMartin Matuska 			ASSERT3U(db->db_offset, ==, offset);
2558716fd348SMartin Matuska 			ASSERT3U(db->db_size, ==, size);
2559716fd348SMartin Matuska 
2560716fd348SMartin Matuska 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
2561716fd348SMartin Matuska 			    ztest_get_done, zgd);
2562716fd348SMartin Matuska 
2563716fd348SMartin Matuska 			if (error == 0)
2564716fd348SMartin Matuska 				return (0);
2565716fd348SMartin Matuska 		}
2566716fd348SMartin Matuska 	}
2567716fd348SMartin Matuska 
2568716fd348SMartin Matuska 	ztest_get_done(zgd, error);
2569716fd348SMartin Matuska 
2570716fd348SMartin Matuska 	return (error);
2571716fd348SMartin Matuska }
2572716fd348SMartin Matuska 
2573716fd348SMartin Matuska static void *
2574716fd348SMartin Matuska ztest_lr_alloc(size_t lrsize, char *name)
2575716fd348SMartin Matuska {
2576716fd348SMartin Matuska 	char *lr;
2577716fd348SMartin Matuska 	size_t namesize = name ? strlen(name) + 1 : 0;
2578716fd348SMartin Matuska 
2579716fd348SMartin Matuska 	lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
2580716fd348SMartin Matuska 
2581716fd348SMartin Matuska 	if (name)
2582716fd348SMartin Matuska 		memcpy(lr + lrsize, name, namesize);
2583716fd348SMartin Matuska 
2584716fd348SMartin Matuska 	return (lr);
2585716fd348SMartin Matuska }
2586716fd348SMartin Matuska 
/*
 * Free a record obtained from ztest_lr_alloc().  'lrsize' and 'name'
 * must match the allocation so that the same total size is released.
 */
static void
ztest_lr_free(void *lr, size_t lrsize, char *name)
{
	size_t total = lrsize;

	if (name != NULL)
		total += strlen(name) + 1;

	umem_free(lr, total);
}
2594716fd348SMartin Matuska 
2595716fd348SMartin Matuska /*
2596716fd348SMartin Matuska  * Lookup a bunch of objects.  Returns the number of objects not found.
2597716fd348SMartin Matuska  */
2598716fd348SMartin Matuska static int
2599716fd348SMartin Matuska ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
2600716fd348SMartin Matuska {
2601716fd348SMartin Matuska 	int missing = 0;
2602716fd348SMartin Matuska 	int error;
2603716fd348SMartin Matuska 	int i;
2604716fd348SMartin Matuska 
2605716fd348SMartin Matuska 	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
2606716fd348SMartin Matuska 
2607716fd348SMartin Matuska 	for (i = 0; i < count; i++, od++) {
2608716fd348SMartin Matuska 		od->od_object = 0;
2609716fd348SMartin Matuska 		error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
2610716fd348SMartin Matuska 		    sizeof (uint64_t), 1, &od->od_object);
2611716fd348SMartin Matuska 		if (error) {
2612716fd348SMartin Matuska 			ASSERT3S(error, ==, ENOENT);
2613716fd348SMartin Matuska 			ASSERT0(od->od_object);
2614716fd348SMartin Matuska 			missing++;
2615716fd348SMartin Matuska 		} else {
2616716fd348SMartin Matuska 			dmu_buf_t *db;
2617716fd348SMartin Matuska 			ztest_block_tag_t *bbt;
2618716fd348SMartin Matuska 			dmu_object_info_t doi;
2619716fd348SMartin Matuska 
2620716fd348SMartin Matuska 			ASSERT3U(od->od_object, !=, 0);
2621716fd348SMartin Matuska 			ASSERT0(missing);	/* there should be no gaps */
2622716fd348SMartin Matuska 
2623e716630dSMartin Matuska 			ztest_object_lock(zd, od->od_object, ZTRL_READER);
2624716fd348SMartin Matuska 			VERIFY0(dmu_bonus_hold(zd->zd_os, od->od_object,
2625716fd348SMartin Matuska 			    FTAG, &db));
2626716fd348SMartin Matuska 			dmu_object_info_from_db(db, &doi);
2627716fd348SMartin Matuska 			bbt = ztest_bt_bonus(db);
2628716fd348SMartin Matuska 			ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
2629716fd348SMartin Matuska 			od->od_type = doi.doi_type;
2630716fd348SMartin Matuska 			od->od_blocksize = doi.doi_data_block_size;
2631716fd348SMartin Matuska 			od->od_gen = bbt->bt_gen;
2632716fd348SMartin Matuska 			dmu_buf_rele(db, FTAG);
2633716fd348SMartin Matuska 			ztest_object_unlock(zd, od->od_object);
2634716fd348SMartin Matuska 		}
2635716fd348SMartin Matuska 	}
2636716fd348SMartin Matuska 
2637716fd348SMartin Matuska 	return (missing);
2638716fd348SMartin Matuska }
2639716fd348SMartin Matuska 
2640716fd348SMartin Matuska static int
2641716fd348SMartin Matuska ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
2642716fd348SMartin Matuska {
2643716fd348SMartin Matuska 	int missing = 0;
2644716fd348SMartin Matuska 	int i;
2645716fd348SMartin Matuska 
2646716fd348SMartin Matuska 	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
2647716fd348SMartin Matuska 
2648716fd348SMartin Matuska 	for (i = 0; i < count; i++, od++) {
2649716fd348SMartin Matuska 		if (missing) {
2650716fd348SMartin Matuska 			od->od_object = 0;
2651716fd348SMartin Matuska 			missing++;
2652716fd348SMartin Matuska 			continue;
2653716fd348SMartin Matuska 		}
2654716fd348SMartin Matuska 
26557a7741afSMartin Matuska 		lr_create_t *lrc = ztest_lr_alloc(sizeof (*lrc), od->od_name);
26567a7741afSMartin Matuska 		_lr_create_t *lr = &lrc->lr_create;
2657716fd348SMartin Matuska 
2658716fd348SMartin Matuska 		lr->lr_doid = od->od_dir;
2659716fd348SMartin Matuska 		lr->lr_foid = 0;	/* 0 to allocate, > 0 to claim */
2660716fd348SMartin Matuska 		lr->lrz_type = od->od_crtype;
2661716fd348SMartin Matuska 		lr->lrz_blocksize = od->od_crblocksize;
2662716fd348SMartin Matuska 		lr->lrz_ibshift = ztest_random_ibshift();
2663716fd348SMartin Matuska 		lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
2664716fd348SMartin Matuska 		lr->lrz_dnodesize = od->od_crdnodesize;
2665716fd348SMartin Matuska 		lr->lr_gen = od->od_crgen;
2666716fd348SMartin Matuska 		lr->lr_crtime[0] = time(NULL);
2667716fd348SMartin Matuska 
2668716fd348SMartin Matuska 		if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
2669716fd348SMartin Matuska 			ASSERT0(missing);
2670716fd348SMartin Matuska 			od->od_object = 0;
2671716fd348SMartin Matuska 			missing++;
2672716fd348SMartin Matuska 		} else {
2673716fd348SMartin Matuska 			od->od_object = lr->lr_foid;
2674716fd348SMartin Matuska 			od->od_type = od->od_crtype;
2675716fd348SMartin Matuska 			od->od_blocksize = od->od_crblocksize;
2676716fd348SMartin Matuska 			od->od_gen = od->od_crgen;
2677716fd348SMartin Matuska 			ASSERT3U(od->od_object, !=, 0);
2678716fd348SMartin Matuska 		}
2679716fd348SMartin Matuska 
2680716fd348SMartin Matuska 		ztest_lr_free(lr, sizeof (*lr), od->od_name);
2681716fd348SMartin Matuska 	}
2682716fd348SMartin Matuska 
2683716fd348SMartin Matuska 	return (missing);
2684716fd348SMartin Matuska }
2685716fd348SMartin Matuska 
2686716fd348SMartin Matuska static int
2687716fd348SMartin Matuska ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
2688716fd348SMartin Matuska {
2689716fd348SMartin Matuska 	int missing = 0;
2690716fd348SMartin Matuska 	int error;
2691716fd348SMartin Matuska 	int i;
2692716fd348SMartin Matuska 
2693716fd348SMartin Matuska 	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
2694716fd348SMartin Matuska 
2695716fd348SMartin Matuska 	od += count - 1;
2696716fd348SMartin Matuska 
2697716fd348SMartin Matuska 	for (i = count - 1; i >= 0; i--, od--) {
2698716fd348SMartin Matuska 		if (missing) {
2699716fd348SMartin Matuska 			missing++;
2700716fd348SMartin Matuska 			continue;
2701716fd348SMartin Matuska 		}
2702716fd348SMartin Matuska 
2703716fd348SMartin Matuska 		/*
2704716fd348SMartin Matuska 		 * No object was found.
2705716fd348SMartin Matuska 		 */
2706716fd348SMartin Matuska 		if (od->od_object == 0)
2707716fd348SMartin Matuska 			continue;
2708716fd348SMartin Matuska 
2709716fd348SMartin Matuska 		lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
2710716fd348SMartin Matuska 
2711716fd348SMartin Matuska 		lr->lr_doid = od->od_dir;
2712716fd348SMartin Matuska 
2713716fd348SMartin Matuska 		if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
2714716fd348SMartin Matuska 			ASSERT3U(error, ==, ENOSPC);
2715716fd348SMartin Matuska 			missing++;
2716716fd348SMartin Matuska 		} else {
2717716fd348SMartin Matuska 			od->od_object = 0;
2718716fd348SMartin Matuska 		}
2719716fd348SMartin Matuska 		ztest_lr_free(lr, sizeof (*lr), od->od_name);
2720716fd348SMartin Matuska 	}
2721716fd348SMartin Matuska 
2722716fd348SMartin Matuska 	return (missing);
2723716fd348SMartin Matuska }
2724716fd348SMartin Matuska 
2725716fd348SMartin Matuska static int
2726716fd348SMartin Matuska ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
2727e716630dSMartin Matuska     const void *data)
2728716fd348SMartin Matuska {
2729716fd348SMartin Matuska 	lr_write_t *lr;
2730716fd348SMartin Matuska 	int error;
2731716fd348SMartin Matuska 
2732716fd348SMartin Matuska 	lr = ztest_lr_alloc(sizeof (*lr) + size, NULL);
2733716fd348SMartin Matuska 
2734716fd348SMartin Matuska 	lr->lr_foid = object;
2735716fd348SMartin Matuska 	lr->lr_offset = offset;
2736716fd348SMartin Matuska 	lr->lr_length = size;
2737716fd348SMartin Matuska 	lr->lr_blkoff = 0;
2738716fd348SMartin Matuska 	BP_ZERO(&lr->lr_blkptr);
2739716fd348SMartin Matuska 
27407a7741afSMartin Matuska 	memcpy(&lr->lr_data[0], data, size);
2741716fd348SMartin Matuska 
2742716fd348SMartin Matuska 	error = ztest_replay_write(zd, lr, B_FALSE);
2743716fd348SMartin Matuska 
2744716fd348SMartin Matuska 	ztest_lr_free(lr, sizeof (*lr) + size, NULL);
2745716fd348SMartin Matuska 
2746716fd348SMartin Matuska 	return (error);
2747716fd348SMartin Matuska }
2748716fd348SMartin Matuska 
2749716fd348SMartin Matuska static int
2750716fd348SMartin Matuska ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
2751716fd348SMartin Matuska {
2752716fd348SMartin Matuska 	lr_truncate_t *lr;
2753716fd348SMartin Matuska 	int error;
2754716fd348SMartin Matuska 
2755716fd348SMartin Matuska 	lr = ztest_lr_alloc(sizeof (*lr), NULL);
2756716fd348SMartin Matuska 
2757716fd348SMartin Matuska 	lr->lr_foid = object;
2758716fd348SMartin Matuska 	lr->lr_offset = offset;
2759716fd348SMartin Matuska 	lr->lr_length = size;
2760716fd348SMartin Matuska 
2761716fd348SMartin Matuska 	error = ztest_replay_truncate(zd, lr, B_FALSE);
2762716fd348SMartin Matuska 
2763716fd348SMartin Matuska 	ztest_lr_free(lr, sizeof (*lr), NULL);
2764716fd348SMartin Matuska 
2765716fd348SMartin Matuska 	return (error);
2766716fd348SMartin Matuska }
2767716fd348SMartin Matuska 
2768716fd348SMartin Matuska static int
2769716fd348SMartin Matuska ztest_setattr(ztest_ds_t *zd, uint64_t object)
2770716fd348SMartin Matuska {
2771716fd348SMartin Matuska 	lr_setattr_t *lr;
2772716fd348SMartin Matuska 	int error;
2773716fd348SMartin Matuska 
2774716fd348SMartin Matuska 	lr = ztest_lr_alloc(sizeof (*lr), NULL);
2775716fd348SMartin Matuska 
2776716fd348SMartin Matuska 	lr->lr_foid = object;
2777716fd348SMartin Matuska 	lr->lr_size = 0;
2778716fd348SMartin Matuska 	lr->lr_mode = 0;
2779716fd348SMartin Matuska 
2780716fd348SMartin Matuska 	error = ztest_replay_setattr(zd, lr, B_FALSE);
2781716fd348SMartin Matuska 
2782716fd348SMartin Matuska 	ztest_lr_free(lr, sizeof (*lr), NULL);
2783716fd348SMartin Matuska 
2784716fd348SMartin Matuska 	return (error);
2785716fd348SMartin Matuska }
2786716fd348SMartin Matuska 
2787716fd348SMartin Matuska static void
2788716fd348SMartin Matuska ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
2789716fd348SMartin Matuska {
2790716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2791716fd348SMartin Matuska 	dmu_tx_t *tx;
2792716fd348SMartin Matuska 	uint64_t txg;
2793716fd348SMartin Matuska 	rl_t *rl;
2794716fd348SMartin Matuska 
2795716fd348SMartin Matuska 	txg_wait_synced(dmu_objset_pool(os), 0);
2796716fd348SMartin Matuska 
2797e716630dSMartin Matuska 	ztest_object_lock(zd, object, ZTRL_READER);
2798e716630dSMartin Matuska 	rl = ztest_range_lock(zd, object, offset, size, ZTRL_WRITER);
2799716fd348SMartin Matuska 
2800716fd348SMartin Matuska 	tx = dmu_tx_create(os);
2801716fd348SMartin Matuska 
2802716fd348SMartin Matuska 	dmu_tx_hold_write(tx, object, offset, size);
2803716fd348SMartin Matuska 
2804716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
2805716fd348SMartin Matuska 
2806716fd348SMartin Matuska 	if (txg != 0) {
2807716fd348SMartin Matuska 		dmu_prealloc(os, object, offset, size, tx);
2808716fd348SMartin Matuska 		dmu_tx_commit(tx);
2809716fd348SMartin Matuska 		txg_wait_synced(dmu_objset_pool(os), txg);
2810716fd348SMartin Matuska 	} else {
2811716fd348SMartin Matuska 		(void) dmu_free_long_range(os, object, offset, size);
2812716fd348SMartin Matuska 	}
2813716fd348SMartin Matuska 
2814716fd348SMartin Matuska 	ztest_range_unlock(rl);
2815716fd348SMartin Matuska 	ztest_object_unlock(zd, object);
2816716fd348SMartin Matuska }
2817716fd348SMartin Matuska 
2818716fd348SMartin Matuska static void
2819716fd348SMartin Matuska ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
2820716fd348SMartin Matuska {
2821716fd348SMartin Matuska 	int err;
2822716fd348SMartin Matuska 	ztest_block_tag_t wbt;
2823716fd348SMartin Matuska 	dmu_object_info_t doi;
2824716fd348SMartin Matuska 	enum ztest_io_type io_type;
2825716fd348SMartin Matuska 	uint64_t blocksize;
2826716fd348SMartin Matuska 	void *data;
28277a7741afSMartin Matuska 	uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH;
28287a7741afSMartin Matuska 
28297a7741afSMartin Matuska 	/*
28307a7741afSMartin Matuska 	 * We will randomly set when to do O_DIRECT on a read.
28317a7741afSMartin Matuska 	 */
28327a7741afSMartin Matuska 	if (ztest_random(4) == 0)
28337a7741afSMartin Matuska 		dmu_read_flags |= DMU_DIRECTIO;
2834716fd348SMartin Matuska 
2835716fd348SMartin Matuska 	VERIFY0(dmu_object_info(zd->zd_os, object, &doi));
2836716fd348SMartin Matuska 	blocksize = doi.doi_data_block_size;
2837716fd348SMartin Matuska 	data = umem_alloc(blocksize, UMEM_NOFAIL);
2838716fd348SMartin Matuska 
2839716fd348SMartin Matuska 	/*
2840716fd348SMartin Matuska 	 * Pick an i/o type at random, biased toward writing block tags.
2841716fd348SMartin Matuska 	 */
2842716fd348SMartin Matuska 	io_type = ztest_random(ZTEST_IO_TYPES);
2843716fd348SMartin Matuska 	if (ztest_random(2) == 0)
2844716fd348SMartin Matuska 		io_type = ZTEST_IO_WRITE_TAG;
2845716fd348SMartin Matuska 
2846716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&zd->zd_zilog_lock);
2847716fd348SMartin Matuska 
2848716fd348SMartin Matuska 	switch (io_type) {
2849716fd348SMartin Matuska 
2850716fd348SMartin Matuska 	case ZTEST_IO_WRITE_TAG:
2851716fd348SMartin Matuska 		ztest_bt_generate(&wbt, zd->zd_os, object, doi.doi_dnodesize,
2852716fd348SMartin Matuska 		    offset, 0, 0, 0);
2853716fd348SMartin Matuska 		(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
2854716fd348SMartin Matuska 		break;
2855716fd348SMartin Matuska 
2856716fd348SMartin Matuska 	case ZTEST_IO_WRITE_PATTERN:
2857716fd348SMartin Matuska 		(void) memset(data, 'a' + (object + offset) % 5, blocksize);
2858716fd348SMartin Matuska 		if (ztest_random(2) == 0) {
2859716fd348SMartin Matuska 			/*
2860716fd348SMartin Matuska 			 * Induce fletcher2 collisions to ensure that
2861716fd348SMartin Matuska 			 * zio_ddt_collision() detects and resolves them
2862716fd348SMartin Matuska 			 * when using fletcher2-verify for deduplication.
2863716fd348SMartin Matuska 			 */
2864716fd348SMartin Matuska 			((uint64_t *)data)[0] ^= 1ULL << 63;
2865716fd348SMartin Matuska 			((uint64_t *)data)[4] ^= 1ULL << 63;
2866716fd348SMartin Matuska 		}
2867716fd348SMartin Matuska 		(void) ztest_write(zd, object, offset, blocksize, data);
2868716fd348SMartin Matuska 		break;
2869716fd348SMartin Matuska 
2870716fd348SMartin Matuska 	case ZTEST_IO_WRITE_ZEROES:
2871716fd348SMartin Matuska 		memset(data, 0, blocksize);
2872716fd348SMartin Matuska 		(void) ztest_write(zd, object, offset, blocksize, data);
2873716fd348SMartin Matuska 		break;
2874716fd348SMartin Matuska 
2875716fd348SMartin Matuska 	case ZTEST_IO_TRUNCATE:
2876716fd348SMartin Matuska 		(void) ztest_truncate(zd, object, offset, blocksize);
2877716fd348SMartin Matuska 		break;
2878716fd348SMartin Matuska 
2879716fd348SMartin Matuska 	case ZTEST_IO_SETATTR:
2880716fd348SMartin Matuska 		(void) ztest_setattr(zd, object);
2881716fd348SMartin Matuska 		break;
2882716fd348SMartin Matuska 	default:
2883716fd348SMartin Matuska 		break;
2884716fd348SMartin Matuska 
2885716fd348SMartin Matuska 	case ZTEST_IO_REWRITE:
2886716fd348SMartin Matuska 		(void) pthread_rwlock_rdlock(&ztest_name_lock);
2887716fd348SMartin Matuska 		err = ztest_dsl_prop_set_uint64(zd->zd_name,
2888716fd348SMartin Matuska 		    ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
2889716fd348SMartin Matuska 		    B_FALSE);
289015f0b8c3SMartin Matuska 		ASSERT(err == 0 || err == ENOSPC);
2891716fd348SMartin Matuska 		err = ztest_dsl_prop_set_uint64(zd->zd_name,
2892716fd348SMartin Matuska 		    ZFS_PROP_COMPRESSION,
2893716fd348SMartin Matuska 		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
2894716fd348SMartin Matuska 		    B_FALSE);
289515f0b8c3SMartin Matuska 		ASSERT(err == 0 || err == ENOSPC);
2896716fd348SMartin Matuska 		(void) pthread_rwlock_unlock(&ztest_name_lock);
2897716fd348SMartin Matuska 
2898716fd348SMartin Matuska 		VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
28997a7741afSMartin Matuska 		    dmu_read_flags));
2900716fd348SMartin Matuska 
2901716fd348SMartin Matuska 		(void) ztest_write(zd, object, offset, blocksize, data);
2902716fd348SMartin Matuska 		break;
2903716fd348SMartin Matuska 	}
2904716fd348SMartin Matuska 
2905716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
2906716fd348SMartin Matuska 
2907716fd348SMartin Matuska 	umem_free(data, blocksize);
2908716fd348SMartin Matuska }
2909716fd348SMartin Matuska 
2910716fd348SMartin Matuska /*
2911716fd348SMartin Matuska  * Initialize an object description template.
2912716fd348SMartin Matuska  */
2913716fd348SMartin Matuska static void
2914a0b956f5SMartin Matuska ztest_od_init(ztest_od_t *od, uint64_t id, const char *tag, uint64_t index,
2915716fd348SMartin Matuska     dmu_object_type_t type, uint64_t blocksize, uint64_t dnodesize,
2916716fd348SMartin Matuska     uint64_t gen)
2917716fd348SMartin Matuska {
2918716fd348SMartin Matuska 	od->od_dir = ZTEST_DIROBJ;
2919716fd348SMartin Matuska 	od->od_object = 0;
2920716fd348SMartin Matuska 
2921716fd348SMartin Matuska 	od->od_crtype = type;
2922716fd348SMartin Matuska 	od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
2923716fd348SMartin Matuska 	od->od_crdnodesize = dnodesize ? dnodesize : ztest_random_dnodesize();
2924716fd348SMartin Matuska 	od->od_crgen = gen;
2925716fd348SMartin Matuska 
2926716fd348SMartin Matuska 	od->od_type = DMU_OT_NONE;
2927716fd348SMartin Matuska 	od->od_blocksize = 0;
2928716fd348SMartin Matuska 	od->od_gen = 0;
2929716fd348SMartin Matuska 
2930716fd348SMartin Matuska 	(void) snprintf(od->od_name, sizeof (od->od_name),
2931716fd348SMartin Matuska 	    "%s(%"PRId64")[%"PRIu64"]",
2932716fd348SMartin Matuska 	    tag, id, index);
2933716fd348SMartin Matuska }
2934716fd348SMartin Matuska 
2935716fd348SMartin Matuska /*
2936716fd348SMartin Matuska  * Lookup or create the objects for a test using the od template.
2937716fd348SMartin Matuska  * If the objects do not all exist, or if 'remove' is specified,
2938716fd348SMartin Matuska  * remove any existing objects and create new ones.  Otherwise,
2939716fd348SMartin Matuska  * use the existing objects.
2940716fd348SMartin Matuska  */
2941716fd348SMartin Matuska static int
2942716fd348SMartin Matuska ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
2943716fd348SMartin Matuska {
2944716fd348SMartin Matuska 	int count = size / sizeof (*od);
2945716fd348SMartin Matuska 	int rv = 0;
2946716fd348SMartin Matuska 
2947716fd348SMartin Matuska 	mutex_enter(&zd->zd_dirobj_lock);
2948716fd348SMartin Matuska 	if ((ztest_lookup(zd, od, count) != 0 || remove) &&
2949716fd348SMartin Matuska 	    (ztest_remove(zd, od, count) != 0 ||
2950716fd348SMartin Matuska 	    ztest_create(zd, od, count) != 0))
2951716fd348SMartin Matuska 		rv = -1;
2952716fd348SMartin Matuska 	zd->zd_od = od;
2953716fd348SMartin Matuska 	mutex_exit(&zd->zd_dirobj_lock);
2954716fd348SMartin Matuska 
2955716fd348SMartin Matuska 	return (rv);
2956716fd348SMartin Matuska }
2957716fd348SMartin Matuska 
2958716fd348SMartin Matuska void
2959716fd348SMartin Matuska ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
2960716fd348SMartin Matuska {
2961716fd348SMartin Matuska 	(void) id;
2962716fd348SMartin Matuska 	zilog_t *zilog = zd->zd_zilog;
2963716fd348SMartin Matuska 
2964716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&zd->zd_zilog_lock);
2965716fd348SMartin Matuska 
2966716fd348SMartin Matuska 	zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
2967716fd348SMartin Matuska 
2968716fd348SMartin Matuska 	/*
2969716fd348SMartin Matuska 	 * Remember the committed values in zd, which is in parent/child
2970716fd348SMartin Matuska 	 * shared memory.  If we die, the next iteration of ztest_run()
2971716fd348SMartin Matuska 	 * will verify that the log really does contain this record.
2972716fd348SMartin Matuska 	 */
2973716fd348SMartin Matuska 	mutex_enter(&zilog->zl_lock);
2974716fd348SMartin Matuska 	ASSERT3P(zd->zd_shared, !=, NULL);
2975716fd348SMartin Matuska 	ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
2976716fd348SMartin Matuska 	zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
2977716fd348SMartin Matuska 	mutex_exit(&zilog->zl_lock);
2978716fd348SMartin Matuska 
2979716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
2980716fd348SMartin Matuska }
2981716fd348SMartin Matuska 
2982716fd348SMartin Matuska /*
2983716fd348SMartin Matuska  * This function is designed to simulate the operations that occur during a
2984716fd348SMartin Matuska  * mount/unmount operation.  We hold the dataset across these operations in an
2985716fd348SMartin Matuska  * attempt to expose any implicit assumptions about ZIL management.
2986716fd348SMartin Matuska  */
2987716fd348SMartin Matuska void
2988716fd348SMartin Matuska ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
2989716fd348SMartin Matuska {
2990716fd348SMartin Matuska 	(void) id;
2991716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
2992716fd348SMartin Matuska 
2993716fd348SMartin Matuska 	/*
2994716fd348SMartin Matuska 	 * We hold the ztest_vdev_lock so we don't cause problems with
2995716fd348SMartin Matuska 	 * other threads that wish to remove a log device, such as
2996716fd348SMartin Matuska 	 * ztest_device_removal().
2997716fd348SMartin Matuska 	 */
2998716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
2999716fd348SMartin Matuska 
3000716fd348SMartin Matuska 	/*
3001716fd348SMartin Matuska 	 * We grab the zd_dirobj_lock to ensure that no other thread is
3002716fd348SMartin Matuska 	 * updating the zil (i.e. adding in-memory log records) and the
3003716fd348SMartin Matuska 	 * zd_zilog_lock to block any I/O.
3004716fd348SMartin Matuska 	 */
3005716fd348SMartin Matuska 	mutex_enter(&zd->zd_dirobj_lock);
3006716fd348SMartin Matuska 	(void) pthread_rwlock_wrlock(&zd->zd_zilog_lock);
3007716fd348SMartin Matuska 
3008716fd348SMartin Matuska 	/* zfsvfs_teardown() */
3009716fd348SMartin Matuska 	zil_close(zd->zd_zilog);
3010716fd348SMartin Matuska 
3011716fd348SMartin Matuska 	/* zfsvfs_setup() */
3012271171e0SMartin Matuska 	VERIFY3P(zil_open(os, ztest_get_data, NULL), ==, zd->zd_zilog);
3013716fd348SMartin Matuska 	zil_replay(os, zd, ztest_replay_vector);
3014716fd348SMartin Matuska 
3015716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
3016716fd348SMartin Matuska 	mutex_exit(&zd->zd_dirobj_lock);
3017716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3018716fd348SMartin Matuska }
3019716fd348SMartin Matuska 
3020716fd348SMartin Matuska /*
3021716fd348SMartin Matuska  * Verify that we can't destroy an active pool, create an existing pool,
3022716fd348SMartin Matuska  * or create a pool with a bad vdev spec.
3023716fd348SMartin Matuska  */
3024716fd348SMartin Matuska void
3025716fd348SMartin Matuska ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
3026716fd348SMartin Matuska {
3027716fd348SMartin Matuska 	(void) zd, (void) id;
3028716fd348SMartin Matuska 	ztest_shared_opts_t *zo = &ztest_opts;
3029716fd348SMartin Matuska 	spa_t *spa;
3030716fd348SMartin Matuska 	nvlist_t *nvroot;
3031716fd348SMartin Matuska 
3032716fd348SMartin Matuska 	if (zo->zo_mmp_test)
3033716fd348SMartin Matuska 		return;
3034716fd348SMartin Matuska 
3035716fd348SMartin Matuska 	/*
3036716fd348SMartin Matuska 	 * Attempt to create using a bad file.
3037716fd348SMartin Matuska 	 */
3038716fd348SMartin Matuska 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
3039716fd348SMartin Matuska 	VERIFY3U(ENOENT, ==,
3040716fd348SMartin Matuska 	    spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
3041716fd348SMartin Matuska 	fnvlist_free(nvroot);
3042716fd348SMartin Matuska 
3043716fd348SMartin Matuska 	/*
3044716fd348SMartin Matuska 	 * Attempt to create using a bad mirror.
3045716fd348SMartin Matuska 	 */
3046716fd348SMartin Matuska 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1);
3047716fd348SMartin Matuska 	VERIFY3U(ENOENT, ==,
3048716fd348SMartin Matuska 	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
3049716fd348SMartin Matuska 	fnvlist_free(nvroot);
3050716fd348SMartin Matuska 
3051716fd348SMartin Matuska 	/*
3052716fd348SMartin Matuska 	 * Attempt to create an existing pool.  It shouldn't matter
3053716fd348SMartin Matuska 	 * what's in the nvroot; we should fail with EEXIST.
3054716fd348SMartin Matuska 	 */
3055716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
3056716fd348SMartin Matuska 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
3057716fd348SMartin Matuska 	VERIFY3U(EEXIST, ==,
3058716fd348SMartin Matuska 	    spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
3059716fd348SMartin Matuska 	fnvlist_free(nvroot);
3060716fd348SMartin Matuska 
3061716fd348SMartin Matuska 	/*
3062716fd348SMartin Matuska 	 * We open a reference to the spa and then we try to export it
3063716fd348SMartin Matuska 	 * expecting one of the following errors:
3064716fd348SMartin Matuska 	 *
3065716fd348SMartin Matuska 	 * EBUSY
3066716fd348SMartin Matuska 	 *	Because of the reference we just opened.
3067716fd348SMartin Matuska 	 *
3068716fd348SMartin Matuska 	 * ZFS_ERR_EXPORT_IN_PROGRESS
3069716fd348SMartin Matuska 	 *	For the case that there is another ztest thread doing
3070716fd348SMartin Matuska 	 *	an export concurrently.
3071716fd348SMartin Matuska 	 */
3072716fd348SMartin Matuska 	VERIFY0(spa_open(zo->zo_pool, &spa, FTAG));
3073716fd348SMartin Matuska 	int error = spa_destroy(zo->zo_pool);
3074716fd348SMartin Matuska 	if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) {
3075716fd348SMartin Matuska 		fatal(B_FALSE, "spa_destroy(%s) returned unexpected value %d",
3076716fd348SMartin Matuska 		    spa->spa_name, error);
3077716fd348SMartin Matuska 	}
3078716fd348SMartin Matuska 	spa_close(spa, FTAG);
3079716fd348SMartin Matuska 
3080716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
3081716fd348SMartin Matuska }
3082716fd348SMartin Matuska 
3083716fd348SMartin Matuska /*
3084716fd348SMartin Matuska  * Start and then stop the MMP threads to ensure the startup and shutdown code
3085716fd348SMartin Matuska  * works properly.  Actual protection and property-related code tested via ZTS.
3086716fd348SMartin Matuska  */
3087716fd348SMartin Matuska void
3088716fd348SMartin Matuska ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id)
3089716fd348SMartin Matuska {
3090716fd348SMartin Matuska 	(void) zd, (void) id;
3091716fd348SMartin Matuska 	ztest_shared_opts_t *zo = &ztest_opts;
3092716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3093716fd348SMartin Matuska 
3094716fd348SMartin Matuska 	if (zo->zo_mmp_test)
3095716fd348SMartin Matuska 		return;
3096716fd348SMartin Matuska 
3097716fd348SMartin Matuska 	/*
3098716fd348SMartin Matuska 	 * Since enabling MMP involves setting a property, it could not be done
3099716fd348SMartin Matuska 	 * while the pool is suspended.
3100716fd348SMartin Matuska 	 */
3101716fd348SMartin Matuska 	if (spa_suspended(spa))
3102716fd348SMartin Matuska 		return;
3103716fd348SMartin Matuska 
3104716fd348SMartin Matuska 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3105716fd348SMartin Matuska 	mutex_enter(&spa->spa_props_lock);
3106716fd348SMartin Matuska 
3107716fd348SMartin Matuska 	zfs_multihost_fail_intervals = 0;
3108716fd348SMartin Matuska 
3109716fd348SMartin Matuska 	if (!spa_multihost(spa)) {
3110716fd348SMartin Matuska 		spa->spa_multihost = B_TRUE;
3111716fd348SMartin Matuska 		mmp_thread_start(spa);
3112716fd348SMartin Matuska 	}
3113716fd348SMartin Matuska 
3114716fd348SMartin Matuska 	mutex_exit(&spa->spa_props_lock);
3115716fd348SMartin Matuska 	spa_config_exit(spa, SCL_CONFIG, FTAG);
3116716fd348SMartin Matuska 
3117716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
3118716fd348SMartin Matuska 	mmp_signal_all_threads();
3119716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
3120716fd348SMartin Matuska 
3121716fd348SMartin Matuska 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3122716fd348SMartin Matuska 	mutex_enter(&spa->spa_props_lock);
3123716fd348SMartin Matuska 
3124716fd348SMartin Matuska 	if (spa_multihost(spa)) {
3125716fd348SMartin Matuska 		mmp_thread_stop(spa);
3126716fd348SMartin Matuska 		spa->spa_multihost = B_FALSE;
3127716fd348SMartin Matuska 	}
3128716fd348SMartin Matuska 
3129716fd348SMartin Matuska 	mutex_exit(&spa->spa_props_lock);
3130716fd348SMartin Matuska 	spa_config_exit(spa, SCL_CONFIG, FTAG);
3131716fd348SMartin Matuska }
3132716fd348SMartin Matuska 
3133e716630dSMartin Matuska static int
3134e716630dSMartin Matuska ztest_get_raidz_children(spa_t *spa)
3135e716630dSMartin Matuska {
3136e716630dSMartin Matuska 	(void) spa;
3137e716630dSMartin Matuska 	vdev_t *raidvd;
3138e716630dSMartin Matuska 
3139e716630dSMartin Matuska 	ASSERT(MUTEX_HELD(&ztest_vdev_lock));
3140e716630dSMartin Matuska 
3141e716630dSMartin Matuska 	if (ztest_opts.zo_raid_do_expand) {
3142e716630dSMartin Matuska 		raidvd = ztest_spa->spa_root_vdev->vdev_child[0];
3143e716630dSMartin Matuska 
3144e716630dSMartin Matuska 		ASSERT(raidvd->vdev_ops == &vdev_raidz_ops);
3145e716630dSMartin Matuska 
3146e716630dSMartin Matuska 		return (raidvd->vdev_children);
3147e716630dSMartin Matuska 	}
3148e716630dSMartin Matuska 
3149e716630dSMartin Matuska 	return (ztest_opts.zo_raid_children);
3150e716630dSMartin Matuska }
3151e716630dSMartin Matuska 
3152716fd348SMartin Matuska void
3153716fd348SMartin Matuska ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
3154716fd348SMartin Matuska {
3155716fd348SMartin Matuska 	(void) zd, (void) id;
3156716fd348SMartin Matuska 	spa_t *spa;
3157716fd348SMartin Matuska 	uint64_t initial_version = SPA_VERSION_INITIAL;
3158e716630dSMartin Matuska 	uint64_t raidz_children, version, newversion;
3159716fd348SMartin Matuska 	nvlist_t *nvroot, *props;
3160716fd348SMartin Matuska 	char *name;
3161716fd348SMartin Matuska 
3162716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
3163716fd348SMartin Matuska 		return;
3164716fd348SMartin Matuska 
3165716fd348SMartin Matuska 	/* dRAID added after feature flags, skip upgrade test. */
3166716fd348SMartin Matuska 	if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0)
3167716fd348SMartin Matuska 		return;
3168716fd348SMartin Matuska 
3169716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3170716fd348SMartin Matuska 	name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
3171716fd348SMartin Matuska 
3172716fd348SMartin Matuska 	/*
3173716fd348SMartin Matuska 	 * Clean up from previous runs.
3174716fd348SMartin Matuska 	 */
3175716fd348SMartin Matuska 	(void) spa_destroy(name);
3176716fd348SMartin Matuska 
3177e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(ztest_spa);
3178e716630dSMartin Matuska 
3179716fd348SMartin Matuska 	nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
3180e716630dSMartin Matuska 	    NULL, raidz_children, ztest_opts.zo_mirrors, 1);
3181716fd348SMartin Matuska 
3182716fd348SMartin Matuska 	/*
3183716fd348SMartin Matuska 	 * If we're configuring a RAIDZ device then make sure that the
3184716fd348SMartin Matuska 	 * initial version is capable of supporting that feature.
3185716fd348SMartin Matuska 	 */
3186716fd348SMartin Matuska 	switch (ztest_opts.zo_raid_parity) {
3187716fd348SMartin Matuska 	case 0:
3188716fd348SMartin Matuska 	case 1:
3189716fd348SMartin Matuska 		initial_version = SPA_VERSION_INITIAL;
3190716fd348SMartin Matuska 		break;
3191716fd348SMartin Matuska 	case 2:
3192716fd348SMartin Matuska 		initial_version = SPA_VERSION_RAIDZ2;
3193716fd348SMartin Matuska 		break;
3194716fd348SMartin Matuska 	case 3:
3195716fd348SMartin Matuska 		initial_version = SPA_VERSION_RAIDZ3;
3196716fd348SMartin Matuska 		break;
3197716fd348SMartin Matuska 	}
3198716fd348SMartin Matuska 
3199716fd348SMartin Matuska 	/*
3200716fd348SMartin Matuska 	 * Create a pool with a spa version that can be upgraded. Pick
3201716fd348SMartin Matuska 	 * a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
3202716fd348SMartin Matuska 	 */
3203716fd348SMartin Matuska 	do {
3204716fd348SMartin Matuska 		version = ztest_random_spa_version(initial_version);
3205716fd348SMartin Matuska 	} while (version > SPA_VERSION_BEFORE_FEATURES);
3206716fd348SMartin Matuska 
3207716fd348SMartin Matuska 	props = fnvlist_alloc();
3208716fd348SMartin Matuska 	fnvlist_add_uint64(props,
3209716fd348SMartin Matuska 	    zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
3210716fd348SMartin Matuska 	VERIFY0(spa_create(name, nvroot, props, NULL, NULL));
3211716fd348SMartin Matuska 	fnvlist_free(nvroot);
3212716fd348SMartin Matuska 	fnvlist_free(props);
3213716fd348SMartin Matuska 
3214716fd348SMartin Matuska 	VERIFY0(spa_open(name, &spa, FTAG));
3215716fd348SMartin Matuska 	VERIFY3U(spa_version(spa), ==, version);
3216716fd348SMartin Matuska 	newversion = ztest_random_spa_version(version + 1);
3217716fd348SMartin Matuska 
3218716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 4) {
3219716fd348SMartin Matuska 		(void) printf("upgrading spa version from "
3220716fd348SMartin Matuska 		    "%"PRIu64" to %"PRIu64"\n",
3221716fd348SMartin Matuska 		    version, newversion);
3222716fd348SMartin Matuska 	}
3223716fd348SMartin Matuska 
3224716fd348SMartin Matuska 	spa_upgrade(spa, newversion);
3225716fd348SMartin Matuska 	VERIFY3U(spa_version(spa), >, version);
3226716fd348SMartin Matuska 	VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
3227716fd348SMartin Matuska 	    zpool_prop_to_name(ZPOOL_PROP_VERSION)));
3228716fd348SMartin Matuska 	spa_close(spa, FTAG);
3229716fd348SMartin Matuska 
3230716fd348SMartin Matuska 	kmem_strfree(name);
3231716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3232716fd348SMartin Matuska }
3233716fd348SMartin Matuska 
3234716fd348SMartin Matuska static void
3235716fd348SMartin Matuska ztest_spa_checkpoint(spa_t *spa)
3236716fd348SMartin Matuska {
3237716fd348SMartin Matuska 	ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
3238716fd348SMartin Matuska 
3239716fd348SMartin Matuska 	int error = spa_checkpoint(spa->spa_name);
3240716fd348SMartin Matuska 
3241716fd348SMartin Matuska 	switch (error) {
3242716fd348SMartin Matuska 	case 0:
3243716fd348SMartin Matuska 	case ZFS_ERR_DEVRM_IN_PROGRESS:
3244716fd348SMartin Matuska 	case ZFS_ERR_DISCARDING_CHECKPOINT:
3245716fd348SMartin Matuska 	case ZFS_ERR_CHECKPOINT_EXISTS:
3246e716630dSMartin Matuska 	case ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS:
3247716fd348SMartin Matuska 		break;
3248716fd348SMartin Matuska 	case ENOSPC:
3249716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
3250716fd348SMartin Matuska 		break;
3251716fd348SMartin Matuska 	default:
3252716fd348SMartin Matuska 		fatal(B_FALSE, "spa_checkpoint(%s) = %d", spa->spa_name, error);
3253716fd348SMartin Matuska 	}
3254716fd348SMartin Matuska }
3255716fd348SMartin Matuska 
3256716fd348SMartin Matuska static void
3257716fd348SMartin Matuska ztest_spa_discard_checkpoint(spa_t *spa)
3258716fd348SMartin Matuska {
3259716fd348SMartin Matuska 	ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));
3260716fd348SMartin Matuska 
3261716fd348SMartin Matuska 	int error = spa_checkpoint_discard(spa->spa_name);
3262716fd348SMartin Matuska 
3263716fd348SMartin Matuska 	switch (error) {
3264716fd348SMartin Matuska 	case 0:
3265716fd348SMartin Matuska 	case ZFS_ERR_DISCARDING_CHECKPOINT:
3266716fd348SMartin Matuska 	case ZFS_ERR_NO_CHECKPOINT:
3267716fd348SMartin Matuska 		break;
3268716fd348SMartin Matuska 	default:
3269716fd348SMartin Matuska 		fatal(B_FALSE, "spa_discard_checkpoint(%s) = %d",
3270716fd348SMartin Matuska 		    spa->spa_name, error);
3271716fd348SMartin Matuska 	}
3272716fd348SMartin Matuska 
3273716fd348SMartin Matuska }
3274716fd348SMartin Matuska 
3275716fd348SMartin Matuska void
3276716fd348SMartin Matuska ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id)
3277716fd348SMartin Matuska {
3278716fd348SMartin Matuska 	(void) zd, (void) id;
3279716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3280716fd348SMartin Matuska 
3281716fd348SMartin Matuska 	mutex_enter(&ztest_checkpoint_lock);
3282716fd348SMartin Matuska 	if (ztest_random(2) == 0) {
3283716fd348SMartin Matuska 		ztest_spa_checkpoint(spa);
3284716fd348SMartin Matuska 	} else {
3285716fd348SMartin Matuska 		ztest_spa_discard_checkpoint(spa);
3286716fd348SMartin Matuska 	}
3287716fd348SMartin Matuska 	mutex_exit(&ztest_checkpoint_lock);
3288716fd348SMartin Matuska }
3289716fd348SMartin Matuska 
3290716fd348SMartin Matuska 
3291716fd348SMartin Matuska static vdev_t *
3292716fd348SMartin Matuska vdev_lookup_by_path(vdev_t *vd, const char *path)
3293716fd348SMartin Matuska {
3294716fd348SMartin Matuska 	vdev_t *mvd;
3295716fd348SMartin Matuska 	int c;
3296716fd348SMartin Matuska 
3297716fd348SMartin Matuska 	if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
3298716fd348SMartin Matuska 		return (vd);
3299716fd348SMartin Matuska 
3300716fd348SMartin Matuska 	for (c = 0; c < vd->vdev_children; c++)
3301716fd348SMartin Matuska 		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
3302716fd348SMartin Matuska 		    NULL)
3303716fd348SMartin Matuska 			return (mvd);
3304716fd348SMartin Matuska 
3305716fd348SMartin Matuska 	return (NULL);
3306716fd348SMartin Matuska }
3307716fd348SMartin Matuska 
3308716fd348SMartin Matuska static int
3309716fd348SMartin Matuska spa_num_top_vdevs(spa_t *spa)
3310716fd348SMartin Matuska {
3311716fd348SMartin Matuska 	vdev_t *rvd = spa->spa_root_vdev;
3312716fd348SMartin Matuska 	ASSERT3U(spa_config_held(spa, SCL_VDEV, RW_READER), ==, SCL_VDEV);
3313716fd348SMartin Matuska 	return (rvd->vdev_children);
3314716fd348SMartin Matuska }
3315716fd348SMartin Matuska 
3316716fd348SMartin Matuska /*
3317716fd348SMartin Matuska  * Verify that vdev_add() works as expected.
3318716fd348SMartin Matuska  */
3319716fd348SMartin Matuska void
3320716fd348SMartin Matuska ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
3321716fd348SMartin Matuska {
3322716fd348SMartin Matuska 	(void) zd, (void) id;
3323716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
3324716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3325716fd348SMartin Matuska 	uint64_t leaves;
3326716fd348SMartin Matuska 	uint64_t guid;
3327e716630dSMartin Matuska 	uint64_t raidz_children;
3328e716630dSMartin Matuska 
3329716fd348SMartin Matuska 	nvlist_t *nvroot;
3330716fd348SMartin Matuska 	int error;
3331716fd348SMartin Matuska 
3332716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
3333716fd348SMartin Matuska 		return;
3334716fd348SMartin Matuska 
3335716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3336e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(spa);
3337e716630dSMartin Matuska 	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children;
3338716fd348SMartin Matuska 
3339716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3340716fd348SMartin Matuska 
3341716fd348SMartin Matuska 	ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
3342716fd348SMartin Matuska 
3343716fd348SMartin Matuska 	/*
3344716fd348SMartin Matuska 	 * If we have slogs then remove them 1/4 of the time.
3345716fd348SMartin Matuska 	 */
3346716fd348SMartin Matuska 	if (spa_has_slogs(spa) && ztest_random(4) == 0) {
3347716fd348SMartin Matuska 		metaslab_group_t *mg;
3348716fd348SMartin Matuska 
3349716fd348SMartin Matuska 		/*
3350716fd348SMartin Matuska 		 * find the first real slog in log allocation class
3351716fd348SMartin Matuska 		 */
3352716fd348SMartin Matuska 		mg =  spa_log_class(spa)->mc_allocator[0].mca_rotor;
3353716fd348SMartin Matuska 		while (!mg->mg_vd->vdev_islog)
3354716fd348SMartin Matuska 			mg = mg->mg_next;
3355716fd348SMartin Matuska 
3356716fd348SMartin Matuska 		guid = mg->mg_vd->vdev_guid;
3357716fd348SMartin Matuska 
3358716fd348SMartin Matuska 		spa_config_exit(spa, SCL_VDEV, FTAG);
3359716fd348SMartin Matuska 
3360716fd348SMartin Matuska 		/*
3361716fd348SMartin Matuska 		 * We have to grab the zs_name_lock as writer to
3362716fd348SMartin Matuska 		 * prevent a race between removing a slog (dmu_objset_find)
3363716fd348SMartin Matuska 		 * and destroying a dataset. Removing the slog will
3364716fd348SMartin Matuska 		 * grab a reference on the dataset which may cause
3365716fd348SMartin Matuska 		 * dsl_destroy_head() to fail with EBUSY thus
3366716fd348SMartin Matuska 		 * leaving the dataset in an inconsistent state.
3367716fd348SMartin Matuska 		 */
3368716fd348SMartin Matuska 		pthread_rwlock_wrlock(&ztest_name_lock);
3369716fd348SMartin Matuska 		error = spa_vdev_remove(spa, guid, B_FALSE);
3370716fd348SMartin Matuska 		pthread_rwlock_unlock(&ztest_name_lock);
3371716fd348SMartin Matuska 
3372716fd348SMartin Matuska 		switch (error) {
3373716fd348SMartin Matuska 		case 0:
3374716fd348SMartin Matuska 		case EEXIST:	/* Generic zil_reset() error */
3375716fd348SMartin Matuska 		case EBUSY:	/* Replay required */
3376716fd348SMartin Matuska 		case EACCES:	/* Crypto key not loaded */
3377716fd348SMartin Matuska 		case ZFS_ERR_CHECKPOINT_EXISTS:
3378716fd348SMartin Matuska 		case ZFS_ERR_DISCARDING_CHECKPOINT:
3379716fd348SMartin Matuska 			break;
3380716fd348SMartin Matuska 		default:
3381716fd348SMartin Matuska 			fatal(B_FALSE, "spa_vdev_remove() = %d", error);
3382716fd348SMartin Matuska 		}
3383716fd348SMartin Matuska 	} else {
3384716fd348SMartin Matuska 		spa_config_exit(spa, SCL_VDEV, FTAG);
3385716fd348SMartin Matuska 
3386716fd348SMartin Matuska 		/*
3387716fd348SMartin Matuska 		 * Make 1/4 of the devices be log devices
3388716fd348SMartin Matuska 		 */
3389716fd348SMartin Matuska 		nvroot = make_vdev_root(NULL, NULL, NULL,
3390716fd348SMartin Matuska 		    ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ?
3391e716630dSMartin Matuska 		    "log" : NULL, raidz_children, zs->zs_mirrors,
3392716fd348SMartin Matuska 		    1);
3393716fd348SMartin Matuska 
3394783d3ff6SMartin Matuska 		error = spa_vdev_add(spa, nvroot, B_FALSE);
3395716fd348SMartin Matuska 		fnvlist_free(nvroot);
3396716fd348SMartin Matuska 
3397716fd348SMartin Matuska 		switch (error) {
3398716fd348SMartin Matuska 		case 0:
3399716fd348SMartin Matuska 			break;
3400716fd348SMartin Matuska 		case ENOSPC:
3401716fd348SMartin Matuska 			ztest_record_enospc("spa_vdev_add");
3402716fd348SMartin Matuska 			break;
3403716fd348SMartin Matuska 		default:
3404716fd348SMartin Matuska 			fatal(B_FALSE, "spa_vdev_add() = %d", error);
3405716fd348SMartin Matuska 		}
3406716fd348SMartin Matuska 	}
3407716fd348SMartin Matuska 
3408716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3409716fd348SMartin Matuska }
3410716fd348SMartin Matuska 
3411716fd348SMartin Matuska void
3412716fd348SMartin Matuska ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id)
3413716fd348SMartin Matuska {
3414716fd348SMartin Matuska 	(void) zd, (void) id;
3415716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
3416716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3417716fd348SMartin Matuska 	uint64_t leaves;
3418716fd348SMartin Matuska 	nvlist_t *nvroot;
3419e716630dSMartin Matuska 	uint64_t raidz_children;
3420716fd348SMartin Matuska 	const char *class = (ztest_random(2) == 0) ?
3421716fd348SMartin Matuska 	    VDEV_ALLOC_BIAS_SPECIAL : VDEV_ALLOC_BIAS_DEDUP;
3422716fd348SMartin Matuska 	int error;
3423716fd348SMartin Matuska 
3424716fd348SMartin Matuska 	/*
3425716fd348SMartin Matuska 	 * By default add a special vdev 50% of the time
3426716fd348SMartin Matuska 	 */
3427716fd348SMartin Matuska 	if ((ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_OFF) ||
3428716fd348SMartin Matuska 	    (ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_RND &&
3429716fd348SMartin Matuska 	    ztest_random(2) == 0)) {
3430716fd348SMartin Matuska 		return;
3431716fd348SMartin Matuska 	}
3432716fd348SMartin Matuska 
3433716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3434716fd348SMartin Matuska 
3435716fd348SMartin Matuska 	/* Only test with mirrors */
3436716fd348SMartin Matuska 	if (zs->zs_mirrors < 2) {
3437716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
3438716fd348SMartin Matuska 		return;
3439716fd348SMartin Matuska 	}
3440716fd348SMartin Matuska 
3441716fd348SMartin Matuska 	/* requires feature@allocation_classes */
3442716fd348SMartin Matuska 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES)) {
3443716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
3444716fd348SMartin Matuska 		return;
3445716fd348SMartin Matuska 	}
3446716fd348SMartin Matuska 
3447e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(spa);
3448e716630dSMartin Matuska 	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children;
3449716fd348SMartin Matuska 
3450716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3451716fd348SMartin Matuska 	ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
3452716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
3453716fd348SMartin Matuska 
3454716fd348SMartin Matuska 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
3455e716630dSMartin Matuska 	    class, raidz_children, zs->zs_mirrors, 1);
3456716fd348SMartin Matuska 
3457783d3ff6SMartin Matuska 	error = spa_vdev_add(spa, nvroot, B_FALSE);
3458716fd348SMartin Matuska 	fnvlist_free(nvroot);
3459716fd348SMartin Matuska 
3460716fd348SMartin Matuska 	if (error == ENOSPC)
3461716fd348SMartin Matuska 		ztest_record_enospc("spa_vdev_add");
3462716fd348SMartin Matuska 	else if (error != 0)
3463716fd348SMartin Matuska 		fatal(B_FALSE, "spa_vdev_add() = %d", error);
3464716fd348SMartin Matuska 
3465716fd348SMartin Matuska 	/*
3466716fd348SMartin Matuska 	 * 50% of the time allow small blocks in the special class
3467716fd348SMartin Matuska 	 */
3468716fd348SMartin Matuska 	if (error == 0 &&
3469716fd348SMartin Matuska 	    spa_special_class(spa)->mc_groups == 1 && ztest_random(2) == 0) {
3470716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 3)
3471716fd348SMartin Matuska 			(void) printf("Enabling special VDEV small blocks\n");
347215f0b8c3SMartin Matuska 		error = ztest_dsl_prop_set_uint64(zd->zd_name,
3473716fd348SMartin Matuska 		    ZFS_PROP_SPECIAL_SMALL_BLOCKS, 32768, B_FALSE);
347415f0b8c3SMartin Matuska 		ASSERT(error == 0 || error == ENOSPC);
3475716fd348SMartin Matuska 	}
3476716fd348SMartin Matuska 
3477716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3478716fd348SMartin Matuska 
3479716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 3) {
3480716fd348SMartin Matuska 		metaslab_class_t *mc;
3481716fd348SMartin Matuska 
3482716fd348SMartin Matuska 		if (strcmp(class, VDEV_ALLOC_BIAS_SPECIAL) == 0)
3483716fd348SMartin Matuska 			mc = spa_special_class(spa);
3484716fd348SMartin Matuska 		else
3485716fd348SMartin Matuska 			mc = spa_dedup_class(spa);
3486716fd348SMartin Matuska 		(void) printf("Added a %s mirrored vdev (of %d)\n",
3487716fd348SMartin Matuska 		    class, (int)mc->mc_groups);
3488716fd348SMartin Matuska 	}
3489716fd348SMartin Matuska }
3490716fd348SMartin Matuska 
3491716fd348SMartin Matuska /*
3492716fd348SMartin Matuska  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
3493716fd348SMartin Matuska  */
3494716fd348SMartin Matuska void
3495716fd348SMartin Matuska ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
3496716fd348SMartin Matuska {
3497716fd348SMartin Matuska 	(void) zd, (void) id;
3498716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
3499716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3500716fd348SMartin Matuska 	vdev_t *rvd = spa->spa_root_vdev;
3501716fd348SMartin Matuska 	spa_aux_vdev_t *sav;
3502a0b956f5SMartin Matuska 	const char *aux;
3503716fd348SMartin Matuska 	char *path;
3504716fd348SMartin Matuska 	uint64_t guid = 0;
3505716fd348SMartin Matuska 	int error, ignore_err = 0;
3506716fd348SMartin Matuska 
3507716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
3508716fd348SMartin Matuska 		return;
3509716fd348SMartin Matuska 
3510716fd348SMartin Matuska 	path = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
3511716fd348SMartin Matuska 
3512716fd348SMartin Matuska 	if (ztest_random(2) == 0) {
3513716fd348SMartin Matuska 		sav = &spa->spa_spares;
3514716fd348SMartin Matuska 		aux = ZPOOL_CONFIG_SPARES;
3515716fd348SMartin Matuska 	} else {
3516716fd348SMartin Matuska 		sav = &spa->spa_l2cache;
3517716fd348SMartin Matuska 		aux = ZPOOL_CONFIG_L2CACHE;
3518716fd348SMartin Matuska 	}
3519716fd348SMartin Matuska 
3520716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3521716fd348SMartin Matuska 
3522716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3523716fd348SMartin Matuska 
3524716fd348SMartin Matuska 	if (sav->sav_count != 0 && ztest_random(4) == 0) {
3525716fd348SMartin Matuska 		/*
3526716fd348SMartin Matuska 		 * Pick a random device to remove.
3527716fd348SMartin Matuska 		 */
3528716fd348SMartin Matuska 		vdev_t *svd = sav->sav_vdevs[ztest_random(sav->sav_count)];
3529716fd348SMartin Matuska 
3530716fd348SMartin Matuska 		/* dRAID spares cannot be removed; try anyways to see ENOTSUP */
3531716fd348SMartin Matuska 		if (strstr(svd->vdev_path, VDEV_TYPE_DRAID) != NULL)
3532716fd348SMartin Matuska 			ignore_err = ENOTSUP;
3533716fd348SMartin Matuska 
3534716fd348SMartin Matuska 		guid = svd->vdev_guid;
3535716fd348SMartin Matuska 	} else {
3536716fd348SMartin Matuska 		/*
3537716fd348SMartin Matuska 		 * Find an unused device we can add.
3538716fd348SMartin Matuska 		 */
3539716fd348SMartin Matuska 		zs->zs_vdev_aux = 0;
3540716fd348SMartin Matuska 		for (;;) {
3541716fd348SMartin Matuska 			int c;
3542716fd348SMartin Matuska 			(void) snprintf(path, MAXPATHLEN, ztest_aux_template,
3543716fd348SMartin Matuska 			    ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
3544716fd348SMartin Matuska 			    zs->zs_vdev_aux);
3545716fd348SMartin Matuska 			for (c = 0; c < sav->sav_count; c++)
3546716fd348SMartin Matuska 				if (strcmp(sav->sav_vdevs[c]->vdev_path,
3547716fd348SMartin Matuska 				    path) == 0)
3548716fd348SMartin Matuska 					break;
3549716fd348SMartin Matuska 			if (c == sav->sav_count &&
3550716fd348SMartin Matuska 			    vdev_lookup_by_path(rvd, path) == NULL)
3551716fd348SMartin Matuska 				break;
3552716fd348SMartin Matuska 			zs->zs_vdev_aux++;
3553716fd348SMartin Matuska 		}
3554716fd348SMartin Matuska 	}
3555716fd348SMartin Matuska 
3556716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
3557716fd348SMartin Matuska 
3558716fd348SMartin Matuska 	if (guid == 0) {
3559716fd348SMartin Matuska 		/*
3560716fd348SMartin Matuska 		 * Add a new device.
3561716fd348SMartin Matuska 		 */
3562716fd348SMartin Matuska 		nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
3563716fd348SMartin Matuska 		    (ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1);
3564783d3ff6SMartin Matuska 		error = spa_vdev_add(spa, nvroot, B_FALSE);
3565716fd348SMartin Matuska 
3566716fd348SMartin Matuska 		switch (error) {
3567716fd348SMartin Matuska 		case 0:
3568716fd348SMartin Matuska 			break;
3569716fd348SMartin Matuska 		default:
3570716fd348SMartin Matuska 			fatal(B_FALSE, "spa_vdev_add(%p) = %d", nvroot, error);
3571716fd348SMartin Matuska 		}
3572716fd348SMartin Matuska 		fnvlist_free(nvroot);
3573716fd348SMartin Matuska 	} else {
3574716fd348SMartin Matuska 		/*
3575716fd348SMartin Matuska 		 * Remove an existing device.  Sometimes, dirty its
3576716fd348SMartin Matuska 		 * vdev state first to make sure we handle removal
3577716fd348SMartin Matuska 		 * of devices that have pending state changes.
3578716fd348SMartin Matuska 		 */
3579716fd348SMartin Matuska 		if (ztest_random(2) == 0)
3580716fd348SMartin Matuska 			(void) vdev_online(spa, guid, 0, NULL);
3581716fd348SMartin Matuska 
3582716fd348SMartin Matuska 		error = spa_vdev_remove(spa, guid, B_FALSE);
3583716fd348SMartin Matuska 
3584716fd348SMartin Matuska 		switch (error) {
3585716fd348SMartin Matuska 		case 0:
3586716fd348SMartin Matuska 		case EBUSY:
3587716fd348SMartin Matuska 		case ZFS_ERR_CHECKPOINT_EXISTS:
3588716fd348SMartin Matuska 		case ZFS_ERR_DISCARDING_CHECKPOINT:
3589716fd348SMartin Matuska 			break;
3590716fd348SMartin Matuska 		default:
3591716fd348SMartin Matuska 			if (error != ignore_err)
3592716fd348SMartin Matuska 				fatal(B_FALSE,
3593716fd348SMartin Matuska 				    "spa_vdev_remove(%"PRIu64") = %d",
3594716fd348SMartin Matuska 				    guid, error);
3595716fd348SMartin Matuska 		}
3596716fd348SMartin Matuska 	}
3597716fd348SMartin Matuska 
3598716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3599716fd348SMartin Matuska 
3600716fd348SMartin Matuska 	umem_free(path, MAXPATHLEN);
3601716fd348SMartin Matuska }
3602716fd348SMartin Matuska 
3603716fd348SMartin Matuska /*
3604716fd348SMartin Matuska  * split a pool if it has mirror tlvdevs
3605716fd348SMartin Matuska  */
3606716fd348SMartin Matuska void
3607716fd348SMartin Matuska ztest_split_pool(ztest_ds_t *zd, uint64_t id)
3608716fd348SMartin Matuska {
3609716fd348SMartin Matuska 	(void) zd, (void) id;
3610716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
3611716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3612716fd348SMartin Matuska 	vdev_t *rvd = spa->spa_root_vdev;
3613716fd348SMartin Matuska 	nvlist_t *tree, **child, *config, *split, **schild;
3614716fd348SMartin Matuska 	uint_t c, children, schildren = 0, lastlogid = 0;
3615716fd348SMartin Matuska 	int error = 0;
3616716fd348SMartin Matuska 
3617716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
3618716fd348SMartin Matuska 		return;
3619716fd348SMartin Matuska 
3620716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3621716fd348SMartin Matuska 
3622716fd348SMartin Matuska 	/* ensure we have a usable config; mirrors of raidz aren't supported */
3623716fd348SMartin Matuska 	if (zs->zs_mirrors < 3 || ztest_opts.zo_raid_children > 1) {
3624716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
3625716fd348SMartin Matuska 		return;
3626716fd348SMartin Matuska 	}
3627716fd348SMartin Matuska 
3628716fd348SMartin Matuska 	/* clean up the old pool, if any */
3629716fd348SMartin Matuska 	(void) spa_destroy("splitp");
3630716fd348SMartin Matuska 
3631716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3632716fd348SMartin Matuska 
3633716fd348SMartin Matuska 	/* generate a config from the existing config */
3634716fd348SMartin Matuska 	mutex_enter(&spa->spa_props_lock);
3635716fd348SMartin Matuska 	tree = fnvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE);
3636716fd348SMartin Matuska 	mutex_exit(&spa->spa_props_lock);
3637716fd348SMartin Matuska 
3638716fd348SMartin Matuska 	VERIFY0(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
3639716fd348SMartin Matuska 	    &child, &children));
3640716fd348SMartin Matuska 
3641dbd5678dSMartin Matuska 	schild = umem_alloc(rvd->vdev_children * sizeof (nvlist_t *),
3642dbd5678dSMartin Matuska 	    UMEM_NOFAIL);
3643716fd348SMartin Matuska 	for (c = 0; c < children; c++) {
3644716fd348SMartin Matuska 		vdev_t *tvd = rvd->vdev_child[c];
3645716fd348SMartin Matuska 		nvlist_t **mchild;
3646716fd348SMartin Matuska 		uint_t mchildren;
3647716fd348SMartin Matuska 
3648716fd348SMartin Matuska 		if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
3649716fd348SMartin Matuska 			schild[schildren] = fnvlist_alloc();
3650716fd348SMartin Matuska 			fnvlist_add_string(schild[schildren],
3651716fd348SMartin Matuska 			    ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE);
3652716fd348SMartin Matuska 			fnvlist_add_uint64(schild[schildren],
3653716fd348SMartin Matuska 			    ZPOOL_CONFIG_IS_HOLE, 1);
3654716fd348SMartin Matuska 			if (lastlogid == 0)
3655716fd348SMartin Matuska 				lastlogid = schildren;
3656716fd348SMartin Matuska 			++schildren;
3657716fd348SMartin Matuska 			continue;
3658716fd348SMartin Matuska 		}
3659716fd348SMartin Matuska 		lastlogid = 0;
3660716fd348SMartin Matuska 		VERIFY0(nvlist_lookup_nvlist_array(child[c],
3661716fd348SMartin Matuska 		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren));
3662716fd348SMartin Matuska 		schild[schildren++] = fnvlist_dup(mchild[0]);
3663716fd348SMartin Matuska 	}
3664716fd348SMartin Matuska 
3665716fd348SMartin Matuska 	/* OK, create a config that can be used to split */
3666716fd348SMartin Matuska 	split = fnvlist_alloc();
3667716fd348SMartin Matuska 	fnvlist_add_string(split, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
3668716fd348SMartin Matuska 	fnvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN,
3669716fd348SMartin Matuska 	    (const nvlist_t **)schild, lastlogid != 0 ? lastlogid : schildren);
3670716fd348SMartin Matuska 
3671716fd348SMartin Matuska 	config = fnvlist_alloc();
3672716fd348SMartin Matuska 	fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split);
3673716fd348SMartin Matuska 
3674716fd348SMartin Matuska 	for (c = 0; c < schildren; c++)
3675716fd348SMartin Matuska 		fnvlist_free(schild[c]);
3676dbd5678dSMartin Matuska 	umem_free(schild, rvd->vdev_children * sizeof (nvlist_t *));
3677716fd348SMartin Matuska 	fnvlist_free(split);
3678716fd348SMartin Matuska 
3679716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
3680716fd348SMartin Matuska 
3681716fd348SMartin Matuska 	(void) pthread_rwlock_wrlock(&ztest_name_lock);
3682716fd348SMartin Matuska 	error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
3683716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
3684716fd348SMartin Matuska 
3685716fd348SMartin Matuska 	fnvlist_free(config);
3686716fd348SMartin Matuska 
3687716fd348SMartin Matuska 	if (error == 0) {
3688716fd348SMartin Matuska 		(void) printf("successful split - results:\n");
3689716fd348SMartin Matuska 		mutex_enter(&spa_namespace_lock);
3690716fd348SMartin Matuska 		show_pool_stats(spa);
3691716fd348SMartin Matuska 		show_pool_stats(spa_lookup("splitp"));
3692716fd348SMartin Matuska 		mutex_exit(&spa_namespace_lock);
3693716fd348SMartin Matuska 		++zs->zs_splits;
3694716fd348SMartin Matuska 		--zs->zs_mirrors;
3695716fd348SMartin Matuska 	}
3696716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3697716fd348SMartin Matuska }
3698716fd348SMartin Matuska 
3699716fd348SMartin Matuska /*
3700716fd348SMartin Matuska  * Verify that we can attach and detach devices.
3701716fd348SMartin Matuska  */
3702716fd348SMartin Matuska void
3703716fd348SMartin Matuska ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
3704716fd348SMartin Matuska {
3705716fd348SMartin Matuska 	(void) zd, (void) id;
3706716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
3707716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
3708716fd348SMartin Matuska 	spa_aux_vdev_t *sav = &spa->spa_spares;
3709716fd348SMartin Matuska 	vdev_t *rvd = spa->spa_root_vdev;
3710716fd348SMartin Matuska 	vdev_t *oldvd, *newvd, *pvd;
3711716fd348SMartin Matuska 	nvlist_t *root;
3712716fd348SMartin Matuska 	uint64_t leaves;
3713716fd348SMartin Matuska 	uint64_t leaf, top;
3714716fd348SMartin Matuska 	uint64_t ashift = ztest_get_ashift();
3715716fd348SMartin Matuska 	uint64_t oldguid, pguid;
3716716fd348SMartin Matuska 	uint64_t oldsize, newsize;
3717e716630dSMartin Matuska 	uint64_t raidz_children;
3718716fd348SMartin Matuska 	char *oldpath, *newpath;
3719716fd348SMartin Matuska 	int replacing;
3720716fd348SMartin Matuska 	int oldvd_has_siblings = B_FALSE;
3721716fd348SMartin Matuska 	int newvd_is_spare = B_FALSE;
3722716fd348SMartin Matuska 	int newvd_is_dspare = B_FALSE;
3723716fd348SMartin Matuska 	int oldvd_is_log;
372415f0b8c3SMartin Matuska 	int oldvd_is_special;
3725716fd348SMartin Matuska 	int error, expected_error;
3726716fd348SMartin Matuska 
3727716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
3728716fd348SMartin Matuska 		return;
3729716fd348SMartin Matuska 
3730716fd348SMartin Matuska 	oldpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
3731716fd348SMartin Matuska 	newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
3732716fd348SMartin Matuska 
3733716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3734e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(spa);
3735e716630dSMartin Matuska 	leaves = MAX(zs->zs_mirrors, 1) * raidz_children;
3736716fd348SMartin Matuska 
3737716fd348SMartin Matuska 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3738716fd348SMartin Matuska 
3739716fd348SMartin Matuska 	/*
3740716fd348SMartin Matuska 	 * If a vdev is in the process of being removed, its removal may
3741716fd348SMartin Matuska 	 * finish while we are in progress, leading to an unexpected error
3742716fd348SMartin Matuska 	 * value.  Don't bother trying to attach while we are in the middle
3743716fd348SMartin Matuska 	 * of removal.
3744716fd348SMartin Matuska 	 */
3745716fd348SMartin Matuska 	if (ztest_device_removal_active) {
3746716fd348SMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
3747716fd348SMartin Matuska 		goto out;
3748716fd348SMartin Matuska 	}
3749716fd348SMartin Matuska 
3750716fd348SMartin Matuska 	/*
3751e716630dSMartin Matuska 	 * RAIDZ leaf VDEV mirrors are not currently supported while a
3752e716630dSMartin Matuska 	 * RAIDZ expansion is in progress.
3753e716630dSMartin Matuska 	 */
3754e716630dSMartin Matuska 	if (ztest_opts.zo_raid_do_expand) {
3755e716630dSMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
3756e716630dSMartin Matuska 		goto out;
3757e716630dSMartin Matuska 	}
3758e716630dSMartin Matuska 
3759e716630dSMartin Matuska 	/*
3760716fd348SMartin Matuska 	 * Decide whether to do an attach or a replace.
3761716fd348SMartin Matuska 	 */
3762716fd348SMartin Matuska 	replacing = ztest_random(2);
3763716fd348SMartin Matuska 
3764716fd348SMartin Matuska 	/*
3765716fd348SMartin Matuska 	 * Pick a random top-level vdev.
3766716fd348SMartin Matuska 	 */
3767716fd348SMartin Matuska 	top = ztest_random_vdev_top(spa, B_TRUE);
3768716fd348SMartin Matuska 
3769716fd348SMartin Matuska 	/*
3770716fd348SMartin Matuska 	 * Pick a random leaf within it.
3771716fd348SMartin Matuska 	 */
3772716fd348SMartin Matuska 	leaf = ztest_random(leaves);
3773716fd348SMartin Matuska 
3774716fd348SMartin Matuska 	/*
3775716fd348SMartin Matuska 	 * Locate this vdev.
3776716fd348SMartin Matuska 	 */
3777716fd348SMartin Matuska 	oldvd = rvd->vdev_child[top];
3778716fd348SMartin Matuska 
3779716fd348SMartin Matuska 	/* pick a child from the mirror */
3780716fd348SMartin Matuska 	if (zs->zs_mirrors >= 1) {
3781716fd348SMartin Matuska 		ASSERT3P(oldvd->vdev_ops, ==, &vdev_mirror_ops);
3782716fd348SMartin Matuska 		ASSERT3U(oldvd->vdev_children, >=, zs->zs_mirrors);
3783e716630dSMartin Matuska 		oldvd = oldvd->vdev_child[leaf / raidz_children];
3784716fd348SMartin Matuska 	}
3785716fd348SMartin Matuska 
3786716fd348SMartin Matuska 	/* pick a child out of the raidz group */
3787716fd348SMartin Matuska 	if (ztest_opts.zo_raid_children > 1) {
3788716fd348SMartin Matuska 		if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0)
3789716fd348SMartin Matuska 			ASSERT3P(oldvd->vdev_ops, ==, &vdev_raidz_ops);
3790716fd348SMartin Matuska 		else
3791716fd348SMartin Matuska 			ASSERT3P(oldvd->vdev_ops, ==, &vdev_draid_ops);
3792e716630dSMartin Matuska 		oldvd = oldvd->vdev_child[leaf % raidz_children];
3793716fd348SMartin Matuska 	}
3794716fd348SMartin Matuska 
3795716fd348SMartin Matuska 	/*
3796716fd348SMartin Matuska 	 * If we're already doing an attach or replace, oldvd may be a
3797716fd348SMartin Matuska 	 * mirror vdev -- in which case, pick a random child.
3798716fd348SMartin Matuska 	 */
3799716fd348SMartin Matuska 	while (oldvd->vdev_children != 0) {
3800716fd348SMartin Matuska 		oldvd_has_siblings = B_TRUE;
3801716fd348SMartin Matuska 		ASSERT3U(oldvd->vdev_children, >=, 2);
3802716fd348SMartin Matuska 		oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
3803716fd348SMartin Matuska 	}
3804716fd348SMartin Matuska 
3805716fd348SMartin Matuska 	oldguid = oldvd->vdev_guid;
3806716fd348SMartin Matuska 	oldsize = vdev_get_min_asize(oldvd);
3807716fd348SMartin Matuska 	oldvd_is_log = oldvd->vdev_top->vdev_islog;
380815f0b8c3SMartin Matuska 	oldvd_is_special =
380915f0b8c3SMartin Matuska 	    oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_SPECIAL ||
381015f0b8c3SMartin Matuska 	    oldvd->vdev_top->vdev_alloc_bias == VDEV_BIAS_DEDUP;
3811be181ee2SMartin Matuska 	(void) strlcpy(oldpath, oldvd->vdev_path, MAXPATHLEN);
3812716fd348SMartin Matuska 	pvd = oldvd->vdev_parent;
3813716fd348SMartin Matuska 	pguid = pvd->vdev_guid;
3814716fd348SMartin Matuska 
3815716fd348SMartin Matuska 	/*
3816716fd348SMartin Matuska 	 * If oldvd has siblings, then half of the time, detach it.  Prior
3817716fd348SMartin Matuska 	 * to the detach the pool is scrubbed in order to prevent creating
3818716fd348SMartin Matuska 	 * unrepairable blocks as a result of the data corruption injection.
3819716fd348SMartin Matuska 	 */
3820716fd348SMartin Matuska 	if (oldvd_has_siblings && ztest_random(2) == 0) {
3821716fd348SMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
3822716fd348SMartin Matuska 
3823716fd348SMartin Matuska 		error = ztest_scrub_impl(spa);
3824716fd348SMartin Matuska 		if (error)
3825716fd348SMartin Matuska 			goto out;
3826716fd348SMartin Matuska 
3827716fd348SMartin Matuska 		error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
3828716fd348SMartin Matuska 		if (error != 0 && error != ENODEV && error != EBUSY &&
3829716fd348SMartin Matuska 		    error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS &&
3830716fd348SMartin Matuska 		    error != ZFS_ERR_DISCARDING_CHECKPOINT)
3831716fd348SMartin Matuska 			fatal(B_FALSE, "detach (%s) returned %d",
3832716fd348SMartin Matuska 			    oldpath, error);
3833716fd348SMartin Matuska 		goto out;
3834716fd348SMartin Matuska 	}
3835716fd348SMartin Matuska 
3836716fd348SMartin Matuska 	/*
3837716fd348SMartin Matuska 	 * For the new vdev, choose with equal probability between the two
3838716fd348SMartin Matuska 	 * standard paths (ending in either 'a' or 'b') or a random hot spare.
3839716fd348SMartin Matuska 	 */
3840716fd348SMartin Matuska 	if (sav->sav_count != 0 && ztest_random(3) == 0) {
3841716fd348SMartin Matuska 		newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
3842716fd348SMartin Matuska 		newvd_is_spare = B_TRUE;
3843716fd348SMartin Matuska 
3844716fd348SMartin Matuska 		if (newvd->vdev_ops == &vdev_draid_spare_ops)
3845716fd348SMartin Matuska 			newvd_is_dspare = B_TRUE;
3846716fd348SMartin Matuska 
3847be181ee2SMartin Matuska 		(void) strlcpy(newpath, newvd->vdev_path, MAXPATHLEN);
3848716fd348SMartin Matuska 	} else {
3849716fd348SMartin Matuska 		(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
3850716fd348SMartin Matuska 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
3851716fd348SMartin Matuska 		    top * leaves + leaf);
3852716fd348SMartin Matuska 		if (ztest_random(2) == 0)
3853716fd348SMartin Matuska 			newpath[strlen(newpath) - 1] = 'b';
3854716fd348SMartin Matuska 		newvd = vdev_lookup_by_path(rvd, newpath);
3855716fd348SMartin Matuska 	}
3856716fd348SMartin Matuska 
3857716fd348SMartin Matuska 	if (newvd) {
3858716fd348SMartin Matuska 		/*
3859716fd348SMartin Matuska 		 * Reopen to ensure the vdev's asize field isn't stale.
3860716fd348SMartin Matuska 		 */
3861716fd348SMartin Matuska 		vdev_reopen(newvd);
3862716fd348SMartin Matuska 		newsize = vdev_get_min_asize(newvd);
3863716fd348SMartin Matuska 	} else {
3864716fd348SMartin Matuska 		/*
3865716fd348SMartin Matuska 		 * Make newsize a little bigger or smaller than oldsize.
3866716fd348SMartin Matuska 		 * If it's smaller, the attach should fail.
3867716fd348SMartin Matuska 		 * If it's larger, and we're doing a replace,
3868716fd348SMartin Matuska 		 * we should get dynamic LUN growth when we're done.
3869716fd348SMartin Matuska 		 */
3870716fd348SMartin Matuska 		newsize = 10 * oldsize / (9 + ztest_random(3));
3871716fd348SMartin Matuska 	}
3872716fd348SMartin Matuska 
3873716fd348SMartin Matuska 	/*
3874716fd348SMartin Matuska 	 * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
3875716fd348SMartin Matuska 	 * unless it's a replace; in that case any non-replacing parent is OK.
3876716fd348SMartin Matuska 	 *
3877716fd348SMartin Matuska 	 * If newvd is already part of the pool, it should fail with EBUSY.
3878716fd348SMartin Matuska 	 *
3879716fd348SMartin Matuska 	 * If newvd is too small, it should fail with EOVERFLOW.
3880716fd348SMartin Matuska 	 *
3881716fd348SMartin Matuska 	 * If newvd is a distributed spare and it's being attached to a
3882716fd348SMartin Matuska 	 * dRAID which is not its parent it should fail with EINVAL.
3883716fd348SMartin Matuska 	 */
3884716fd348SMartin Matuska 	if (pvd->vdev_ops != &vdev_mirror_ops &&
3885716fd348SMartin Matuska 	    pvd->vdev_ops != &vdev_root_ops && (!replacing ||
3886716fd348SMartin Matuska 	    pvd->vdev_ops == &vdev_replacing_ops ||
3887716fd348SMartin Matuska 	    pvd->vdev_ops == &vdev_spare_ops))
3888716fd348SMartin Matuska 		expected_error = ENOTSUP;
388915f0b8c3SMartin Matuska 	else if (newvd_is_spare &&
389015f0b8c3SMartin Matuska 	    (!replacing || oldvd_is_log || oldvd_is_special))
3891716fd348SMartin Matuska 		expected_error = ENOTSUP;
3892716fd348SMartin Matuska 	else if (newvd == oldvd)
3893716fd348SMartin Matuska 		expected_error = replacing ? 0 : EBUSY;
3894716fd348SMartin Matuska 	else if (vdev_lookup_by_path(rvd, newpath) != NULL)
3895716fd348SMartin Matuska 		expected_error = EBUSY;
3896716fd348SMartin Matuska 	else if (!newvd_is_dspare && newsize < oldsize)
3897716fd348SMartin Matuska 		expected_error = EOVERFLOW;
3898716fd348SMartin Matuska 	else if (ashift > oldvd->vdev_top->vdev_ashift)
3899716fd348SMartin Matuska 		expected_error = EDOM;
3900716fd348SMartin Matuska 	else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
3901cbfe9975SMartin Matuska 		expected_error = EINVAL;
3902716fd348SMartin Matuska 	else
3903716fd348SMartin Matuska 		expected_error = 0;
3904716fd348SMartin Matuska 
3905716fd348SMartin Matuska 	spa_config_exit(spa, SCL_ALL, FTAG);
3906716fd348SMartin Matuska 
3907716fd348SMartin Matuska 	/*
3908716fd348SMartin Matuska 	 * Build the nvlist describing newpath.
3909716fd348SMartin Matuska 	 */
3910716fd348SMartin Matuska 	root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
3911716fd348SMartin Matuska 	    ashift, NULL, 0, 0, 1);
3912716fd348SMartin Matuska 
3913716fd348SMartin Matuska 	/*
3914716fd348SMartin Matuska 	 * When supported select either a healing or sequential resilver.
3915716fd348SMartin Matuska 	 */
3916716fd348SMartin Matuska 	boolean_t rebuilding = B_FALSE;
3917716fd348SMartin Matuska 	if (pvd->vdev_ops == &vdev_mirror_ops ||
3918716fd348SMartin Matuska 	    pvd->vdev_ops ==  &vdev_root_ops) {
3919716fd348SMartin Matuska 		rebuilding = !!ztest_random(2);
3920716fd348SMartin Matuska 	}
3921716fd348SMartin Matuska 
3922716fd348SMartin Matuska 	error = spa_vdev_attach(spa, oldguid, root, replacing, rebuilding);
3923716fd348SMartin Matuska 
3924716fd348SMartin Matuska 	fnvlist_free(root);
3925716fd348SMartin Matuska 
3926716fd348SMartin Matuska 	/*
3927716fd348SMartin Matuska 	 * If our parent was the replacing vdev, but the replace completed,
3928716fd348SMartin Matuska 	 * then instead of failing with ENOTSUP we may either succeed,
3929716fd348SMartin Matuska 	 * fail with ENODEV, or fail with EOVERFLOW.
3930716fd348SMartin Matuska 	 */
3931716fd348SMartin Matuska 	if (expected_error == ENOTSUP &&
3932716fd348SMartin Matuska 	    (error == 0 || error == ENODEV || error == EOVERFLOW))
3933716fd348SMartin Matuska 		expected_error = error;
3934716fd348SMartin Matuska 
3935716fd348SMartin Matuska 	/*
3936716fd348SMartin Matuska 	 * If someone grew the LUN, the replacement may be too small.
3937716fd348SMartin Matuska 	 */
3938716fd348SMartin Matuska 	if (error == EOVERFLOW || error == EBUSY)
3939716fd348SMartin Matuska 		expected_error = error;
3940716fd348SMartin Matuska 
3941716fd348SMartin Matuska 	if (error == ZFS_ERR_CHECKPOINT_EXISTS ||
3942716fd348SMartin Matuska 	    error == ZFS_ERR_DISCARDING_CHECKPOINT ||
3943716fd348SMartin Matuska 	    error == ZFS_ERR_RESILVER_IN_PROGRESS ||
3944716fd348SMartin Matuska 	    error == ZFS_ERR_REBUILD_IN_PROGRESS)
3945716fd348SMartin Matuska 		expected_error = error;
3946716fd348SMartin Matuska 
3947716fd348SMartin Matuska 	if (error != expected_error && expected_error != EBUSY) {
3948716fd348SMartin Matuska 		fatal(B_FALSE, "attach (%s %"PRIu64", %s %"PRIu64", %d) "
3949716fd348SMartin Matuska 		    "returned %d, expected %d",
3950716fd348SMartin Matuska 		    oldpath, oldsize, newpath,
3951716fd348SMartin Matuska 		    newsize, replacing, error, expected_error);
3952716fd348SMartin Matuska 	}
3953716fd348SMartin Matuska out:
3954716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
3955716fd348SMartin Matuska 
3956716fd348SMartin Matuska 	umem_free(oldpath, MAXPATHLEN);
3957716fd348SMartin Matuska 	umem_free(newpath, MAXPATHLEN);
3958716fd348SMartin Matuska }
3959716fd348SMartin Matuska 
3960e716630dSMartin Matuska static void
3961e716630dSMartin Matuska raidz_scratch_verify(void)
3962e716630dSMartin Matuska {
3963e716630dSMartin Matuska 	spa_t *spa;
3964e716630dSMartin Matuska 	uint64_t write_size, logical_size, offset;
3965e716630dSMartin Matuska 	raidz_reflow_scratch_state_t state;
3966e716630dSMartin Matuska 	vdev_raidz_expand_t *vre;
3967e716630dSMartin Matuska 	vdev_t *raidvd;
3968e716630dSMartin Matuska 
3969e716630dSMartin Matuska 	ASSERT(raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE);
3970e716630dSMartin Matuska 
3971e716630dSMartin Matuska 	if (ztest_scratch_state->zs_raidz_scratch_verify_pause == 0)
3972e716630dSMartin Matuska 		return;
3973e716630dSMartin Matuska 
3974e716630dSMartin Matuska 	kernel_init(SPA_MODE_READ);
3975e716630dSMartin Matuska 
3976e716630dSMartin Matuska 	mutex_enter(&spa_namespace_lock);
3977e716630dSMartin Matuska 	spa = spa_lookup(ztest_opts.zo_pool);
3978e716630dSMartin Matuska 	ASSERT(spa);
3979e716630dSMartin Matuska 	spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
3980e716630dSMartin Matuska 	mutex_exit(&spa_namespace_lock);
3981e716630dSMartin Matuska 
3982e716630dSMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
3983e716630dSMartin Matuska 
3984e716630dSMartin Matuska 	ASSERT3U(RRSS_GET_OFFSET(&spa->spa_uberblock), !=, UINT64_MAX);
3985e716630dSMartin Matuska 
3986e716630dSMartin Matuska 	mutex_enter(&ztest_vdev_lock);
3987e716630dSMartin Matuska 
3988e716630dSMartin Matuska 	spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
3989e716630dSMartin Matuska 
3990e716630dSMartin Matuska 	vre = spa->spa_raidz_expand;
3991e716630dSMartin Matuska 	if (vre == NULL)
3992e716630dSMartin Matuska 		goto out;
3993e716630dSMartin Matuska 
3994e716630dSMartin Matuska 	raidvd = vdev_lookup_top(spa, vre->vre_vdev_id);
3995e716630dSMartin Matuska 	offset = RRSS_GET_OFFSET(&spa->spa_uberblock);
3996e716630dSMartin Matuska 	state = RRSS_GET_STATE(&spa->spa_uberblock);
3997aca928a5SMartin Matuska 	write_size = P2ALIGN_TYPED(VDEV_BOOT_SIZE, 1 << raidvd->vdev_ashift,
3998aca928a5SMartin Matuska 	    uint64_t);
3999e716630dSMartin Matuska 	logical_size = write_size * raidvd->vdev_children;
4000e716630dSMartin Matuska 
4001e716630dSMartin Matuska 	switch (state) {
4002e716630dSMartin Matuska 		/*
4003e716630dSMartin Matuska 		 * Initial state of reflow process.  RAIDZ expansion was
4004e716630dSMartin Matuska 		 * requested by user, but scratch object was not created.
4005e716630dSMartin Matuska 		 */
4006e716630dSMartin Matuska 		case RRSS_SCRATCH_NOT_IN_USE:
4007e716630dSMartin Matuska 			ASSERT3U(offset, ==, 0);
4008e716630dSMartin Matuska 			break;
4009e716630dSMartin Matuska 
4010e716630dSMartin Matuska 		/*
4011e716630dSMartin Matuska 		 * Scratch object was synced and stored in boot area.
4012e716630dSMartin Matuska 		 */
4013e716630dSMartin Matuska 		case RRSS_SCRATCH_VALID:
4014e716630dSMartin Matuska 
4015e716630dSMartin Matuska 		/*
4016e716630dSMartin Matuska 		 * Scratch object was synced back to raidz start offset,
4017e716630dSMartin Matuska 		 * raidz is ready for sector by sector reflow process.
4018e716630dSMartin Matuska 		 */
4019e716630dSMartin Matuska 		case RRSS_SCRATCH_INVALID_SYNCED:
4020e716630dSMartin Matuska 
4021e716630dSMartin Matuska 		/*
4022e716630dSMartin Matuska 		 * Scratch object was synced back to raidz start offset
4023e716630dSMartin Matuska 		 * on zpool importing, raidz is ready for sector by sector
4024e716630dSMartin Matuska 		 * reflow process.
4025e716630dSMartin Matuska 		 */
4026e716630dSMartin Matuska 		case RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT:
4027e716630dSMartin Matuska 			ASSERT3U(offset, ==, logical_size);
4028e716630dSMartin Matuska 			break;
4029e716630dSMartin Matuska 
4030e716630dSMartin Matuska 		/*
4031e716630dSMartin Matuska 		 * Sector by sector reflow process started.
4032e716630dSMartin Matuska 		 */
4033e716630dSMartin Matuska 		case RRSS_SCRATCH_INVALID_SYNCED_REFLOW:
4034e716630dSMartin Matuska 			ASSERT3U(offset, >=, logical_size);
4035e716630dSMartin Matuska 			break;
4036e716630dSMartin Matuska 	}
4037e716630dSMartin Matuska 
4038e716630dSMartin Matuska out:
4039e716630dSMartin Matuska 	spa_config_exit(spa, SCL_ALL, FTAG);
4040e716630dSMartin Matuska 
4041e716630dSMartin Matuska 	mutex_exit(&ztest_vdev_lock);
4042e716630dSMartin Matuska 
4043e716630dSMartin Matuska 	ztest_scratch_state->zs_raidz_scratch_verify_pause = 0;
4044e716630dSMartin Matuska 
4045e716630dSMartin Matuska 	spa_close(spa, FTAG);
4046e716630dSMartin Matuska 	kernel_fini();
4047e716630dSMartin Matuska }
4048e716630dSMartin Matuska 
4049e716630dSMartin Matuska static void
4050e716630dSMartin Matuska ztest_scratch_thread(void *arg)
4051e716630dSMartin Matuska {
4052e716630dSMartin Matuska 	(void) arg;
4053e716630dSMartin Matuska 
4054e716630dSMartin Matuska 	/* wait up to 10 seconds */
4055e716630dSMartin Matuska 	for (int t = 100; t > 0; t -= 1) {
4056e716630dSMartin Matuska 		if (raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE)
4057e716630dSMartin Matuska 			thread_exit();
4058e716630dSMartin Matuska 
4059e716630dSMartin Matuska 		(void) poll(NULL, 0, 100);
4060e716630dSMartin Matuska 	}
4061e716630dSMartin Matuska 
4062e716630dSMartin Matuska 	/* killed when the scratch area progress reached a certain point */
4063e716630dSMartin Matuska 	ztest_kill(ztest_shared);
4064e716630dSMartin Matuska }
4065e716630dSMartin Matuska 
4066e716630dSMartin Matuska /*
4067e716630dSMartin Matuska  * Verify that we can attach raidz device.
4068e716630dSMartin Matuska  */
4069e716630dSMartin Matuska void
4070e716630dSMartin Matuska ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id)
4071e716630dSMartin Matuska {
4072e716630dSMartin Matuska 	(void) zd, (void) id;
4073e716630dSMartin Matuska 	ztest_shared_t *zs = ztest_shared;
4074e716630dSMartin Matuska 	spa_t *spa = ztest_spa;
4075e716630dSMartin Matuska 	uint64_t leaves, raidz_children, newsize, ashift = ztest_get_ashift();
4076e716630dSMartin Matuska 	kthread_t *scratch_thread = NULL;
4077e716630dSMartin Matuska 	vdev_t *newvd, *pvd;
4078e716630dSMartin Matuska 	nvlist_t *root;
4079e716630dSMartin Matuska 	char *newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
4080e716630dSMartin Matuska 	int error, expected_error = 0;
4081e716630dSMartin Matuska 
4082e716630dSMartin Matuska 	mutex_enter(&ztest_vdev_lock);
4083e716630dSMartin Matuska 
4084e716630dSMartin Matuska 	spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
4085e716630dSMartin Matuska 
4086e716630dSMartin Matuska 	/* Only allow attach when raid-kind = 'eraidz' */
4087e716630dSMartin Matuska 	if (!ztest_opts.zo_raid_do_expand) {
4088e716630dSMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
4089e716630dSMartin Matuska 		goto out;
4090e716630dSMartin Matuska 	}
4091e716630dSMartin Matuska 
4092e716630dSMartin Matuska 	if (ztest_opts.zo_mmp_test) {
4093e716630dSMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
4094e716630dSMartin Matuska 		goto out;
4095e716630dSMartin Matuska 	}
4096e716630dSMartin Matuska 
4097e716630dSMartin Matuska 	if (ztest_device_removal_active) {
4098e716630dSMartin Matuska 		spa_config_exit(spa, SCL_ALL, FTAG);
4099e716630dSMartin Matuska 		goto out;
4100e716630dSMartin Matuska 	}
4101e716630dSMartin Matuska 
4102e716630dSMartin Matuska 	pvd = vdev_lookup_top(spa, 0);
4103e716630dSMartin Matuska 
4104e716630dSMartin Matuska 	ASSERT(pvd->vdev_ops == &vdev_raidz_ops);
4105e716630dSMartin Matuska 
4106e716630dSMartin Matuska 	/*
4107e716630dSMartin Matuska 	 * Get size of a child of the raidz group,
4108e716630dSMartin Matuska 	 * make sure device is a bit bigger
4109e716630dSMartin Matuska 	 */
4110e716630dSMartin Matuska 	newvd = pvd->vdev_child[ztest_random(pvd->vdev_children)];
4111e716630dSMartin Matuska 	newsize = 10 * vdev_get_min_asize(newvd) / (9 + ztest_random(2));
4112e716630dSMartin Matuska 
4113e716630dSMartin Matuska 	/*
4114e716630dSMartin Matuska 	 * Get next attached leaf id
4115e716630dSMartin Matuska 	 */
4116e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(spa);
4117e716630dSMartin Matuska 	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * raidz_children;
4118e716630dSMartin Matuska 	zs->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves;
4119e716630dSMartin Matuska 
4120e716630dSMartin Matuska 	if (spa->spa_raidz_expand)
4121e716630dSMartin Matuska 		expected_error = ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS;
4122e716630dSMartin Matuska 
4123e716630dSMartin Matuska 	spa_config_exit(spa, SCL_ALL, FTAG);
4124e716630dSMartin Matuska 
4125e716630dSMartin Matuska 	/*
4126e716630dSMartin Matuska 	 * Path to vdev to be attached
4127e716630dSMartin Matuska 	 */
4128e716630dSMartin Matuska 	(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
4129e716630dSMartin Matuska 	    ztest_opts.zo_dir, ztest_opts.zo_pool, zs->zs_vdev_next_leaf);
4130e716630dSMartin Matuska 
4131e716630dSMartin Matuska 	/*
4132e716630dSMartin Matuska 	 * Build the nvlist describing newpath.
4133e716630dSMartin Matuska 	 */
4134e716630dSMartin Matuska 	root = make_vdev_root(newpath, NULL, NULL, newsize, ashift, NULL,
4135e716630dSMartin Matuska 	    0, 0, 1);
4136e716630dSMartin Matuska 
4137e716630dSMartin Matuska 	/*
4138e716630dSMartin Matuska 	 * 50% of the time, set raidz_expand_pause_point to cause
4139e716630dSMartin Matuska 	 * raidz_reflow_scratch_sync() to pause at a certain point and
4140e716630dSMartin Matuska 	 * then kill the test after 10 seconds so raidz_scratch_verify()
4141e716630dSMartin Matuska 	 * can confirm consistency when the pool is imported.
4142e716630dSMartin Matuska 	 */
4143e716630dSMartin Matuska 	if (ztest_random(2) == 0 && expected_error == 0) {
4144e716630dSMartin Matuska 		raidz_expand_pause_point =
4145e716630dSMartin Matuska 		    ztest_random(RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2) + 1;
4146e716630dSMartin Matuska 		scratch_thread = thread_create(NULL, 0, ztest_scratch_thread,
4147e716630dSMartin Matuska 		    ztest_shared, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
4148e716630dSMartin Matuska 	}
4149e716630dSMartin Matuska 
4150e716630dSMartin Matuska 	error = spa_vdev_attach(spa, pvd->vdev_guid, root, B_FALSE, B_FALSE);
4151e716630dSMartin Matuska 
4152e716630dSMartin Matuska 	nvlist_free(root);
4153e716630dSMartin Matuska 
4154e716630dSMartin Matuska 	if (error == EOVERFLOW || error == ENXIO ||
4155e716630dSMartin Matuska 	    error == ZFS_ERR_CHECKPOINT_EXISTS ||
4156e716630dSMartin Matuska 	    error == ZFS_ERR_DISCARDING_CHECKPOINT)
4157e716630dSMartin Matuska 		expected_error = error;
4158e716630dSMartin Matuska 
4159e716630dSMartin Matuska 	if (error != 0 && error != expected_error) {
4160e716630dSMartin Matuska 		fatal(0, "raidz attach (%s %"PRIu64") returned %d, expected %d",
4161e716630dSMartin Matuska 		    newpath, newsize, error, expected_error);
4162e716630dSMartin Matuska 	}
4163e716630dSMartin Matuska 
4164e716630dSMartin Matuska 	if (raidz_expand_pause_point) {
4165e716630dSMartin Matuska 		if (error != 0) {
4166e716630dSMartin Matuska 			/*
4167e716630dSMartin Matuska 			 * Do not verify scratch object in case of error
4168e716630dSMartin Matuska 			 * returned by vdev attaching.
4169e716630dSMartin Matuska 			 */
4170e716630dSMartin Matuska 			raidz_expand_pause_point = RAIDZ_EXPAND_PAUSE_NONE;
4171e716630dSMartin Matuska 		}
4172e716630dSMartin Matuska 
4173e716630dSMartin Matuska 		VERIFY0(thread_join(scratch_thread));
4174e716630dSMartin Matuska 	}
4175e716630dSMartin Matuska out:
4176e716630dSMartin Matuska 	mutex_exit(&ztest_vdev_lock);
4177e716630dSMartin Matuska 
4178e716630dSMartin Matuska 	umem_free(newpath, MAXPATHLEN);
4179e716630dSMartin Matuska }
4180e716630dSMartin Matuska 
4181716fd348SMartin Matuska void
4182716fd348SMartin Matuska ztest_device_removal(ztest_ds_t *zd, uint64_t id)
4183716fd348SMartin Matuska {
4184716fd348SMartin Matuska 	(void) zd, (void) id;
4185716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
4186716fd348SMartin Matuska 	vdev_t *vd;
4187716fd348SMartin Matuska 	uint64_t guid;
4188716fd348SMartin Matuska 	int error;
4189716fd348SMartin Matuska 
4190716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
4191716fd348SMartin Matuska 
4192716fd348SMartin Matuska 	if (ztest_device_removal_active) {
4193716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4194716fd348SMartin Matuska 		return;
4195716fd348SMartin Matuska 	}
4196716fd348SMartin Matuska 
4197716fd348SMartin Matuska 	/*
4198716fd348SMartin Matuska 	 * Remove a random top-level vdev and wait for removal to finish.
4199716fd348SMartin Matuska 	 */
4200716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
4201716fd348SMartin Matuska 	vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE));
4202716fd348SMartin Matuska 	guid = vd->vdev_guid;
4203716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
4204716fd348SMartin Matuska 
4205716fd348SMartin Matuska 	error = spa_vdev_remove(spa, guid, B_FALSE);
4206716fd348SMartin Matuska 	if (error == 0) {
4207716fd348SMartin Matuska 		ztest_device_removal_active = B_TRUE;
4208716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4209716fd348SMartin Matuska 
4210716fd348SMartin Matuska 		/*
4211716fd348SMartin Matuska 		 * spa->spa_vdev_removal is created in a sync task that
4212716fd348SMartin Matuska 		 * is initiated via dsl_sync_task_nowait(). Since the
4213716fd348SMartin Matuska 		 * task may not run before spa_vdev_remove() returns, we
4214716fd348SMartin Matuska 		 * must wait at least 1 txg to ensure that the removal
4215716fd348SMartin Matuska 		 * struct has been created.
4216716fd348SMartin Matuska 		 */
4217716fd348SMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
4218716fd348SMartin Matuska 
4219716fd348SMartin Matuska 		while (spa->spa_removing_phys.sr_state == DSS_SCANNING)
4220716fd348SMartin Matuska 			txg_wait_synced(spa_get_dsl(spa), 0);
4221716fd348SMartin Matuska 	} else {
4222716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4223716fd348SMartin Matuska 		return;
4224716fd348SMartin Matuska 	}
4225716fd348SMartin Matuska 
4226716fd348SMartin Matuska 	/*
4227716fd348SMartin Matuska 	 * The pool needs to be scrubbed after completing device removal.
4228716fd348SMartin Matuska 	 * Failure to do so may result in checksum errors due to the
4229716fd348SMartin Matuska 	 * strategy employed by ztest_fault_inject() when selecting which
4230716fd348SMartin Matuska 	 * offset are redundant and can be damaged.
4231716fd348SMartin Matuska 	 */
4232716fd348SMartin Matuska 	error = spa_scan(spa, POOL_SCAN_SCRUB);
4233716fd348SMartin Matuska 	if (error == 0) {
4234716fd348SMartin Matuska 		while (dsl_scan_scrubbing(spa_get_dsl(spa)))
4235716fd348SMartin Matuska 			txg_wait_synced(spa_get_dsl(spa), 0);
4236716fd348SMartin Matuska 	}
4237716fd348SMartin Matuska 
4238716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
4239716fd348SMartin Matuska 	ztest_device_removal_active = B_FALSE;
4240716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
4241716fd348SMartin Matuska }
4242716fd348SMartin Matuska 
4243716fd348SMartin Matuska /*
4244716fd348SMartin Matuska  * Callback function which expands the physical size of the vdev.
4245716fd348SMartin Matuska  */
4246716fd348SMartin Matuska static vdev_t *
4247716fd348SMartin Matuska grow_vdev(vdev_t *vd, void *arg)
4248716fd348SMartin Matuska {
4249716fd348SMartin Matuska 	spa_t *spa __maybe_unused = vd->vdev_spa;
4250716fd348SMartin Matuska 	size_t *newsize = arg;
4251716fd348SMartin Matuska 	size_t fsize;
4252716fd348SMartin Matuska 	int fd;
4253716fd348SMartin Matuska 
4254716fd348SMartin Matuska 	ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE);
4255716fd348SMartin Matuska 	ASSERT(vd->vdev_ops->vdev_op_leaf);
4256716fd348SMartin Matuska 
4257716fd348SMartin Matuska 	if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
4258716fd348SMartin Matuska 		return (vd);
4259716fd348SMartin Matuska 
4260716fd348SMartin Matuska 	fsize = lseek(fd, 0, SEEK_END);
4261716fd348SMartin Matuska 	VERIFY0(ftruncate(fd, *newsize));
4262716fd348SMartin Matuska 
4263716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6) {
4264716fd348SMartin Matuska 		(void) printf("%s grew from %lu to %lu bytes\n",
4265716fd348SMartin Matuska 		    vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
4266716fd348SMartin Matuska 	}
4267716fd348SMartin Matuska 	(void) close(fd);
4268716fd348SMartin Matuska 	return (NULL);
4269716fd348SMartin Matuska }
4270716fd348SMartin Matuska 
4271716fd348SMartin Matuska /*
4272716fd348SMartin Matuska  * Callback function which expands a given vdev by calling vdev_online().
4273716fd348SMartin Matuska  */
4274716fd348SMartin Matuska static vdev_t *
4275716fd348SMartin Matuska online_vdev(vdev_t *vd, void *arg)
4276716fd348SMartin Matuska {
4277716fd348SMartin Matuska 	(void) arg;
4278716fd348SMartin Matuska 	spa_t *spa = vd->vdev_spa;
4279716fd348SMartin Matuska 	vdev_t *tvd = vd->vdev_top;
4280716fd348SMartin Matuska 	uint64_t guid = vd->vdev_guid;
4281716fd348SMartin Matuska 	uint64_t generation = spa->spa_config_generation + 1;
4282716fd348SMartin Matuska 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
4283716fd348SMartin Matuska 	int error;
4284716fd348SMartin Matuska 
4285716fd348SMartin Matuska 	ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE);
4286716fd348SMartin Matuska 	ASSERT(vd->vdev_ops->vdev_op_leaf);
4287716fd348SMartin Matuska 
4288716fd348SMartin Matuska 	/* Calling vdev_online will initialize the new metaslabs */
4289716fd348SMartin Matuska 	spa_config_exit(spa, SCL_STATE, spa);
4290716fd348SMartin Matuska 	error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate);
4291716fd348SMartin Matuska 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
4292716fd348SMartin Matuska 
4293716fd348SMartin Matuska 	/*
4294716fd348SMartin Matuska 	 * If vdev_online returned an error or the underlying vdev_open
4295716fd348SMartin Matuska 	 * failed then we abort the expand. The only way to know that
4296716fd348SMartin Matuska 	 * vdev_open fails is by checking the returned newstate.
4297716fd348SMartin Matuska 	 */
4298716fd348SMartin Matuska 	if (error || newstate != VDEV_STATE_HEALTHY) {
4299716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 5) {
4300716fd348SMartin Matuska 			(void) printf("Unable to expand vdev, state %u, "
4301716fd348SMartin Matuska 			    "error %d\n", newstate, error);
4302716fd348SMartin Matuska 		}
4303716fd348SMartin Matuska 		return (vd);
4304716fd348SMartin Matuska 	}
4305716fd348SMartin Matuska 	ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY);
4306716fd348SMartin Matuska 
4307716fd348SMartin Matuska 	/*
4308716fd348SMartin Matuska 	 * Since we dropped the lock we need to ensure that we're
4309716fd348SMartin Matuska 	 * still talking to the original vdev. It's possible this
4310716fd348SMartin Matuska 	 * vdev may have been detached/replaced while we were
4311716fd348SMartin Matuska 	 * trying to online it.
4312716fd348SMartin Matuska 	 */
4313716fd348SMartin Matuska 	if (generation != spa->spa_config_generation) {
4314716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 5) {
4315716fd348SMartin Matuska 			(void) printf("vdev configuration has changed, "
4316716fd348SMartin Matuska 			    "guid %"PRIu64", state %"PRIu64", "
4317716fd348SMartin Matuska 			    "expected gen %"PRIu64", got gen %"PRIu64"\n",
4318716fd348SMartin Matuska 			    guid,
4319716fd348SMartin Matuska 			    tvd->vdev_state,
4320716fd348SMartin Matuska 			    generation,
4321716fd348SMartin Matuska 			    spa->spa_config_generation);
4322716fd348SMartin Matuska 		}
4323716fd348SMartin Matuska 		return (vd);
4324716fd348SMartin Matuska 	}
4325716fd348SMartin Matuska 	return (NULL);
4326716fd348SMartin Matuska }
4327716fd348SMartin Matuska 
4328716fd348SMartin Matuska /*
4329716fd348SMartin Matuska  * Traverse the vdev tree calling the supplied function.
4330716fd348SMartin Matuska  * We continue to walk the tree until we either have walked all
4331716fd348SMartin Matuska  * children or we receive a non-NULL return from the callback.
4332716fd348SMartin Matuska  * If a NULL callback is passed, then we just return back the first
4333716fd348SMartin Matuska  * leaf vdev we encounter.
4334716fd348SMartin Matuska  */
4335716fd348SMartin Matuska static vdev_t *
4336716fd348SMartin Matuska vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
4337716fd348SMartin Matuska {
4338716fd348SMartin Matuska 	uint_t c;
4339716fd348SMartin Matuska 
4340716fd348SMartin Matuska 	if (vd->vdev_ops->vdev_op_leaf) {
4341716fd348SMartin Matuska 		if (func == NULL)
4342716fd348SMartin Matuska 			return (vd);
4343716fd348SMartin Matuska 		else
4344716fd348SMartin Matuska 			return (func(vd, arg));
4345716fd348SMartin Matuska 	}
4346716fd348SMartin Matuska 
4347716fd348SMartin Matuska 	for (c = 0; c < vd->vdev_children; c++) {
4348716fd348SMartin Matuska 		vdev_t *cvd = vd->vdev_child[c];
4349716fd348SMartin Matuska 		if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
4350716fd348SMartin Matuska 			return (cvd);
4351716fd348SMartin Matuska 	}
4352716fd348SMartin Matuska 	return (NULL);
4353716fd348SMartin Matuska }
4354716fd348SMartin Matuska 
4355716fd348SMartin Matuska /*
4356716fd348SMartin Matuska  * Verify that dynamic LUN growth works as expected.
4357716fd348SMartin Matuska  */
4358716fd348SMartin Matuska void
4359716fd348SMartin Matuska ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
4360716fd348SMartin Matuska {
4361716fd348SMartin Matuska 	(void) zd, (void) id;
4362716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
4363716fd348SMartin Matuska 	vdev_t *vd, *tvd;
4364716fd348SMartin Matuska 	metaslab_class_t *mc;
4365716fd348SMartin Matuska 	metaslab_group_t *mg;
4366716fd348SMartin Matuska 	size_t psize, newsize;
4367716fd348SMartin Matuska 	uint64_t top;
4368716fd348SMartin Matuska 	uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
4369716fd348SMartin Matuska 
4370716fd348SMartin Matuska 	mutex_enter(&ztest_checkpoint_lock);
4371716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
4372716fd348SMartin Matuska 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
4373716fd348SMartin Matuska 
4374716fd348SMartin Matuska 	/*
4375716fd348SMartin Matuska 	 * If there is a vdev removal in progress, it could complete while
4376716fd348SMartin Matuska 	 * we are running, in which case we would not be able to verify
4377716fd348SMartin Matuska 	 * that the metaslab_class space increased (because it decreases
4378716fd348SMartin Matuska 	 * when the device removal completes).
4379716fd348SMartin Matuska 	 */
4380716fd348SMartin Matuska 	if (ztest_device_removal_active) {
4381716fd348SMartin Matuska 		spa_config_exit(spa, SCL_STATE, spa);
4382716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4383716fd348SMartin Matuska 		mutex_exit(&ztest_checkpoint_lock);
4384716fd348SMartin Matuska 		return;
4385716fd348SMartin Matuska 	}
4386716fd348SMartin Matuska 
4387e716630dSMartin Matuska 	/*
4388e716630dSMartin Matuska 	 * If we are under raidz expansion, the test can failed because the
4389e716630dSMartin Matuska 	 * metaslabs count will not increase immediately after the vdev is
4390e716630dSMartin Matuska 	 * expanded. It will happen only after raidz expansion completion.
4391e716630dSMartin Matuska 	 */
4392e716630dSMartin Matuska 	if (spa->spa_raidz_expand) {
4393e716630dSMartin Matuska 		spa_config_exit(spa, SCL_STATE, spa);
4394e716630dSMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4395e716630dSMartin Matuska 		mutex_exit(&ztest_checkpoint_lock);
4396e716630dSMartin Matuska 		return;
4397e716630dSMartin Matuska 	}
4398e716630dSMartin Matuska 
4399716fd348SMartin Matuska 	top = ztest_random_vdev_top(spa, B_TRUE);
4400716fd348SMartin Matuska 
4401716fd348SMartin Matuska 	tvd = spa->spa_root_vdev->vdev_child[top];
4402716fd348SMartin Matuska 	mg = tvd->vdev_mg;
4403716fd348SMartin Matuska 	mc = mg->mg_class;
4404716fd348SMartin Matuska 	old_ms_count = tvd->vdev_ms_count;
4405716fd348SMartin Matuska 	old_class_space = metaslab_class_get_space(mc);
4406716fd348SMartin Matuska 
4407716fd348SMartin Matuska 	/*
4408716fd348SMartin Matuska 	 * Determine the size of the first leaf vdev associated with
4409716fd348SMartin Matuska 	 * our top-level device.
4410716fd348SMartin Matuska 	 */
4411716fd348SMartin Matuska 	vd = vdev_walk_tree(tvd, NULL, NULL);
4412716fd348SMartin Matuska 	ASSERT3P(vd, !=, NULL);
4413716fd348SMartin Matuska 	ASSERT(vd->vdev_ops->vdev_op_leaf);
4414716fd348SMartin Matuska 
4415716fd348SMartin Matuska 	psize = vd->vdev_psize;
4416716fd348SMartin Matuska 
4417716fd348SMartin Matuska 	/*
4418716fd348SMartin Matuska 	 * We only try to expand the vdev if it's healthy, less than 4x its
4419716fd348SMartin Matuska 	 * original size, and it has a valid psize.
4420716fd348SMartin Matuska 	 */
4421716fd348SMartin Matuska 	if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
4422716fd348SMartin Matuska 	    psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) {
4423716fd348SMartin Matuska 		spa_config_exit(spa, SCL_STATE, spa);
4424716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4425716fd348SMartin Matuska 		mutex_exit(&ztest_checkpoint_lock);
4426716fd348SMartin Matuska 		return;
4427716fd348SMartin Matuska 	}
4428716fd348SMartin Matuska 	ASSERT3U(psize, >, 0);
4429716fd348SMartin Matuska 	newsize = psize + MAX(psize / 8, SPA_MAXBLOCKSIZE);
4430716fd348SMartin Matuska 	ASSERT3U(newsize, >, psize);
4431716fd348SMartin Matuska 
4432716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6) {
4433716fd348SMartin Matuska 		(void) printf("Expanding LUN %s from %lu to %lu\n",
4434716fd348SMartin Matuska 		    vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
4435716fd348SMartin Matuska 	}
4436716fd348SMartin Matuska 
4437716fd348SMartin Matuska 	/*
4438716fd348SMartin Matuska 	 * Growing the vdev is a two step process:
4439716fd348SMartin Matuska 	 *	1). expand the physical size (i.e. relabel)
4440716fd348SMartin Matuska 	 *	2). online the vdev to create the new metaslabs
4441716fd348SMartin Matuska 	 */
4442716fd348SMartin Matuska 	if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
4443716fd348SMartin Matuska 	    vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
4444716fd348SMartin Matuska 	    tvd->vdev_state != VDEV_STATE_HEALTHY) {
4445716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 5) {
4446716fd348SMartin Matuska 			(void) printf("Could not expand LUN because "
4447716fd348SMartin Matuska 			    "the vdev configuration changed.\n");
4448716fd348SMartin Matuska 		}
4449716fd348SMartin Matuska 		spa_config_exit(spa, SCL_STATE, spa);
4450716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4451716fd348SMartin Matuska 		mutex_exit(&ztest_checkpoint_lock);
4452716fd348SMartin Matuska 		return;
4453716fd348SMartin Matuska 	}
4454716fd348SMartin Matuska 
4455716fd348SMartin Matuska 	spa_config_exit(spa, SCL_STATE, spa);
4456716fd348SMartin Matuska 
4457716fd348SMartin Matuska 	/*
4458716fd348SMartin Matuska 	 * Expanding the LUN will update the config asynchronously,
4459716fd348SMartin Matuska 	 * thus we must wait for the async thread to complete any
4460716fd348SMartin Matuska 	 * pending tasks before proceeding.
4461716fd348SMartin Matuska 	 */
4462716fd348SMartin Matuska 	for (;;) {
4463716fd348SMartin Matuska 		boolean_t done;
4464716fd348SMartin Matuska 		mutex_enter(&spa->spa_async_lock);
4465716fd348SMartin Matuska 		done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
4466716fd348SMartin Matuska 		mutex_exit(&spa->spa_async_lock);
4467716fd348SMartin Matuska 		if (done)
4468716fd348SMartin Matuska 			break;
4469716fd348SMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
4470716fd348SMartin Matuska 		(void) poll(NULL, 0, 100);
4471716fd348SMartin Matuska 	}
4472716fd348SMartin Matuska 
4473716fd348SMartin Matuska 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
4474716fd348SMartin Matuska 
4475716fd348SMartin Matuska 	tvd = spa->spa_root_vdev->vdev_child[top];
4476716fd348SMartin Matuska 	new_ms_count = tvd->vdev_ms_count;
4477716fd348SMartin Matuska 	new_class_space = metaslab_class_get_space(mc);
4478716fd348SMartin Matuska 
4479716fd348SMartin Matuska 	if (tvd->vdev_mg != mg || mg->mg_class != mc) {
4480716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 5) {
4481716fd348SMartin Matuska 			(void) printf("Could not verify LUN expansion due to "
4482716fd348SMartin Matuska 			    "intervening vdev offline or remove.\n");
4483716fd348SMartin Matuska 		}
4484716fd348SMartin Matuska 		spa_config_exit(spa, SCL_STATE, spa);
4485716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
4486716fd348SMartin Matuska 		mutex_exit(&ztest_checkpoint_lock);
4487716fd348SMartin Matuska 		return;
4488716fd348SMartin Matuska 	}
4489716fd348SMartin Matuska 
4490716fd348SMartin Matuska 	/*
4491716fd348SMartin Matuska 	 * Make sure we were able to grow the vdev.
4492716fd348SMartin Matuska 	 */
4493716fd348SMartin Matuska 	if (new_ms_count <= old_ms_count) {
4494716fd348SMartin Matuska 		fatal(B_FALSE,
4495716fd348SMartin Matuska 		    "LUN expansion failed: ms_count %"PRIu64" < %"PRIu64"\n",
4496716fd348SMartin Matuska 		    old_ms_count, new_ms_count);
4497716fd348SMartin Matuska 	}
4498716fd348SMartin Matuska 
4499716fd348SMartin Matuska 	/*
4500716fd348SMartin Matuska 	 * Make sure we were able to grow the pool.
4501716fd348SMartin Matuska 	 */
4502716fd348SMartin Matuska 	if (new_class_space <= old_class_space) {
4503716fd348SMartin Matuska 		fatal(B_FALSE,
4504716fd348SMartin Matuska 		    "LUN expansion failed: class_space %"PRIu64" < %"PRIu64"\n",
4505716fd348SMartin Matuska 		    old_class_space, new_class_space);
4506716fd348SMartin Matuska 	}
4507716fd348SMartin Matuska 
4508716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 5) {
4509716fd348SMartin Matuska 		char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
4510716fd348SMartin Matuska 
4511716fd348SMartin Matuska 		nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
4512716fd348SMartin Matuska 		nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
4513716fd348SMartin Matuska 		(void) printf("%s grew from %s to %s\n",
4514716fd348SMartin Matuska 		    spa->spa_name, oldnumbuf, newnumbuf);
4515716fd348SMartin Matuska 	}
4516716fd348SMartin Matuska 
4517716fd348SMartin Matuska 	spa_config_exit(spa, SCL_STATE, spa);
4518716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
4519716fd348SMartin Matuska 	mutex_exit(&ztest_checkpoint_lock);
4520716fd348SMartin Matuska }
4521716fd348SMartin Matuska 
4522716fd348SMartin Matuska /*
4523716fd348SMartin Matuska  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
4524716fd348SMartin Matuska  */
4525716fd348SMartin Matuska static void
4526716fd348SMartin Matuska ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
4527716fd348SMartin Matuska {
4528716fd348SMartin Matuska 	(void) arg, (void) cr;
4529716fd348SMartin Matuska 
4530716fd348SMartin Matuska 	/*
4531716fd348SMartin Matuska 	 * Create the objects common to all ztest datasets.
4532716fd348SMartin Matuska 	 */
4533716fd348SMartin Matuska 	VERIFY0(zap_create_claim(os, ZTEST_DIROBJ,
4534716fd348SMartin Matuska 	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx));
4535716fd348SMartin Matuska }
4536716fd348SMartin Matuska 
4537716fd348SMartin Matuska static int
4538716fd348SMartin Matuska ztest_dataset_create(char *dsname)
4539716fd348SMartin Matuska {
4540716fd348SMartin Matuska 	int err;
4541716fd348SMartin Matuska 	uint64_t rand;
4542716fd348SMartin Matuska 	dsl_crypto_params_t *dcp = NULL;
4543716fd348SMartin Matuska 
4544716fd348SMartin Matuska 	/*
4545716fd348SMartin Matuska 	 * 50% of the time, we create encrypted datasets
4546716fd348SMartin Matuska 	 * using a random cipher suite and a hard-coded
4547716fd348SMartin Matuska 	 * wrapping key.
4548716fd348SMartin Matuska 	 */
4549716fd348SMartin Matuska 	rand = ztest_random(2);
4550716fd348SMartin Matuska 	if (rand != 0) {
4551716fd348SMartin Matuska 		nvlist_t *crypto_args = fnvlist_alloc();
4552716fd348SMartin Matuska 		nvlist_t *props = fnvlist_alloc();
4553716fd348SMartin Matuska 
4554716fd348SMartin Matuska 		/* slight bias towards the default cipher suite */
4555716fd348SMartin Matuska 		rand = ztest_random(ZIO_CRYPT_FUNCTIONS);
4556716fd348SMartin Matuska 		if (rand < ZIO_CRYPT_AES_128_CCM)
4557716fd348SMartin Matuska 			rand = ZIO_CRYPT_ON;
4558716fd348SMartin Matuska 
4559716fd348SMartin Matuska 		fnvlist_add_uint64(props,
4560716fd348SMartin Matuska 		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), rand);
4561716fd348SMartin Matuska 		fnvlist_add_uint8_array(crypto_args, "wkeydata",
4562716fd348SMartin Matuska 		    (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN);
4563716fd348SMartin Matuska 
4564716fd348SMartin Matuska 		/*
4565716fd348SMartin Matuska 		 * These parameters aren't really used by the kernel. They
4566716fd348SMartin Matuska 		 * are simply stored so that userspace knows how to load
4567716fd348SMartin Matuska 		 * the wrapping key.
4568716fd348SMartin Matuska 		 */
4569716fd348SMartin Matuska 		fnvlist_add_uint64(props,
4570716fd348SMartin Matuska 		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), ZFS_KEYFORMAT_RAW);
4571716fd348SMartin Matuska 		fnvlist_add_string(props,
4572716fd348SMartin Matuska 		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), "prompt");
4573716fd348SMartin Matuska 		fnvlist_add_uint64(props,
4574716fd348SMartin Matuska 		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 0ULL);
4575716fd348SMartin Matuska 		fnvlist_add_uint64(props,
4576716fd348SMartin Matuska 		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 0ULL);
4577716fd348SMartin Matuska 
4578716fd348SMartin Matuska 		VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, props,
4579716fd348SMartin Matuska 		    crypto_args, &dcp));
4580716fd348SMartin Matuska 
4581716fd348SMartin Matuska 		/*
4582716fd348SMartin Matuska 		 * Cycle through all available encryption implementations
4583716fd348SMartin Matuska 		 * to verify interoperability.
4584716fd348SMartin Matuska 		 */
4585716fd348SMartin Matuska 		VERIFY0(gcm_impl_set("cycle"));
4586716fd348SMartin Matuska 		VERIFY0(aes_impl_set("cycle"));
4587716fd348SMartin Matuska 
4588716fd348SMartin Matuska 		fnvlist_free(crypto_args);
4589716fd348SMartin Matuska 		fnvlist_free(props);
4590716fd348SMartin Matuska 	}
4591716fd348SMartin Matuska 
4592716fd348SMartin Matuska 	err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, dcp,
4593716fd348SMartin Matuska 	    ztest_objset_create_cb, NULL);
4594716fd348SMartin Matuska 	dsl_crypto_params_free(dcp, !!err);
4595716fd348SMartin Matuska 
4596716fd348SMartin Matuska 	rand = ztest_random(100);
4597716fd348SMartin Matuska 	if (err || rand < 80)
4598716fd348SMartin Matuska 		return (err);
4599716fd348SMartin Matuska 
4600716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 5)
4601716fd348SMartin Matuska 		(void) printf("Setting dataset %s to sync always\n", dsname);
4602716fd348SMartin Matuska 	return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC,
4603716fd348SMartin Matuska 	    ZFS_SYNC_ALWAYS, B_FALSE));
4604716fd348SMartin Matuska }
4605716fd348SMartin Matuska 
4606716fd348SMartin Matuska static int
4607716fd348SMartin Matuska ztest_objset_destroy_cb(const char *name, void *arg)
4608716fd348SMartin Matuska {
4609716fd348SMartin Matuska 	(void) arg;
4610716fd348SMartin Matuska 	objset_t *os;
4611716fd348SMartin Matuska 	dmu_object_info_t doi;
4612716fd348SMartin Matuska 	int error;
4613716fd348SMartin Matuska 
4614716fd348SMartin Matuska 	/*
4615716fd348SMartin Matuska 	 * Verify that the dataset contains a directory object.
4616716fd348SMartin Matuska 	 */
4617716fd348SMartin Matuska 	VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
4618716fd348SMartin Matuska 	    B_TRUE, FTAG, &os));
4619716fd348SMartin Matuska 	error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
4620716fd348SMartin Matuska 	if (error != ENOENT) {
4621716fd348SMartin Matuska 		/* We could have crashed in the middle of destroying it */
4622716fd348SMartin Matuska 		ASSERT0(error);
4623716fd348SMartin Matuska 		ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
4624716fd348SMartin Matuska 		ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
4625716fd348SMartin Matuska 	}
4626716fd348SMartin Matuska 	dmu_objset_disown(os, B_TRUE, FTAG);
4627716fd348SMartin Matuska 
4628716fd348SMartin Matuska 	/*
4629716fd348SMartin Matuska 	 * Destroy the dataset.
4630716fd348SMartin Matuska 	 */
4631716fd348SMartin Matuska 	if (strchr(name, '@') != NULL) {
4632716fd348SMartin Matuska 		error = dsl_destroy_snapshot(name, B_TRUE);
4633716fd348SMartin Matuska 		if (error != ECHRNG) {
4634716fd348SMartin Matuska 			/*
4635716fd348SMartin Matuska 			 * The program was executed, but encountered a runtime
4636716fd348SMartin Matuska 			 * error, such as insufficient slop, or a hold on the
4637716fd348SMartin Matuska 			 * dataset.
4638716fd348SMartin Matuska 			 */
4639716fd348SMartin Matuska 			ASSERT0(error);
4640716fd348SMartin Matuska 		}
4641716fd348SMartin Matuska 	} else {
4642716fd348SMartin Matuska 		error = dsl_destroy_head(name);
4643716fd348SMartin Matuska 		if (error == ENOSPC) {
4644716fd348SMartin Matuska 			/* There could be checkpoint or insufficient slop */
4645716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4646716fd348SMartin Matuska 		} else if (error != EBUSY) {
4647716fd348SMartin Matuska 			/* There could be a hold on this dataset */
4648716fd348SMartin Matuska 			ASSERT0(error);
4649716fd348SMartin Matuska 		}
4650716fd348SMartin Matuska 	}
4651716fd348SMartin Matuska 	return (0);
4652716fd348SMartin Matuska }
4653716fd348SMartin Matuska 
4654716fd348SMartin Matuska static boolean_t
4655716fd348SMartin Matuska ztest_snapshot_create(char *osname, uint64_t id)
4656716fd348SMartin Matuska {
4657716fd348SMartin Matuska 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
4658716fd348SMartin Matuska 	int error;
4659716fd348SMartin Matuska 
4660716fd348SMartin Matuska 	(void) snprintf(snapname, sizeof (snapname), "%"PRIu64"", id);
4661716fd348SMartin Matuska 
4662716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(osname, snapname);
4663716fd348SMartin Matuska 	if (error == ENOSPC) {
4664716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
4665716fd348SMartin Matuska 		return (B_FALSE);
4666716fd348SMartin Matuska 	}
466715f0b8c3SMartin Matuska 	if (error != 0 && error != EEXIST && error != ECHRNG) {
4668716fd348SMartin Matuska 		fatal(B_FALSE, "ztest_snapshot_create(%s@%s) = %d", osname,
4669716fd348SMartin Matuska 		    snapname, error);
4670716fd348SMartin Matuska 	}
4671716fd348SMartin Matuska 	return (B_TRUE);
4672716fd348SMartin Matuska }
4673716fd348SMartin Matuska 
4674716fd348SMartin Matuska static boolean_t
4675716fd348SMartin Matuska ztest_snapshot_destroy(char *osname, uint64_t id)
4676716fd348SMartin Matuska {
4677716fd348SMartin Matuska 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
4678716fd348SMartin Matuska 	int error;
4679716fd348SMartin Matuska 
4680716fd348SMartin Matuska 	(void) snprintf(snapname, sizeof (snapname), "%s@%"PRIu64"",
4681716fd348SMartin Matuska 	    osname, id);
4682716fd348SMartin Matuska 
4683716fd348SMartin Matuska 	error = dsl_destroy_snapshot(snapname, B_FALSE);
468415f0b8c3SMartin Matuska 	if (error != 0 && error != ENOENT && error != ECHRNG)
4685716fd348SMartin Matuska 		fatal(B_FALSE, "ztest_snapshot_destroy(%s) = %d",
4686716fd348SMartin Matuska 		    snapname, error);
4687716fd348SMartin Matuska 	return (B_TRUE);
4688716fd348SMartin Matuska }
4689716fd348SMartin Matuska 
4690716fd348SMartin Matuska void
4691716fd348SMartin Matuska ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
4692716fd348SMartin Matuska {
4693716fd348SMartin Matuska 	(void) zd;
4694716fd348SMartin Matuska 	ztest_ds_t *zdtmp;
4695716fd348SMartin Matuska 	int iters;
4696716fd348SMartin Matuska 	int error;
4697716fd348SMartin Matuska 	objset_t *os, *os2;
4698716fd348SMartin Matuska 	char name[ZFS_MAX_DATASET_NAME_LEN];
4699716fd348SMartin Matuska 	zilog_t *zilog;
4700716fd348SMartin Matuska 	int i;
4701716fd348SMartin Matuska 
4702716fd348SMartin Matuska 	zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL);
4703716fd348SMartin Matuska 
4704716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
4705716fd348SMartin Matuska 
4706716fd348SMartin Matuska 	(void) snprintf(name, sizeof (name), "%s/temp_%"PRIu64"",
4707716fd348SMartin Matuska 	    ztest_opts.zo_pool, id);
4708716fd348SMartin Matuska 
4709716fd348SMartin Matuska 	/*
4710716fd348SMartin Matuska 	 * If this dataset exists from a previous run, process its replay log
4711716fd348SMartin Matuska 	 * half of the time.  If we don't replay it, then dsl_destroy_head()
4712716fd348SMartin Matuska 	 * (invoked from ztest_objset_destroy_cb()) should just throw it away.
4713716fd348SMartin Matuska 	 */
4714716fd348SMartin Matuska 	if (ztest_random(2) == 0 &&
4715716fd348SMartin Matuska 	    ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE,
4716716fd348SMartin Matuska 	    B_TRUE, FTAG, &os) == 0) {
4717716fd348SMartin Matuska 		ztest_zd_init(zdtmp, NULL, os);
4718716fd348SMartin Matuska 		zil_replay(os, zdtmp, ztest_replay_vector);
4719716fd348SMartin Matuska 		ztest_zd_fini(zdtmp);
4720716fd348SMartin Matuska 		dmu_objset_disown(os, B_TRUE, FTAG);
4721716fd348SMartin Matuska 	}
4722716fd348SMartin Matuska 
4723716fd348SMartin Matuska 	/*
4724716fd348SMartin Matuska 	 * There may be an old instance of the dataset we're about to
4725716fd348SMartin Matuska 	 * create lying around from a previous run.  If so, destroy it
4726716fd348SMartin Matuska 	 * and all of its snapshots.
4727716fd348SMartin Matuska 	 */
4728716fd348SMartin Matuska 	(void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
4729716fd348SMartin Matuska 	    DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
4730716fd348SMartin Matuska 
4731716fd348SMartin Matuska 	/*
4732716fd348SMartin Matuska 	 * Verify that the destroyed dataset is no longer in the namespace.
473315f0b8c3SMartin Matuska 	 * It may still be present if the destroy above fails with ENOSPC.
4734716fd348SMartin Matuska 	 */
473515f0b8c3SMartin Matuska 	error = ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE,
473615f0b8c3SMartin Matuska 	    FTAG, &os);
473715f0b8c3SMartin Matuska 	if (error == 0) {
473815f0b8c3SMartin Matuska 		dmu_objset_disown(os, B_TRUE, FTAG);
473915f0b8c3SMartin Matuska 		ztest_record_enospc(FTAG);
474015f0b8c3SMartin Matuska 		goto out;
474115f0b8c3SMartin Matuska 	}
474215f0b8c3SMartin Matuska 	VERIFY3U(ENOENT, ==, error);
4743716fd348SMartin Matuska 
4744716fd348SMartin Matuska 	/*
4745716fd348SMartin Matuska 	 * Verify that we can create a new dataset.
4746716fd348SMartin Matuska 	 */
4747716fd348SMartin Matuska 	error = ztest_dataset_create(name);
4748716fd348SMartin Matuska 	if (error) {
4749716fd348SMartin Matuska 		if (error == ENOSPC) {
4750716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4751716fd348SMartin Matuska 			goto out;
4752716fd348SMartin Matuska 		}
4753716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_create(%s) = %d", name, error);
4754716fd348SMartin Matuska 	}
4755716fd348SMartin Matuska 
4756716fd348SMartin Matuska 	VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE,
4757716fd348SMartin Matuska 	    FTAG, &os));
4758716fd348SMartin Matuska 
4759716fd348SMartin Matuska 	ztest_zd_init(zdtmp, NULL, os);
4760716fd348SMartin Matuska 
4761716fd348SMartin Matuska 	/*
4762716fd348SMartin Matuska 	 * Open the intent log for it.
4763716fd348SMartin Matuska 	 */
4764271171e0SMartin Matuska 	zilog = zil_open(os, ztest_get_data, NULL);
4765716fd348SMartin Matuska 
4766716fd348SMartin Matuska 	/*
4767716fd348SMartin Matuska 	 * Put some objects in there, do a little I/O to them,
4768716fd348SMartin Matuska 	 * and randomly take a couple of snapshots along the way.
4769716fd348SMartin Matuska 	 */
4770716fd348SMartin Matuska 	iters = ztest_random(5);
4771716fd348SMartin Matuska 	for (i = 0; i < iters; i++) {
4772716fd348SMartin Matuska 		ztest_dmu_object_alloc_free(zdtmp, id);
4773716fd348SMartin Matuska 		if (ztest_random(iters) == 0)
4774716fd348SMartin Matuska 			(void) ztest_snapshot_create(name, i);
4775716fd348SMartin Matuska 	}
4776716fd348SMartin Matuska 
4777716fd348SMartin Matuska 	/*
4778716fd348SMartin Matuska 	 * Verify that we cannot create an existing dataset.
4779716fd348SMartin Matuska 	 */
4780716fd348SMartin Matuska 	VERIFY3U(EEXIST, ==,
4781716fd348SMartin Matuska 	    dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL));
4782716fd348SMartin Matuska 
4783716fd348SMartin Matuska 	/*
4784716fd348SMartin Matuska 	 * Verify that we can hold an objset that is also owned.
4785716fd348SMartin Matuska 	 */
4786716fd348SMartin Matuska 	VERIFY0(dmu_objset_hold(name, FTAG, &os2));
4787716fd348SMartin Matuska 	dmu_objset_rele(os2, FTAG);
4788716fd348SMartin Matuska 
4789716fd348SMartin Matuska 	/*
4790716fd348SMartin Matuska 	 * Verify that we cannot own an objset that is already owned.
4791716fd348SMartin Matuska 	 */
4792716fd348SMartin Matuska 	VERIFY3U(EBUSY, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER,
4793716fd348SMartin Matuska 	    B_FALSE, B_TRUE, FTAG, &os2));
4794716fd348SMartin Matuska 
4795716fd348SMartin Matuska 	zil_close(zilog);
4796716fd348SMartin Matuska 	dmu_objset_disown(os, B_TRUE, FTAG);
4797716fd348SMartin Matuska 	ztest_zd_fini(zdtmp);
4798716fd348SMartin Matuska out:
4799716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
4800716fd348SMartin Matuska 
4801716fd348SMartin Matuska 	umem_free(zdtmp, sizeof (ztest_ds_t));
4802716fd348SMartin Matuska }
4803716fd348SMartin Matuska 
4804716fd348SMartin Matuska /*
4805716fd348SMartin Matuska  * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
4806716fd348SMartin Matuska  */
4807716fd348SMartin Matuska void
4808716fd348SMartin Matuska ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
4809716fd348SMartin Matuska {
4810716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
4811716fd348SMartin Matuska 	(void) ztest_snapshot_destroy(zd->zd_name, id);
4812716fd348SMartin Matuska 	(void) ztest_snapshot_create(zd->zd_name, id);
4813716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
4814716fd348SMartin Matuska }
4815716fd348SMartin Matuska 
4816716fd348SMartin Matuska /*
4817716fd348SMartin Matuska  * Cleanup non-standard snapshots and clones.
4818716fd348SMartin Matuska  */
4819716fd348SMartin Matuska static void
4820716fd348SMartin Matuska ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
4821716fd348SMartin Matuska {
4822716fd348SMartin Matuska 	char *snap1name;
4823716fd348SMartin Matuska 	char *clone1name;
4824716fd348SMartin Matuska 	char *snap2name;
4825716fd348SMartin Matuska 	char *clone2name;
4826716fd348SMartin Matuska 	char *snap3name;
4827716fd348SMartin Matuska 	int error;
4828716fd348SMartin Matuska 
4829716fd348SMartin Matuska 	snap1name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4830716fd348SMartin Matuska 	clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4831716fd348SMartin Matuska 	snap2name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4832716fd348SMartin Matuska 	clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4833716fd348SMartin Matuska 	snap3name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4834716fd348SMartin Matuska 
4835716fd348SMartin Matuska 	(void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, "%s@s1_%"PRIu64"",
4836716fd348SMartin Matuska 	    osname, id);
4837716fd348SMartin Matuska 	(void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, "%s/c1_%"PRIu64"",
4838716fd348SMartin Matuska 	    osname, id);
4839716fd348SMartin Matuska 	(void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, "%s@s2_%"PRIu64"",
4840716fd348SMartin Matuska 	    clone1name, id);
4841716fd348SMartin Matuska 	(void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, "%s/c2_%"PRIu64"",
4842716fd348SMartin Matuska 	    osname, id);
4843716fd348SMartin Matuska 	(void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, "%s@s3_%"PRIu64"",
4844716fd348SMartin Matuska 	    clone1name, id);
4845716fd348SMartin Matuska 
4846716fd348SMartin Matuska 	error = dsl_destroy_head(clone2name);
4847716fd348SMartin Matuska 	if (error && error != ENOENT)
4848716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clone2name, error);
4849716fd348SMartin Matuska 	error = dsl_destroy_snapshot(snap3name, B_FALSE);
4850716fd348SMartin Matuska 	if (error && error != ENOENT)
4851716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d",
4852716fd348SMartin Matuska 		    snap3name, error);
4853716fd348SMartin Matuska 	error = dsl_destroy_snapshot(snap2name, B_FALSE);
4854716fd348SMartin Matuska 	if (error && error != ENOENT)
4855716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d",
4856716fd348SMartin Matuska 		    snap2name, error);
4857716fd348SMartin Matuska 	error = dsl_destroy_head(clone1name);
4858716fd348SMartin Matuska 	if (error && error != ENOENT)
4859716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clone1name, error);
4860716fd348SMartin Matuska 	error = dsl_destroy_snapshot(snap1name, B_FALSE);
4861716fd348SMartin Matuska 	if (error && error != ENOENT)
4862716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d",
4863716fd348SMartin Matuska 		    snap1name, error);
4864716fd348SMartin Matuska 
4865716fd348SMartin Matuska 	umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN);
4866716fd348SMartin Matuska 	umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN);
4867716fd348SMartin Matuska 	umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN);
4868716fd348SMartin Matuska 	umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN);
4869716fd348SMartin Matuska 	umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN);
4870716fd348SMartin Matuska }
4871716fd348SMartin Matuska 
4872716fd348SMartin Matuska /*
4873716fd348SMartin Matuska  * Verify dsl_dataset_promote handles EBUSY
4874716fd348SMartin Matuska  */
4875716fd348SMartin Matuska void
4876716fd348SMartin Matuska ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
4877716fd348SMartin Matuska {
4878716fd348SMartin Matuska 	objset_t *os;
4879716fd348SMartin Matuska 	char *snap1name;
4880716fd348SMartin Matuska 	char *clone1name;
4881716fd348SMartin Matuska 	char *snap2name;
4882716fd348SMartin Matuska 	char *clone2name;
4883716fd348SMartin Matuska 	char *snap3name;
4884716fd348SMartin Matuska 	char *osname = zd->zd_name;
4885716fd348SMartin Matuska 	int error;
4886716fd348SMartin Matuska 
4887716fd348SMartin Matuska 	snap1name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4888716fd348SMartin Matuska 	clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4889716fd348SMartin Matuska 	snap2name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4890716fd348SMartin Matuska 	clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4891716fd348SMartin Matuska 	snap3name  = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL);
4892716fd348SMartin Matuska 
4893716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
4894716fd348SMartin Matuska 
4895716fd348SMartin Matuska 	ztest_dsl_dataset_cleanup(osname, id);
4896716fd348SMartin Matuska 
4897716fd348SMartin Matuska 	(void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, "%s@s1_%"PRIu64"",
4898716fd348SMartin Matuska 	    osname, id);
4899716fd348SMartin Matuska 	(void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, "%s/c1_%"PRIu64"",
4900716fd348SMartin Matuska 	    osname, id);
4901716fd348SMartin Matuska 	(void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, "%s@s2_%"PRIu64"",
4902716fd348SMartin Matuska 	    clone1name, id);
4903716fd348SMartin Matuska 	(void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, "%s/c2_%"PRIu64"",
4904716fd348SMartin Matuska 	    osname, id);
4905716fd348SMartin Matuska 	(void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, "%s@s3_%"PRIu64"",
4906716fd348SMartin Matuska 	    clone1name, id);
4907716fd348SMartin Matuska 
4908716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
4909716fd348SMartin Matuska 	if (error && error != EEXIST) {
4910716fd348SMartin Matuska 		if (error == ENOSPC) {
4911716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4912716fd348SMartin Matuska 			goto out;
4913716fd348SMartin Matuska 		}
4914716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_take_snapshot(%s) = %d", snap1name, error);
4915716fd348SMartin Matuska 	}
4916716fd348SMartin Matuska 
4917716fd348SMartin Matuska 	error = dmu_objset_clone(clone1name, snap1name);
4918716fd348SMartin Matuska 	if (error) {
4919716fd348SMartin Matuska 		if (error == ENOSPC) {
4920716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4921716fd348SMartin Matuska 			goto out;
4922716fd348SMartin Matuska 		}
4923716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_create(%s) = %d", clone1name, error);
4924716fd348SMartin Matuska 	}
4925716fd348SMartin Matuska 
4926716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1);
4927716fd348SMartin Matuska 	if (error && error != EEXIST) {
4928716fd348SMartin Matuska 		if (error == ENOSPC) {
4929716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4930716fd348SMartin Matuska 			goto out;
4931716fd348SMartin Matuska 		}
4932716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_open_snapshot(%s) = %d", snap2name, error);
4933716fd348SMartin Matuska 	}
4934716fd348SMartin Matuska 
4935716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1);
4936716fd348SMartin Matuska 	if (error && error != EEXIST) {
4937716fd348SMartin Matuska 		if (error == ENOSPC) {
4938716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4939716fd348SMartin Matuska 			goto out;
4940716fd348SMartin Matuska 		}
4941716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_open_snapshot(%s) = %d", snap3name, error);
4942716fd348SMartin Matuska 	}
4943716fd348SMartin Matuska 
4944716fd348SMartin Matuska 	error = dmu_objset_clone(clone2name, snap3name);
4945716fd348SMartin Matuska 	if (error) {
4946716fd348SMartin Matuska 		if (error == ENOSPC) {
4947716fd348SMartin Matuska 			ztest_record_enospc(FTAG);
4948716fd348SMartin Matuska 			goto out;
4949716fd348SMartin Matuska 		}
4950716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_create(%s) = %d", clone2name, error);
4951716fd348SMartin Matuska 	}
4952716fd348SMartin Matuska 
4953716fd348SMartin Matuska 	error = ztest_dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE,
4954716fd348SMartin Matuska 	    FTAG, &os);
4955716fd348SMartin Matuska 	if (error)
4956716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_own(%s) = %d", snap2name, error);
4957716fd348SMartin Matuska 	error = dsl_dataset_promote(clone2name, NULL);
4958716fd348SMartin Matuska 	if (error == ENOSPC) {
4959716fd348SMartin Matuska 		dmu_objset_disown(os, B_TRUE, FTAG);
4960716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
4961716fd348SMartin Matuska 		goto out;
4962716fd348SMartin Matuska 	}
4963716fd348SMartin Matuska 	if (error != EBUSY)
4964716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_dataset_promote(%s), %d, not EBUSY",
4965716fd348SMartin Matuska 		    clone2name, error);
4966716fd348SMartin Matuska 	dmu_objset_disown(os, B_TRUE, FTAG);
4967716fd348SMartin Matuska 
4968716fd348SMartin Matuska out:
4969716fd348SMartin Matuska 	ztest_dsl_dataset_cleanup(osname, id);
4970716fd348SMartin Matuska 
4971716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
4972716fd348SMartin Matuska 
4973716fd348SMartin Matuska 	umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN);
4974716fd348SMartin Matuska 	umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN);
4975716fd348SMartin Matuska 	umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN);
4976716fd348SMartin Matuska 	umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN);
4977716fd348SMartin Matuska 	umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN);
4978716fd348SMartin Matuska }
4979716fd348SMartin Matuska 
4980716fd348SMartin Matuska #undef OD_ARRAY_SIZE
4981716fd348SMartin Matuska #define	OD_ARRAY_SIZE	4
4982716fd348SMartin Matuska 
4983716fd348SMartin Matuska /*
4984716fd348SMartin Matuska  * Verify that dmu_object_{alloc,free} work as expected.
4985716fd348SMartin Matuska  */
4986716fd348SMartin Matuska void
4987716fd348SMartin Matuska ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
4988716fd348SMartin Matuska {
4989716fd348SMartin Matuska 	ztest_od_t *od;
4990716fd348SMartin Matuska 	int batchsize;
4991716fd348SMartin Matuska 	int size;
4992716fd348SMartin Matuska 	int b;
4993716fd348SMartin Matuska 
4994716fd348SMartin Matuska 	size = sizeof (ztest_od_t) * OD_ARRAY_SIZE;
4995716fd348SMartin Matuska 	od = umem_alloc(size, UMEM_NOFAIL);
4996716fd348SMartin Matuska 	batchsize = OD_ARRAY_SIZE;
4997716fd348SMartin Matuska 
4998716fd348SMartin Matuska 	for (b = 0; b < batchsize; b++)
4999716fd348SMartin Matuska 		ztest_od_init(od + b, id, FTAG, b, DMU_OT_UINT64_OTHER,
5000716fd348SMartin Matuska 		    0, 0, 0);
5001716fd348SMartin Matuska 
5002716fd348SMartin Matuska 	/*
5003716fd348SMartin Matuska 	 * Destroy the previous batch of objects, create a new batch,
5004716fd348SMartin Matuska 	 * and do some I/O on the new objects.
5005716fd348SMartin Matuska 	 */
50062a58b312SMartin Matuska 	if (ztest_object_init(zd, od, size, B_TRUE) != 0) {
50072a58b312SMartin Matuska 		zd->zd_od = NULL;
50082a58b312SMartin Matuska 		umem_free(od, size);
5009716fd348SMartin Matuska 		return;
50102a58b312SMartin Matuska 	}
5011716fd348SMartin Matuska 
5012716fd348SMartin Matuska 	while (ztest_random(4 * batchsize) != 0)
5013716fd348SMartin Matuska 		ztest_io(zd, od[ztest_random(batchsize)].od_object,
5014716fd348SMartin Matuska 		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
5015716fd348SMartin Matuska 
5016716fd348SMartin Matuska 	umem_free(od, size);
5017716fd348SMartin Matuska }
5018716fd348SMartin Matuska 
5019716fd348SMartin Matuska /*
5020716fd348SMartin Matuska  * Rewind the global allocator to verify object allocation backfilling.
5021716fd348SMartin Matuska  */
5022716fd348SMartin Matuska void
5023716fd348SMartin Matuska ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
5024716fd348SMartin Matuska {
5025716fd348SMartin Matuska 	(void) id;
5026716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5027be181ee2SMartin Matuska 	uint_t dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
5028716fd348SMartin Matuska 	uint64_t object;
5029716fd348SMartin Matuska 
5030716fd348SMartin Matuska 	/*
5031716fd348SMartin Matuska 	 * Rewind the global allocator randomly back to a lower object number
5032716fd348SMartin Matuska 	 * to force backfilling and reclamation of recently freed dnodes.
5033716fd348SMartin Matuska 	 */
5034716fd348SMartin Matuska 	mutex_enter(&os->os_obj_lock);
5035716fd348SMartin Matuska 	object = ztest_random(os->os_obj_next_chunk);
5036aca928a5SMartin Matuska 	os->os_obj_next_chunk = P2ALIGN_TYPED(object, dnodes_per_chunk,
5037aca928a5SMartin Matuska 	    uint64_t);
5038716fd348SMartin Matuska 	mutex_exit(&os->os_obj_lock);
5039716fd348SMartin Matuska }
5040716fd348SMartin Matuska 
5041716fd348SMartin Matuska #undef OD_ARRAY_SIZE
5042716fd348SMartin Matuska #define	OD_ARRAY_SIZE	2
5043716fd348SMartin Matuska 
5044716fd348SMartin Matuska /*
5045716fd348SMartin Matuska  * Verify that dmu_{read,write} work as expected.
5046716fd348SMartin Matuska  */
5047716fd348SMartin Matuska void
5048716fd348SMartin Matuska ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
5049716fd348SMartin Matuska {
5050716fd348SMartin Matuska 	int size;
5051716fd348SMartin Matuska 	ztest_od_t *od;
5052716fd348SMartin Matuska 
5053716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5054716fd348SMartin Matuska 	size = sizeof (ztest_od_t) * OD_ARRAY_SIZE;
5055716fd348SMartin Matuska 	od = umem_alloc(size, UMEM_NOFAIL);
5056716fd348SMartin Matuska 	dmu_tx_t *tx;
5057716fd348SMartin Matuska 	int freeit, error;
5058716fd348SMartin Matuska 	uint64_t i, n, s, txg;
5059716fd348SMartin Matuska 	bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
5060716fd348SMartin Matuska 	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
5061716fd348SMartin Matuska 	uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t);
5062716fd348SMartin Matuska 	uint64_t regions = 997;
5063716fd348SMartin Matuska 	uint64_t stride = 123456789ULL;
5064716fd348SMartin Matuska 	uint64_t width = 40;
5065716fd348SMartin Matuska 	int free_percent = 5;
50667a7741afSMartin Matuska 	uint32_t dmu_read_flags = DMU_READ_PREFETCH;
50677a7741afSMartin Matuska 
50687a7741afSMartin Matuska 	/*
50697a7741afSMartin Matuska 	 * We will randomly set when to do O_DIRECT on a read.
50707a7741afSMartin Matuska 	 */
50717a7741afSMartin Matuska 	if (ztest_random(4) == 0)
50727a7741afSMartin Matuska 		dmu_read_flags |= DMU_DIRECTIO;
5073716fd348SMartin Matuska 
5074716fd348SMartin Matuska 	/*
5075716fd348SMartin Matuska 	 * This test uses two objects, packobj and bigobj, that are always
5076716fd348SMartin Matuska 	 * updated together (i.e. in the same tx) so that their contents are
5077716fd348SMartin Matuska 	 * in sync and can be compared.  Their contents relate to each other
5078716fd348SMartin Matuska 	 * in a simple way: packobj is a dense array of 'bufwad' structures,
5079716fd348SMartin Matuska 	 * while bigobj is a sparse array of the same bufwads.  Specifically,
5080716fd348SMartin Matuska 	 * for any index n, there are three bufwads that should be identical:
5081716fd348SMartin Matuska 	 *
5082716fd348SMartin Matuska 	 *	packobj, at offset n * sizeof (bufwad_t)
5083716fd348SMartin Matuska 	 *	bigobj, at the head of the nth chunk
5084716fd348SMartin Matuska 	 *	bigobj, at the tail of the nth chunk
5085716fd348SMartin Matuska 	 *
5086716fd348SMartin Matuska 	 * The chunk size is arbitrary. It doesn't have to be a power of two,
5087716fd348SMartin Matuska 	 * and it doesn't have any relation to the object blocksize.
5088716fd348SMartin Matuska 	 * The only requirement is that it can hold at least two bufwads.
5089716fd348SMartin Matuska 	 *
5090716fd348SMartin Matuska 	 * Normally, we write the bufwad to each of these locations.
5091716fd348SMartin Matuska 	 * However, free_percent of the time we instead write zeroes to
5092716fd348SMartin Matuska 	 * packobj and perform a dmu_free_range() on bigobj.  By comparing
5093716fd348SMartin Matuska 	 * bigobj to packobj, we can verify that the DMU is correctly
5094716fd348SMartin Matuska 	 * tracking which parts of an object are allocated and free,
5095716fd348SMartin Matuska 	 * and that the contents of the allocated blocks are correct.
5096716fd348SMartin Matuska 	 */
5097716fd348SMartin Matuska 
5098716fd348SMartin Matuska 	/*
5099716fd348SMartin Matuska 	 * Read the directory info.  If it's the first time, set things up.
5100716fd348SMartin Matuska 	 */
5101716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
5102716fd348SMartin Matuska 	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
5103716fd348SMartin Matuska 	    chunksize);
5104716fd348SMartin Matuska 
5105716fd348SMartin Matuska 	if (ztest_object_init(zd, od, size, B_FALSE) != 0) {
5106716fd348SMartin Matuska 		umem_free(od, size);
5107716fd348SMartin Matuska 		return;
5108716fd348SMartin Matuska 	}
5109716fd348SMartin Matuska 
5110716fd348SMartin Matuska 	bigobj = od[0].od_object;
5111716fd348SMartin Matuska 	packobj = od[1].od_object;
5112716fd348SMartin Matuska 	chunksize = od[0].od_gen;
5113716fd348SMartin Matuska 	ASSERT3U(chunksize, ==, od[1].od_gen);
5114716fd348SMartin Matuska 
5115716fd348SMartin Matuska 	/*
5116716fd348SMartin Matuska 	 * Prefetch a random chunk of the big object.
5117716fd348SMartin Matuska 	 * Our aim here is to get some async reads in flight
5118716fd348SMartin Matuska 	 * for blocks that we may free below; the DMU should
5119716fd348SMartin Matuska 	 * handle this race correctly.
5120716fd348SMartin Matuska 	 */
5121716fd348SMartin Matuska 	n = ztest_random(regions) * stride + ztest_random(width);
5122716fd348SMartin Matuska 	s = 1 + ztest_random(2 * width - 1);
5123716fd348SMartin Matuska 	dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
5124716fd348SMartin Matuska 	    ZIO_PRIORITY_SYNC_READ);
5125716fd348SMartin Matuska 
5126716fd348SMartin Matuska 	/*
5127716fd348SMartin Matuska 	 * Pick a random index and compute the offsets into packobj and bigobj.
5128716fd348SMartin Matuska 	 */
5129716fd348SMartin Matuska 	n = ztest_random(regions) * stride + ztest_random(width);
5130716fd348SMartin Matuska 	s = 1 + ztest_random(width - 1);
5131716fd348SMartin Matuska 
5132716fd348SMartin Matuska 	packoff = n * sizeof (bufwad_t);
5133716fd348SMartin Matuska 	packsize = s * sizeof (bufwad_t);
5134716fd348SMartin Matuska 
5135716fd348SMartin Matuska 	bigoff = n * chunksize;
5136716fd348SMartin Matuska 	bigsize = s * chunksize;
5137716fd348SMartin Matuska 
5138716fd348SMartin Matuska 	packbuf = umem_alloc(packsize, UMEM_NOFAIL);
5139716fd348SMartin Matuska 	bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
5140716fd348SMartin Matuska 
5141716fd348SMartin Matuska 	/*
5142716fd348SMartin Matuska 	 * free_percent of the time, free a range of bigobj rather than
5143716fd348SMartin Matuska 	 * overwriting it.
5144716fd348SMartin Matuska 	 */
5145716fd348SMartin Matuska 	freeit = (ztest_random(100) < free_percent);
5146716fd348SMartin Matuska 
5147716fd348SMartin Matuska 	/*
5148716fd348SMartin Matuska 	 * Read the current contents of our objects.
5149716fd348SMartin Matuska 	 */
5150716fd348SMartin Matuska 	error = dmu_read(os, packobj, packoff, packsize, packbuf,
51517a7741afSMartin Matuska 	    dmu_read_flags);
5152716fd348SMartin Matuska 	ASSERT0(error);
5153716fd348SMartin Matuska 	error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
51547a7741afSMartin Matuska 	    dmu_read_flags);
5155716fd348SMartin Matuska 	ASSERT0(error);
5156716fd348SMartin Matuska 
5157716fd348SMartin Matuska 	/*
5158716fd348SMartin Matuska 	 * Get a tx for the mods to both packobj and bigobj.
5159716fd348SMartin Matuska 	 */
5160716fd348SMartin Matuska 	tx = dmu_tx_create(os);
5161716fd348SMartin Matuska 
5162716fd348SMartin Matuska 	dmu_tx_hold_write(tx, packobj, packoff, packsize);
5163716fd348SMartin Matuska 
5164716fd348SMartin Matuska 	if (freeit)
5165716fd348SMartin Matuska 		dmu_tx_hold_free(tx, bigobj, bigoff, bigsize);
5166716fd348SMartin Matuska 	else
5167716fd348SMartin Matuska 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
5168716fd348SMartin Matuska 
5169716fd348SMartin Matuska 	/* This accounts for setting the checksum/compression. */
5170716fd348SMartin Matuska 	dmu_tx_hold_bonus(tx, bigobj);
5171716fd348SMartin Matuska 
5172716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5173716fd348SMartin Matuska 	if (txg == 0) {
5174716fd348SMartin Matuska 		umem_free(packbuf, packsize);
5175716fd348SMartin Matuska 		umem_free(bigbuf, bigsize);
5176716fd348SMartin Matuska 		umem_free(od, size);
5177716fd348SMartin Matuska 		return;
5178716fd348SMartin Matuska 	}
5179716fd348SMartin Matuska 
5180716fd348SMartin Matuska 	enum zio_checksum cksum;
5181716fd348SMartin Matuska 	do {
5182716fd348SMartin Matuska 		cksum = (enum zio_checksum)
5183716fd348SMartin Matuska 		    ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
5184716fd348SMartin Matuska 	} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
5185716fd348SMartin Matuska 	dmu_object_set_checksum(os, bigobj, cksum, tx);
5186716fd348SMartin Matuska 
5187716fd348SMartin Matuska 	enum zio_compress comp;
5188716fd348SMartin Matuska 	do {
5189716fd348SMartin Matuska 		comp = (enum zio_compress)
5190716fd348SMartin Matuska 		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
5191716fd348SMartin Matuska 	} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
5192716fd348SMartin Matuska 	dmu_object_set_compress(os, bigobj, comp, tx);
5193716fd348SMartin Matuska 
5194716fd348SMartin Matuska 	/*
5195716fd348SMartin Matuska 	 * For each index from n to n + s, verify that the existing bufwad
5196716fd348SMartin Matuska 	 * in packobj matches the bufwads at the head and tail of the
5197716fd348SMartin Matuska 	 * corresponding chunk in bigobj.  Then update all three bufwads
5198716fd348SMartin Matuska 	 * with the new values we want to write out.
5199716fd348SMartin Matuska 	 */
5200716fd348SMartin Matuska 	for (i = 0; i < s; i++) {
5201716fd348SMartin Matuska 		/* LINTED */
5202716fd348SMartin Matuska 		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
5203716fd348SMartin Matuska 		/* LINTED */
5204716fd348SMartin Matuska 		bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
5205716fd348SMartin Matuska 		/* LINTED */
5206716fd348SMartin Matuska 		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
5207716fd348SMartin Matuska 
5208716fd348SMartin Matuska 		ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize);
5209716fd348SMartin Matuska 		ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize);
5210716fd348SMartin Matuska 
5211716fd348SMartin Matuska 		if (pack->bw_txg > txg)
5212716fd348SMartin Matuska 			fatal(B_FALSE,
5213716fd348SMartin Matuska 			    "future leak: got %"PRIx64", open txg is %"PRIx64"",
5214716fd348SMartin Matuska 			    pack->bw_txg, txg);
5215716fd348SMartin Matuska 
5216716fd348SMartin Matuska 		if (pack->bw_data != 0 && pack->bw_index != n + i)
5217716fd348SMartin Matuska 			fatal(B_FALSE, "wrong index: "
5218716fd348SMartin Matuska 			    "got %"PRIx64", wanted %"PRIx64"+%"PRIx64"",
5219716fd348SMartin Matuska 			    pack->bw_index, n, i);
5220716fd348SMartin Matuska 
5221716fd348SMartin Matuska 		if (memcmp(pack, bigH, sizeof (bufwad_t)) != 0)
5222716fd348SMartin Matuska 			fatal(B_FALSE, "pack/bigH mismatch in %p/%p",
5223716fd348SMartin Matuska 			    pack, bigH);
5224716fd348SMartin Matuska 
5225716fd348SMartin Matuska 		if (memcmp(pack, bigT, sizeof (bufwad_t)) != 0)
5226716fd348SMartin Matuska 			fatal(B_FALSE, "pack/bigT mismatch in %p/%p",
5227716fd348SMartin Matuska 			    pack, bigT);
5228716fd348SMartin Matuska 
5229716fd348SMartin Matuska 		if (freeit) {
5230716fd348SMartin Matuska 			memset(pack, 0, sizeof (bufwad_t));
5231716fd348SMartin Matuska 		} else {
5232716fd348SMartin Matuska 			pack->bw_index = n + i;
5233716fd348SMartin Matuska 			pack->bw_txg = txg;
5234716fd348SMartin Matuska 			pack->bw_data = 1 + ztest_random(-2ULL);
5235716fd348SMartin Matuska 		}
5236716fd348SMartin Matuska 		*bigH = *pack;
5237716fd348SMartin Matuska 		*bigT = *pack;
5238716fd348SMartin Matuska 	}
5239716fd348SMartin Matuska 
5240716fd348SMartin Matuska 	/*
5241716fd348SMartin Matuska 	 * We've verified all the old bufwads, and made new ones.
5242716fd348SMartin Matuska 	 * Now write them out.
5243716fd348SMartin Matuska 	 */
5244716fd348SMartin Matuska 	dmu_write(os, packobj, packoff, packsize, packbuf, tx);
5245716fd348SMartin Matuska 
5246716fd348SMartin Matuska 	if (freeit) {
5247716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 7) {
5248716fd348SMartin Matuska 			(void) printf("freeing offset %"PRIx64" size %"PRIx64""
5249716fd348SMartin Matuska 			    " txg %"PRIx64"\n",
5250716fd348SMartin Matuska 			    bigoff, bigsize, txg);
5251716fd348SMartin Matuska 		}
5252716fd348SMartin Matuska 		VERIFY0(dmu_free_range(os, bigobj, bigoff, bigsize, tx));
5253716fd348SMartin Matuska 	} else {
5254716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 7) {
5255716fd348SMartin Matuska 			(void) printf("writing offset %"PRIx64" size %"PRIx64""
5256716fd348SMartin Matuska 			    " txg %"PRIx64"\n",
5257716fd348SMartin Matuska 			    bigoff, bigsize, txg);
5258716fd348SMartin Matuska 		}
5259716fd348SMartin Matuska 		dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx);
5260716fd348SMartin Matuska 	}
5261716fd348SMartin Matuska 
5262716fd348SMartin Matuska 	dmu_tx_commit(tx);
5263716fd348SMartin Matuska 
5264716fd348SMartin Matuska 	/*
5265716fd348SMartin Matuska 	 * Sanity check the stuff we just wrote.
5266716fd348SMartin Matuska 	 */
5267716fd348SMartin Matuska 	{
5268716fd348SMartin Matuska 		void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
5269716fd348SMartin Matuska 		void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
5270716fd348SMartin Matuska 
5271716fd348SMartin Matuska 		VERIFY0(dmu_read(os, packobj, packoff,
52727a7741afSMartin Matuska 		    packsize, packcheck, dmu_read_flags));
5273716fd348SMartin Matuska 		VERIFY0(dmu_read(os, bigobj, bigoff,
52747a7741afSMartin Matuska 		    bigsize, bigcheck, dmu_read_flags));
5275716fd348SMartin Matuska 
5276716fd348SMartin Matuska 		ASSERT0(memcmp(packbuf, packcheck, packsize));
5277716fd348SMartin Matuska 		ASSERT0(memcmp(bigbuf, bigcheck, bigsize));
5278716fd348SMartin Matuska 
5279716fd348SMartin Matuska 		umem_free(packcheck, packsize);
5280716fd348SMartin Matuska 		umem_free(bigcheck, bigsize);
5281716fd348SMartin Matuska 	}
5282716fd348SMartin Matuska 
5283716fd348SMartin Matuska 	umem_free(packbuf, packsize);
5284716fd348SMartin Matuska 	umem_free(bigbuf, bigsize);
5285716fd348SMartin Matuska 	umem_free(od, size);
5286716fd348SMartin Matuska }
5287716fd348SMartin Matuska 
5288716fd348SMartin Matuska static void
5289716fd348SMartin Matuska compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
5290716fd348SMartin Matuska     uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
5291716fd348SMartin Matuska {
5292716fd348SMartin Matuska 	uint64_t i;
5293716fd348SMartin Matuska 	bufwad_t *pack;
5294716fd348SMartin Matuska 	bufwad_t *bigH;
5295716fd348SMartin Matuska 	bufwad_t *bigT;
5296716fd348SMartin Matuska 
5297716fd348SMartin Matuska 	/*
5298716fd348SMartin Matuska 	 * For each index from n to n + s, verify that the existing bufwad
5299716fd348SMartin Matuska 	 * in packobj matches the bufwads at the head and tail of the
5300716fd348SMartin Matuska 	 * corresponding chunk in bigobj.  Then update all three bufwads
5301716fd348SMartin Matuska 	 * with the new values we want to write out.
5302716fd348SMartin Matuska 	 */
5303716fd348SMartin Matuska 	for (i = 0; i < s; i++) {
5304716fd348SMartin Matuska 		/* LINTED */
5305716fd348SMartin Matuska 		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
5306716fd348SMartin Matuska 		/* LINTED */
5307716fd348SMartin Matuska 		bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
5308716fd348SMartin Matuska 		/* LINTED */
5309716fd348SMartin Matuska 		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
5310716fd348SMartin Matuska 
5311716fd348SMartin Matuska 		ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize);
5312716fd348SMartin Matuska 		ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize);
5313716fd348SMartin Matuska 
5314716fd348SMartin Matuska 		if (pack->bw_txg > txg)
5315716fd348SMartin Matuska 			fatal(B_FALSE,
5316716fd348SMartin Matuska 			    "future leak: got %"PRIx64", open txg is %"PRIx64"",
5317716fd348SMartin Matuska 			    pack->bw_txg, txg);
5318716fd348SMartin Matuska 
5319716fd348SMartin Matuska 		if (pack->bw_data != 0 && pack->bw_index != n + i)
5320716fd348SMartin Matuska 			fatal(B_FALSE, "wrong index: "
5321716fd348SMartin Matuska 			    "got %"PRIx64", wanted %"PRIx64"+%"PRIx64"",
5322716fd348SMartin Matuska 			    pack->bw_index, n, i);
5323716fd348SMartin Matuska 
5324716fd348SMartin Matuska 		if (memcmp(pack, bigH, sizeof (bufwad_t)) != 0)
5325716fd348SMartin Matuska 			fatal(B_FALSE, "pack/bigH mismatch in %p/%p",
5326716fd348SMartin Matuska 			    pack, bigH);
5327716fd348SMartin Matuska 
5328716fd348SMartin Matuska 		if (memcmp(pack, bigT, sizeof (bufwad_t)) != 0)
5329716fd348SMartin Matuska 			fatal(B_FALSE, "pack/bigT mismatch in %p/%p",
5330716fd348SMartin Matuska 			    pack, bigT);
5331716fd348SMartin Matuska 
5332716fd348SMartin Matuska 		pack->bw_index = n + i;
5333716fd348SMartin Matuska 		pack->bw_txg = txg;
5334716fd348SMartin Matuska 		pack->bw_data = 1 + ztest_random(-2ULL);
5335716fd348SMartin Matuska 
5336716fd348SMartin Matuska 		*bigH = *pack;
5337716fd348SMartin Matuska 		*bigT = *pack;
5338716fd348SMartin Matuska 	}
5339716fd348SMartin Matuska }
5340716fd348SMartin Matuska 
5341716fd348SMartin Matuska #undef OD_ARRAY_SIZE
5342716fd348SMartin Matuska #define	OD_ARRAY_SIZE	2
5343716fd348SMartin Matuska 
5344716fd348SMartin Matuska void
5345716fd348SMartin Matuska ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
5346716fd348SMartin Matuska {
5347716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5348716fd348SMartin Matuska 	ztest_od_t *od;
5349716fd348SMartin Matuska 	dmu_tx_t *tx;
5350716fd348SMartin Matuska 	uint64_t i;
5351716fd348SMartin Matuska 	int error;
5352716fd348SMartin Matuska 	int size;
5353716fd348SMartin Matuska 	uint64_t n, s, txg;
5354716fd348SMartin Matuska 	bufwad_t *packbuf, *bigbuf;
5355716fd348SMartin Matuska 	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
5356716fd348SMartin Matuska 	uint64_t blocksize = ztest_random_blocksize();
5357716fd348SMartin Matuska 	uint64_t chunksize = blocksize;
5358716fd348SMartin Matuska 	uint64_t regions = 997;
5359716fd348SMartin Matuska 	uint64_t stride = 123456789ULL;
5360716fd348SMartin Matuska 	uint64_t width = 9;
5361716fd348SMartin Matuska 	dmu_buf_t *bonus_db;
5362716fd348SMartin Matuska 	arc_buf_t **bigbuf_arcbufs;
5363716fd348SMartin Matuska 	dmu_object_info_t doi;
53647a7741afSMartin Matuska 	uint32_t dmu_read_flags = DMU_READ_PREFETCH;
53657a7741afSMartin Matuska 
53667a7741afSMartin Matuska 	/*
53677a7741afSMartin Matuska 	 * We will randomly set when to do O_DIRECT on a read.
53687a7741afSMartin Matuska 	 */
53697a7741afSMartin Matuska 	if (ztest_random(4) == 0)
53707a7741afSMartin Matuska 		dmu_read_flags |= DMU_DIRECTIO;
5371716fd348SMartin Matuska 
5372716fd348SMartin Matuska 	size = sizeof (ztest_od_t) * OD_ARRAY_SIZE;
5373716fd348SMartin Matuska 	od = umem_alloc(size, UMEM_NOFAIL);
5374716fd348SMartin Matuska 
5375716fd348SMartin Matuska 	/*
5376716fd348SMartin Matuska 	 * This test uses two objects, packobj and bigobj, that are always
5377716fd348SMartin Matuska 	 * updated together (i.e. in the same tx) so that their contents are
5378716fd348SMartin Matuska 	 * in sync and can be compared.  Their contents relate to each other
5379716fd348SMartin Matuska 	 * in a simple way: packobj is a dense array of 'bufwad' structures,
5380716fd348SMartin Matuska 	 * while bigobj is a sparse array of the same bufwads.  Specifically,
5381716fd348SMartin Matuska 	 * for any index n, there are three bufwads that should be identical:
5382716fd348SMartin Matuska 	 *
5383716fd348SMartin Matuska 	 *	packobj, at offset n * sizeof (bufwad_t)
5384716fd348SMartin Matuska 	 *	bigobj, at the head of the nth chunk
5385716fd348SMartin Matuska 	 *	bigobj, at the tail of the nth chunk
5386716fd348SMartin Matuska 	 *
5387716fd348SMartin Matuska 	 * The chunk size is set equal to bigobj block size so that
5388716fd348SMartin Matuska 	 * dmu_assign_arcbuf_by_dbuf() can be tested for object updates.
5389716fd348SMartin Matuska 	 */
5390716fd348SMartin Matuska 
5391716fd348SMartin Matuska 	/*
5392716fd348SMartin Matuska 	 * Read the directory info.  If it's the first time, set things up.
5393716fd348SMartin Matuska 	 */
5394716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
5395716fd348SMartin Matuska 	ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
5396716fd348SMartin Matuska 	    chunksize);
5397716fd348SMartin Matuska 
5398716fd348SMartin Matuska 
5399716fd348SMartin Matuska 	if (ztest_object_init(zd, od, size, B_FALSE) != 0) {
5400716fd348SMartin Matuska 		umem_free(od, size);
5401716fd348SMartin Matuska 		return;
5402716fd348SMartin Matuska 	}
5403716fd348SMartin Matuska 
5404716fd348SMartin Matuska 	bigobj = od[0].od_object;
5405716fd348SMartin Matuska 	packobj = od[1].od_object;
5406716fd348SMartin Matuska 	blocksize = od[0].od_blocksize;
5407716fd348SMartin Matuska 	chunksize = blocksize;
5408716fd348SMartin Matuska 	ASSERT3U(chunksize, ==, od[1].od_gen);
5409716fd348SMartin Matuska 
5410716fd348SMartin Matuska 	VERIFY0(dmu_object_info(os, bigobj, &doi));
5411716fd348SMartin Matuska 	VERIFY(ISP2(doi.doi_data_block_size));
5412716fd348SMartin Matuska 	VERIFY3U(chunksize, ==, doi.doi_data_block_size);
5413716fd348SMartin Matuska 	VERIFY3U(chunksize, >=, 2 * sizeof (bufwad_t));
5414716fd348SMartin Matuska 
5415716fd348SMartin Matuska 	/*
5416716fd348SMartin Matuska 	 * Pick a random index and compute the offsets into packobj and bigobj.
5417716fd348SMartin Matuska 	 */
5418716fd348SMartin Matuska 	n = ztest_random(regions) * stride + ztest_random(width);
5419716fd348SMartin Matuska 	s = 1 + ztest_random(width - 1);
5420716fd348SMartin Matuska 
5421716fd348SMartin Matuska 	packoff = n * sizeof (bufwad_t);
5422716fd348SMartin Matuska 	packsize = s * sizeof (bufwad_t);
5423716fd348SMartin Matuska 
5424716fd348SMartin Matuska 	bigoff = n * chunksize;
5425716fd348SMartin Matuska 	bigsize = s * chunksize;
5426716fd348SMartin Matuska 
5427716fd348SMartin Matuska 	packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
5428716fd348SMartin Matuska 	bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
5429716fd348SMartin Matuska 
5430716fd348SMartin Matuska 	VERIFY0(dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));
5431716fd348SMartin Matuska 
5432716fd348SMartin Matuska 	bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
5433716fd348SMartin Matuska 
5434716fd348SMartin Matuska 	/*
5435716fd348SMartin Matuska 	 * Iteration 0 test zcopy for DB_UNCACHED dbufs.
5436716fd348SMartin Matuska 	 * Iteration 1 test zcopy to already referenced dbufs.
5437716fd348SMartin Matuska 	 * Iteration 2 test zcopy to dirty dbuf in the same txg.
5438716fd348SMartin Matuska 	 * Iteration 3 test zcopy to dbuf dirty in previous txg.
5439716fd348SMartin Matuska 	 * Iteration 4 test zcopy when dbuf is no longer dirty.
5440716fd348SMartin Matuska 	 * Iteration 5 test zcopy when it can't be done.
5441716fd348SMartin Matuska 	 * Iteration 6 one more zcopy write.
5442716fd348SMartin Matuska 	 */
5443716fd348SMartin Matuska 	for (i = 0; i < 7; i++) {
5444716fd348SMartin Matuska 		uint64_t j;
5445716fd348SMartin Matuska 		uint64_t off;
5446716fd348SMartin Matuska 
5447716fd348SMartin Matuska 		/*
5448716fd348SMartin Matuska 		 * In iteration 5 (i == 5) use arcbufs
5449716fd348SMartin Matuska 		 * that don't match bigobj blksz to test
5450716fd348SMartin Matuska 		 * dmu_assign_arcbuf_by_dbuf() when it can't directly
5451716fd348SMartin Matuska 		 * assign an arcbuf to a dbuf.
5452716fd348SMartin Matuska 		 */
5453716fd348SMartin Matuska 		for (j = 0; j < s; j++) {
5454716fd348SMartin Matuska 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
5455716fd348SMartin Matuska 				bigbuf_arcbufs[j] =
5456716fd348SMartin Matuska 				    dmu_request_arcbuf(bonus_db, chunksize);
5457716fd348SMartin Matuska 			} else {
5458716fd348SMartin Matuska 				bigbuf_arcbufs[2 * j] =
5459716fd348SMartin Matuska 				    dmu_request_arcbuf(bonus_db, chunksize / 2);
5460716fd348SMartin Matuska 				bigbuf_arcbufs[2 * j + 1] =
5461716fd348SMartin Matuska 				    dmu_request_arcbuf(bonus_db, chunksize / 2);
5462716fd348SMartin Matuska 			}
5463716fd348SMartin Matuska 		}
5464716fd348SMartin Matuska 
5465716fd348SMartin Matuska 		/*
5466716fd348SMartin Matuska 		 * Get a tx for the mods to both packobj and bigobj.
5467716fd348SMartin Matuska 		 */
5468716fd348SMartin Matuska 		tx = dmu_tx_create(os);
5469716fd348SMartin Matuska 
5470716fd348SMartin Matuska 		dmu_tx_hold_write(tx, packobj, packoff, packsize);
5471716fd348SMartin Matuska 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
5472716fd348SMartin Matuska 
5473716fd348SMartin Matuska 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5474716fd348SMartin Matuska 		if (txg == 0) {
5475716fd348SMartin Matuska 			umem_free(packbuf, packsize);
5476716fd348SMartin Matuska 			umem_free(bigbuf, bigsize);
5477716fd348SMartin Matuska 			for (j = 0; j < s; j++) {
5478716fd348SMartin Matuska 				if (i != 5 ||
5479716fd348SMartin Matuska 				    chunksize < (SPA_MINBLOCKSIZE * 2)) {
5480716fd348SMartin Matuska 					dmu_return_arcbuf(bigbuf_arcbufs[j]);
5481716fd348SMartin Matuska 				} else {
5482716fd348SMartin Matuska 					dmu_return_arcbuf(
5483716fd348SMartin Matuska 					    bigbuf_arcbufs[2 * j]);
5484716fd348SMartin Matuska 					dmu_return_arcbuf(
5485716fd348SMartin Matuska 					    bigbuf_arcbufs[2 * j + 1]);
5486716fd348SMartin Matuska 				}
5487716fd348SMartin Matuska 			}
5488716fd348SMartin Matuska 			umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
5489716fd348SMartin Matuska 			umem_free(od, size);
5490716fd348SMartin Matuska 			dmu_buf_rele(bonus_db, FTAG);
5491716fd348SMartin Matuska 			return;
5492716fd348SMartin Matuska 		}
5493716fd348SMartin Matuska 
5494716fd348SMartin Matuska 		/*
5495716fd348SMartin Matuska 		 * 50% of the time don't read objects in the 1st iteration to
5496716fd348SMartin Matuska 		 * test dmu_assign_arcbuf_by_dbuf() for the case when there are
5497716fd348SMartin Matuska 		 * no existing dbufs for the specified offsets.
5498716fd348SMartin Matuska 		 */
5499716fd348SMartin Matuska 		if (i != 0 || ztest_random(2) != 0) {
5500716fd348SMartin Matuska 			error = dmu_read(os, packobj, packoff,
55017a7741afSMartin Matuska 			    packsize, packbuf, dmu_read_flags);
5502716fd348SMartin Matuska 			ASSERT0(error);
5503716fd348SMartin Matuska 			error = dmu_read(os, bigobj, bigoff, bigsize,
55047a7741afSMartin Matuska 			    bigbuf, dmu_read_flags);
5505716fd348SMartin Matuska 			ASSERT0(error);
5506716fd348SMartin Matuska 		}
5507716fd348SMartin Matuska 		compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
5508716fd348SMartin Matuska 		    n, chunksize, txg);
5509716fd348SMartin Matuska 
5510716fd348SMartin Matuska 		/*
5511716fd348SMartin Matuska 		 * We've verified all the old bufwads, and made new ones.
5512716fd348SMartin Matuska 		 * Now write them out.
5513716fd348SMartin Matuska 		 */
5514716fd348SMartin Matuska 		dmu_write(os, packobj, packoff, packsize, packbuf, tx);
5515716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 7) {
5516716fd348SMartin Matuska 			(void) printf("writing offset %"PRIx64" size %"PRIx64""
5517716fd348SMartin Matuska 			    " txg %"PRIx64"\n",
5518716fd348SMartin Matuska 			    bigoff, bigsize, txg);
5519716fd348SMartin Matuska 		}
5520716fd348SMartin Matuska 		for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
5521716fd348SMartin Matuska 			dmu_buf_t *dbt;
5522716fd348SMartin Matuska 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
5523716fd348SMartin Matuska 				memcpy(bigbuf_arcbufs[j]->b_data,
5524716fd348SMartin Matuska 				    (caddr_t)bigbuf + (off - bigoff),
5525716fd348SMartin Matuska 				    chunksize);
5526716fd348SMartin Matuska 			} else {
5527716fd348SMartin Matuska 				memcpy(bigbuf_arcbufs[2 * j]->b_data,
5528716fd348SMartin Matuska 				    (caddr_t)bigbuf + (off - bigoff),
5529716fd348SMartin Matuska 				    chunksize / 2);
5530716fd348SMartin Matuska 				memcpy(bigbuf_arcbufs[2 * j + 1]->b_data,
5531716fd348SMartin Matuska 				    (caddr_t)bigbuf + (off - bigoff) +
5532716fd348SMartin Matuska 				    chunksize / 2,
5533716fd348SMartin Matuska 				    chunksize / 2);
5534716fd348SMartin Matuska 			}
5535716fd348SMartin Matuska 
5536716fd348SMartin Matuska 			if (i == 1) {
5537716fd348SMartin Matuska 				VERIFY(dmu_buf_hold(os, bigobj, off,
5538716fd348SMartin Matuska 				    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
5539716fd348SMartin Matuska 			}
5540716fd348SMartin Matuska 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
5541716fd348SMartin Matuska 				VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
5542716fd348SMartin Matuska 				    off, bigbuf_arcbufs[j], tx));
5543716fd348SMartin Matuska 			} else {
5544716fd348SMartin Matuska 				VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
5545716fd348SMartin Matuska 				    off, bigbuf_arcbufs[2 * j], tx));
5546716fd348SMartin Matuska 				VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
5547716fd348SMartin Matuska 				    off + chunksize / 2,
5548716fd348SMartin Matuska 				    bigbuf_arcbufs[2 * j + 1], tx));
5549716fd348SMartin Matuska 			}
5550716fd348SMartin Matuska 			if (i == 1) {
5551716fd348SMartin Matuska 				dmu_buf_rele(dbt, FTAG);
5552716fd348SMartin Matuska 			}
5553716fd348SMartin Matuska 		}
5554716fd348SMartin Matuska 		dmu_tx_commit(tx);
5555716fd348SMartin Matuska 
5556716fd348SMartin Matuska 		/*
5557716fd348SMartin Matuska 		 * Sanity check the stuff we just wrote.
5558716fd348SMartin Matuska 		 */
5559716fd348SMartin Matuska 		{
5560716fd348SMartin Matuska 			void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
5561716fd348SMartin Matuska 			void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
5562716fd348SMartin Matuska 
5563716fd348SMartin Matuska 			VERIFY0(dmu_read(os, packobj, packoff,
55647a7741afSMartin Matuska 			    packsize, packcheck, dmu_read_flags));
5565716fd348SMartin Matuska 			VERIFY0(dmu_read(os, bigobj, bigoff,
55667a7741afSMartin Matuska 			    bigsize, bigcheck, dmu_read_flags));
5567716fd348SMartin Matuska 
5568716fd348SMartin Matuska 			ASSERT0(memcmp(packbuf, packcheck, packsize));
5569716fd348SMartin Matuska 			ASSERT0(memcmp(bigbuf, bigcheck, bigsize));
5570716fd348SMartin Matuska 
5571716fd348SMartin Matuska 			umem_free(packcheck, packsize);
5572716fd348SMartin Matuska 			umem_free(bigcheck, bigsize);
5573716fd348SMartin Matuska 		}
5574716fd348SMartin Matuska 		if (i == 2) {
5575716fd348SMartin Matuska 			txg_wait_open(dmu_objset_pool(os), 0, B_TRUE);
5576716fd348SMartin Matuska 		} else if (i == 3) {
5577716fd348SMartin Matuska 			txg_wait_synced(dmu_objset_pool(os), 0);
5578716fd348SMartin Matuska 		}
5579716fd348SMartin Matuska 	}
5580716fd348SMartin Matuska 
5581716fd348SMartin Matuska 	dmu_buf_rele(bonus_db, FTAG);
5582716fd348SMartin Matuska 	umem_free(packbuf, packsize);
5583716fd348SMartin Matuska 	umem_free(bigbuf, bigsize);
5584716fd348SMartin Matuska 	umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
5585716fd348SMartin Matuska 	umem_free(od, size);
5586716fd348SMartin Matuska }
5587716fd348SMartin Matuska 
5588716fd348SMartin Matuska void
5589716fd348SMartin Matuska ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
5590716fd348SMartin Matuska {
5591716fd348SMartin Matuska 	(void) id;
5592716fd348SMartin Matuska 	ztest_od_t *od;
5593716fd348SMartin Matuska 
5594716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
5595716fd348SMartin Matuska 	uint64_t offset = (1ULL << (ztest_random(20) + 43)) +
5596716fd348SMartin Matuska 	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
5597716fd348SMartin Matuska 
5598716fd348SMartin Matuska 	/*
5599716fd348SMartin Matuska 	 * Have multiple threads write to large offsets in an object
5600716fd348SMartin Matuska 	 * to verify that parallel writes to an object -- even to the
5601716fd348SMartin Matuska 	 * same blocks within the object -- doesn't cause any trouble.
5602716fd348SMartin Matuska 	 */
5603716fd348SMartin Matuska 	ztest_od_init(od, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
5604716fd348SMartin Matuska 
5605716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0)
5606716fd348SMartin Matuska 		return;
5607716fd348SMartin Matuska 
5608716fd348SMartin Matuska 	while (ztest_random(10) != 0)
5609716fd348SMartin Matuska 		ztest_io(zd, od->od_object, offset);
5610716fd348SMartin Matuska 
5611716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
5612716fd348SMartin Matuska }
5613716fd348SMartin Matuska 
5614716fd348SMartin Matuska void
5615716fd348SMartin Matuska ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
5616716fd348SMartin Matuska {
5617716fd348SMartin Matuska 	ztest_od_t *od;
5618716fd348SMartin Matuska 	uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) +
5619716fd348SMartin Matuska 	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
5620716fd348SMartin Matuska 	uint64_t count = ztest_random(20) + 1;
5621716fd348SMartin Matuska 	uint64_t blocksize = ztest_random_blocksize();
5622716fd348SMartin Matuska 	void *data;
5623716fd348SMartin Matuska 
5624716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
5625716fd348SMartin Matuska 
5626716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
5627716fd348SMartin Matuska 
5628716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
5629716fd348SMartin Matuska 	    !ztest_random(2)) != 0) {
5630716fd348SMartin Matuska 		umem_free(od, sizeof (ztest_od_t));
5631716fd348SMartin Matuska 		return;
5632716fd348SMartin Matuska 	}
5633716fd348SMartin Matuska 
5634716fd348SMartin Matuska 	if (ztest_truncate(zd, od->od_object, offset, count * blocksize) != 0) {
5635716fd348SMartin Matuska 		umem_free(od, sizeof (ztest_od_t));
5636716fd348SMartin Matuska 		return;
5637716fd348SMartin Matuska 	}
5638716fd348SMartin Matuska 
5639716fd348SMartin Matuska 	ztest_prealloc(zd, od->od_object, offset, count * blocksize);
5640716fd348SMartin Matuska 
5641716fd348SMartin Matuska 	data = umem_zalloc(blocksize, UMEM_NOFAIL);
5642716fd348SMartin Matuska 
5643716fd348SMartin Matuska 	while (ztest_random(count) != 0) {
5644716fd348SMartin Matuska 		uint64_t randoff = offset + (ztest_random(count) * blocksize);
5645716fd348SMartin Matuska 		if (ztest_write(zd, od->od_object, randoff, blocksize,
5646716fd348SMartin Matuska 		    data) != 0)
5647716fd348SMartin Matuska 			break;
5648716fd348SMartin Matuska 		while (ztest_random(4) != 0)
5649716fd348SMartin Matuska 			ztest_io(zd, od->od_object, randoff);
5650716fd348SMartin Matuska 	}
5651716fd348SMartin Matuska 
5652716fd348SMartin Matuska 	umem_free(data, blocksize);
5653716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
5654716fd348SMartin Matuska }
5655716fd348SMartin Matuska 
5656716fd348SMartin Matuska /*
5657716fd348SMartin Matuska  * Verify that zap_{create,destroy,add,remove,update} work as expected.
5658716fd348SMartin Matuska  */
5659716fd348SMartin Matuska #define	ZTEST_ZAP_MIN_INTS	1
5660716fd348SMartin Matuska #define	ZTEST_ZAP_MAX_INTS	4
5661716fd348SMartin Matuska #define	ZTEST_ZAP_MAX_PROPS	1000
5662716fd348SMartin Matuska 
5663716fd348SMartin Matuska void
5664716fd348SMartin Matuska ztest_zap(ztest_ds_t *zd, uint64_t id)
5665716fd348SMartin Matuska {
5666716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5667716fd348SMartin Matuska 	ztest_od_t *od;
5668716fd348SMartin Matuska 	uint64_t object;
5669716fd348SMartin Matuska 	uint64_t txg, last_txg;
5670716fd348SMartin Matuska 	uint64_t value[ZTEST_ZAP_MAX_INTS];
5671716fd348SMartin Matuska 	uint64_t zl_ints, zl_intsize, prop;
5672716fd348SMartin Matuska 	int i, ints;
5673716fd348SMartin Matuska 	dmu_tx_t *tx;
5674716fd348SMartin Matuska 	char propname[100], txgname[100];
5675716fd348SMartin Matuska 	int error;
5676a0b956f5SMartin Matuska 	const char *const hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
5677716fd348SMartin Matuska 
5678716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
5679716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
5680716fd348SMartin Matuska 
5681716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
5682716fd348SMartin Matuska 	    !ztest_random(2)) != 0)
5683716fd348SMartin Matuska 		goto out;
5684716fd348SMartin Matuska 
5685716fd348SMartin Matuska 	object = od->od_object;
5686716fd348SMartin Matuska 
5687716fd348SMartin Matuska 	/*
5688716fd348SMartin Matuska 	 * Generate a known hash collision, and verify that
5689716fd348SMartin Matuska 	 * we can lookup and remove both entries.
5690716fd348SMartin Matuska 	 */
5691716fd348SMartin Matuska 	tx = dmu_tx_create(os);
5692716fd348SMartin Matuska 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
5693716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5694716fd348SMartin Matuska 	if (txg == 0)
5695716fd348SMartin Matuska 		goto out;
5696716fd348SMartin Matuska 	for (i = 0; i < 2; i++) {
5697716fd348SMartin Matuska 		value[i] = i;
5698716fd348SMartin Matuska 		VERIFY0(zap_add(os, object, hc[i], sizeof (uint64_t),
5699716fd348SMartin Matuska 		    1, &value[i], tx));
5700716fd348SMartin Matuska 	}
5701716fd348SMartin Matuska 	for (i = 0; i < 2; i++) {
5702716fd348SMartin Matuska 		VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i],
5703716fd348SMartin Matuska 		    sizeof (uint64_t), 1, &value[i], tx));
5704716fd348SMartin Matuska 		VERIFY0(
5705716fd348SMartin Matuska 		    zap_length(os, object, hc[i], &zl_intsize, &zl_ints));
5706716fd348SMartin Matuska 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
5707716fd348SMartin Matuska 		ASSERT3U(zl_ints, ==, 1);
5708716fd348SMartin Matuska 	}
5709716fd348SMartin Matuska 	for (i = 0; i < 2; i++) {
5710716fd348SMartin Matuska 		VERIFY0(zap_remove(os, object, hc[i], tx));
5711716fd348SMartin Matuska 	}
5712716fd348SMartin Matuska 	dmu_tx_commit(tx);
5713716fd348SMartin Matuska 
5714716fd348SMartin Matuska 	/*
5715716fd348SMartin Matuska 	 * Generate a bunch of random entries.
5716716fd348SMartin Matuska 	 */
5717716fd348SMartin Matuska 	ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
5718716fd348SMartin Matuska 
5719716fd348SMartin Matuska 	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
5720716fd348SMartin Matuska 	(void) sprintf(propname, "prop_%"PRIu64"", prop);
5721716fd348SMartin Matuska 	(void) sprintf(txgname, "txg_%"PRIu64"", prop);
5722716fd348SMartin Matuska 	memset(value, 0, sizeof (value));
5723716fd348SMartin Matuska 	last_txg = 0;
5724716fd348SMartin Matuska 
5725716fd348SMartin Matuska 	/*
5726716fd348SMartin Matuska 	 * If these zap entries already exist, validate their contents.
5727716fd348SMartin Matuska 	 */
5728716fd348SMartin Matuska 	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
5729716fd348SMartin Matuska 	if (error == 0) {
5730716fd348SMartin Matuska 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
5731716fd348SMartin Matuska 		ASSERT3U(zl_ints, ==, 1);
5732716fd348SMartin Matuska 
5733716fd348SMartin Matuska 		VERIFY0(zap_lookup(os, object, txgname, zl_intsize,
5734716fd348SMartin Matuska 		    zl_ints, &last_txg));
5735716fd348SMartin Matuska 
5736716fd348SMartin Matuska 		VERIFY0(zap_length(os, object, propname, &zl_intsize,
5737716fd348SMartin Matuska 		    &zl_ints));
5738716fd348SMartin Matuska 
5739716fd348SMartin Matuska 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
5740716fd348SMartin Matuska 		ASSERT3U(zl_ints, ==, ints);
5741716fd348SMartin Matuska 
5742716fd348SMartin Matuska 		VERIFY0(zap_lookup(os, object, propname, zl_intsize,
5743716fd348SMartin Matuska 		    zl_ints, value));
5744716fd348SMartin Matuska 
5745716fd348SMartin Matuska 		for (i = 0; i < ints; i++) {
5746716fd348SMartin Matuska 			ASSERT3U(value[i], ==, last_txg + object + i);
5747716fd348SMartin Matuska 		}
5748716fd348SMartin Matuska 	} else {
5749716fd348SMartin Matuska 		ASSERT3U(error, ==, ENOENT);
5750716fd348SMartin Matuska 	}
5751716fd348SMartin Matuska 
5752716fd348SMartin Matuska 	/*
5753716fd348SMartin Matuska 	 * Atomically update two entries in our zap object.
5754716fd348SMartin Matuska 	 * The first is named txg_%llu, and contains the txg
5755716fd348SMartin Matuska 	 * in which the property was last updated.  The second
5756716fd348SMartin Matuska 	 * is named prop_%llu, and the nth element of its value
5757716fd348SMartin Matuska 	 * should be txg + object + n.
5758716fd348SMartin Matuska 	 */
5759716fd348SMartin Matuska 	tx = dmu_tx_create(os);
5760716fd348SMartin Matuska 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
5761716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5762716fd348SMartin Matuska 	if (txg == 0)
5763716fd348SMartin Matuska 		goto out;
5764716fd348SMartin Matuska 
5765716fd348SMartin Matuska 	if (last_txg > txg)
5766716fd348SMartin Matuska 		fatal(B_FALSE, "zap future leak: old %"PRIu64" new %"PRIu64"",
5767716fd348SMartin Matuska 		    last_txg, txg);
5768716fd348SMartin Matuska 
5769716fd348SMartin Matuska 	for (i = 0; i < ints; i++)
5770716fd348SMartin Matuska 		value[i] = txg + object + i;
5771716fd348SMartin Matuska 
5772716fd348SMartin Matuska 	VERIFY0(zap_update(os, object, txgname, sizeof (uint64_t),
5773716fd348SMartin Matuska 	    1, &txg, tx));
5774716fd348SMartin Matuska 	VERIFY0(zap_update(os, object, propname, sizeof (uint64_t),
5775716fd348SMartin Matuska 	    ints, value, tx));
5776716fd348SMartin Matuska 
5777716fd348SMartin Matuska 	dmu_tx_commit(tx);
5778716fd348SMartin Matuska 
5779716fd348SMartin Matuska 	/*
5780716fd348SMartin Matuska 	 * Remove a random pair of entries.
5781716fd348SMartin Matuska 	 */
5782716fd348SMartin Matuska 	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
5783716fd348SMartin Matuska 	(void) sprintf(propname, "prop_%"PRIu64"", prop);
5784716fd348SMartin Matuska 	(void) sprintf(txgname, "txg_%"PRIu64"", prop);
5785716fd348SMartin Matuska 
5786716fd348SMartin Matuska 	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
5787716fd348SMartin Matuska 
5788716fd348SMartin Matuska 	if (error == ENOENT)
5789716fd348SMartin Matuska 		goto out;
5790716fd348SMartin Matuska 
5791716fd348SMartin Matuska 	ASSERT0(error);
5792716fd348SMartin Matuska 
5793716fd348SMartin Matuska 	tx = dmu_tx_create(os);
5794716fd348SMartin Matuska 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
5795716fd348SMartin Matuska 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5796716fd348SMartin Matuska 	if (txg == 0)
5797716fd348SMartin Matuska 		goto out;
5798716fd348SMartin Matuska 	VERIFY0(zap_remove(os, object, txgname, tx));
5799716fd348SMartin Matuska 	VERIFY0(zap_remove(os, object, propname, tx));
5800716fd348SMartin Matuska 	dmu_tx_commit(tx);
5801716fd348SMartin Matuska out:
5802716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
5803716fd348SMartin Matuska }
5804716fd348SMartin Matuska 
5805716fd348SMartin Matuska /*
5806716fd348SMartin Matuska  * Test case to test the upgrading of a microzap to fatzap.
5807716fd348SMartin Matuska  */
5808716fd348SMartin Matuska void
5809716fd348SMartin Matuska ztest_fzap(ztest_ds_t *zd, uint64_t id)
5810716fd348SMartin Matuska {
5811716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5812716fd348SMartin Matuska 	ztest_od_t *od;
5813716fd348SMartin Matuska 	uint64_t object, txg, value;
5814716fd348SMartin Matuska 
5815716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
5816716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
5817716fd348SMartin Matuska 
5818716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t),
5819716fd348SMartin Matuska 	    !ztest_random(2)) != 0)
5820716fd348SMartin Matuska 		goto out;
5821716fd348SMartin Matuska 	object = od->od_object;
5822716fd348SMartin Matuska 
5823716fd348SMartin Matuska 	/*
5824716fd348SMartin Matuska 	 * Add entries to this ZAP and make sure it spills over
5825716fd348SMartin Matuska 	 * and gets upgraded to a fatzap. Also, since we are adding
5826716fd348SMartin Matuska 	 * 2050 entries we should see ptrtbl growth and leaf-block split.
5827716fd348SMartin Matuska 	 */
5828716fd348SMartin Matuska 	for (value = 0; value < 2050; value++) {
5829716fd348SMartin Matuska 		char name[ZFS_MAX_DATASET_NAME_LEN];
5830716fd348SMartin Matuska 		dmu_tx_t *tx;
5831716fd348SMartin Matuska 		int error;
5832716fd348SMartin Matuska 
5833716fd348SMartin Matuska 		(void) snprintf(name, sizeof (name), "fzap-%"PRIu64"-%"PRIu64"",
5834716fd348SMartin Matuska 		    id, value);
5835716fd348SMartin Matuska 
5836716fd348SMartin Matuska 		tx = dmu_tx_create(os);
5837716fd348SMartin Matuska 		dmu_tx_hold_zap(tx, object, B_TRUE, name);
5838716fd348SMartin Matuska 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5839716fd348SMartin Matuska 		if (txg == 0)
5840716fd348SMartin Matuska 			goto out;
5841716fd348SMartin Matuska 		error = zap_add(os, object, name, sizeof (uint64_t), 1,
5842716fd348SMartin Matuska 		    &value, tx);
5843716fd348SMartin Matuska 		ASSERT(error == 0 || error == EEXIST);
5844716fd348SMartin Matuska 		dmu_tx_commit(tx);
5845716fd348SMartin Matuska 	}
5846716fd348SMartin Matuska out:
5847716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
5848716fd348SMartin Matuska }
5849716fd348SMartin Matuska 
5850716fd348SMartin Matuska void
5851716fd348SMartin Matuska ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
5852716fd348SMartin Matuska {
5853716fd348SMartin Matuska 	(void) id;
5854716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
5855716fd348SMartin Matuska 	ztest_od_t *od;
5856716fd348SMartin Matuska 	uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
5857716fd348SMartin Matuska 	dmu_tx_t *tx;
5858716fd348SMartin Matuska 	int i, namelen, error;
5859716fd348SMartin Matuska 	int micro = ztest_random(2);
5860716fd348SMartin Matuska 	char name[20], string_value[20];
5861716fd348SMartin Matuska 	void *data;
5862716fd348SMartin Matuska 
5863716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
5864716fd348SMartin Matuska 	ztest_od_init(od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0, 0);
5865716fd348SMartin Matuska 
5866716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
5867716fd348SMartin Matuska 		umem_free(od, sizeof (ztest_od_t));
5868716fd348SMartin Matuska 		return;
5869716fd348SMartin Matuska 	}
5870716fd348SMartin Matuska 
5871716fd348SMartin Matuska 	object = od->od_object;
5872716fd348SMartin Matuska 
5873716fd348SMartin Matuska 	/*
5874716fd348SMartin Matuska 	 * Generate a random name of the form 'xxx.....' where each
5875716fd348SMartin Matuska 	 * x is a random printable character and the dots are dots.
5876716fd348SMartin Matuska 	 * There are 94 such characters, and the name length goes from
5877716fd348SMartin Matuska 	 * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
5878716fd348SMartin Matuska 	 */
5879716fd348SMartin Matuska 	namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
5880716fd348SMartin Matuska 
5881716fd348SMartin Matuska 	for (i = 0; i < 3; i++)
5882716fd348SMartin Matuska 		name[i] = '!' + ztest_random('~' - '!' + 1);
5883716fd348SMartin Matuska 	for (; i < namelen - 1; i++)
5884716fd348SMartin Matuska 		name[i] = '.';
5885716fd348SMartin Matuska 	name[i] = '\0';
5886716fd348SMartin Matuska 
5887716fd348SMartin Matuska 	if ((namelen & 1) || micro) {
5888716fd348SMartin Matuska 		wsize = sizeof (txg);
5889716fd348SMartin Matuska 		wc = 1;
5890716fd348SMartin Matuska 		data = &txg;
5891716fd348SMartin Matuska 	} else {
5892716fd348SMartin Matuska 		wsize = 1;
5893716fd348SMartin Matuska 		wc = namelen;
5894716fd348SMartin Matuska 		data = string_value;
5895716fd348SMartin Matuska 	}
5896716fd348SMartin Matuska 
5897716fd348SMartin Matuska 	count = -1ULL;
5898716fd348SMartin Matuska 	VERIFY0(zap_count(os, object, &count));
5899716fd348SMartin Matuska 	ASSERT3S(count, !=, -1ULL);
5900716fd348SMartin Matuska 
5901716fd348SMartin Matuska 	/*
5902716fd348SMartin Matuska 	 * Select an operation: length, lookup, add, update, remove.
5903716fd348SMartin Matuska 	 */
5904716fd348SMartin Matuska 	i = ztest_random(5);
5905716fd348SMartin Matuska 
5906716fd348SMartin Matuska 	if (i >= 2) {
5907716fd348SMartin Matuska 		tx = dmu_tx_create(os);
5908716fd348SMartin Matuska 		dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
5909716fd348SMartin Matuska 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
5910716fd348SMartin Matuska 		if (txg == 0) {
5911716fd348SMartin Matuska 			umem_free(od, sizeof (ztest_od_t));
5912716fd348SMartin Matuska 			return;
5913716fd348SMartin Matuska 		}
5914716fd348SMartin Matuska 		memcpy(string_value, name, namelen);
5915716fd348SMartin Matuska 	} else {
5916716fd348SMartin Matuska 		tx = NULL;
5917716fd348SMartin Matuska 		txg = 0;
5918716fd348SMartin Matuska 		memset(string_value, 0, namelen);
5919716fd348SMartin Matuska 	}
5920716fd348SMartin Matuska 
5921716fd348SMartin Matuska 	switch (i) {
5922716fd348SMartin Matuska 
5923716fd348SMartin Matuska 	case 0:
5924716fd348SMartin Matuska 		error = zap_length(os, object, name, &zl_wsize, &zl_wc);
5925716fd348SMartin Matuska 		if (error == 0) {
5926716fd348SMartin Matuska 			ASSERT3U(wsize, ==, zl_wsize);
5927716fd348SMartin Matuska 			ASSERT3U(wc, ==, zl_wc);
5928716fd348SMartin Matuska 		} else {
5929716fd348SMartin Matuska 			ASSERT3U(error, ==, ENOENT);
5930716fd348SMartin Matuska 		}
5931716fd348SMartin Matuska 		break;
5932716fd348SMartin Matuska 
5933716fd348SMartin Matuska 	case 1:
5934716fd348SMartin Matuska 		error = zap_lookup(os, object, name, wsize, wc, data);
5935716fd348SMartin Matuska 		if (error == 0) {
5936716fd348SMartin Matuska 			if (data == string_value &&
5937716fd348SMartin Matuska 			    memcmp(name, data, namelen) != 0)
5938716fd348SMartin Matuska 				fatal(B_FALSE, "name '%s' != val '%s' len %d",
5939716fd348SMartin Matuska 				    name, (char *)data, namelen);
5940716fd348SMartin Matuska 		} else {
5941716fd348SMartin Matuska 			ASSERT3U(error, ==, ENOENT);
5942716fd348SMartin Matuska 		}
5943716fd348SMartin Matuska 		break;
5944716fd348SMartin Matuska 
5945716fd348SMartin Matuska 	case 2:
5946716fd348SMartin Matuska 		error = zap_add(os, object, name, wsize, wc, data, tx);
5947716fd348SMartin Matuska 		ASSERT(error == 0 || error == EEXIST);
5948716fd348SMartin Matuska 		break;
5949716fd348SMartin Matuska 
5950716fd348SMartin Matuska 	case 3:
5951716fd348SMartin Matuska 		VERIFY0(zap_update(os, object, name, wsize, wc, data, tx));
5952716fd348SMartin Matuska 		break;
5953716fd348SMartin Matuska 
5954716fd348SMartin Matuska 	case 4:
5955716fd348SMartin Matuska 		error = zap_remove(os, object, name, tx);
5956716fd348SMartin Matuska 		ASSERT(error == 0 || error == ENOENT);
5957716fd348SMartin Matuska 		break;
5958716fd348SMartin Matuska 	}
5959716fd348SMartin Matuska 
5960716fd348SMartin Matuska 	if (tx != NULL)
5961716fd348SMartin Matuska 		dmu_tx_commit(tx);
5962716fd348SMartin Matuska 
5963716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
5964716fd348SMartin Matuska }
5965716fd348SMartin Matuska 
/*
 * Commit callback data.
 *
 * One of these is allocated by ztest_create_cb_data() for every callback
 * registered on a transaction and is handed to ztest_commit_callback() as
 * its private argument.
 */
typedef struct ztest_cb_data {
	/* Linkage used while the entry sits on zcl.zcl_callbacks. */
	list_node_t		zcd_node;
	/* Txg the transaction was assigned to; 0 if it was never assigned. */
	uint64_t		zcd_txg;
	/* Error value the callback is required to be invoked with. */
	int			zcd_expected_err;
	/* Set once the entry has been inserted on zcl.zcl_callbacks. */
	boolean_t		zcd_added;
	/* Set by ztest_commit_callback() when it runs for this entry. */
	boolean_t		zcd_called;
	/* Pool whose last synced txg the callback compares against. */
	spa_t			*zcd_spa;
} ztest_cb_data_t;
5977716fd348SMartin Matuska 
5978716fd348SMartin Matuska /* This is the actual commit callback function */
5979716fd348SMartin Matuska static void
5980716fd348SMartin Matuska ztest_commit_callback(void *arg, int error)
5981716fd348SMartin Matuska {
5982716fd348SMartin Matuska 	ztest_cb_data_t *data = arg;
5983716fd348SMartin Matuska 	uint64_t synced_txg;
5984716fd348SMartin Matuska 
5985716fd348SMartin Matuska 	VERIFY3P(data, !=, NULL);
5986716fd348SMartin Matuska 	VERIFY3S(data->zcd_expected_err, ==, error);
5987716fd348SMartin Matuska 	VERIFY(!data->zcd_called);
5988716fd348SMartin Matuska 
5989716fd348SMartin Matuska 	synced_txg = spa_last_synced_txg(data->zcd_spa);
5990716fd348SMartin Matuska 	if (data->zcd_txg > synced_txg)
5991716fd348SMartin Matuska 		fatal(B_FALSE,
5992716fd348SMartin Matuska 		    "commit callback of txg %"PRIu64" called prematurely, "
5993716fd348SMartin Matuska 		    "last synced txg = %"PRIu64"\n",
5994716fd348SMartin Matuska 		    data->zcd_txg, synced_txg);
5995716fd348SMartin Matuska 
5996716fd348SMartin Matuska 	data->zcd_called = B_TRUE;
5997716fd348SMartin Matuska 
5998716fd348SMartin Matuska 	if (error == ECANCELED) {
5999716fd348SMartin Matuska 		ASSERT0(data->zcd_txg);
6000716fd348SMartin Matuska 		ASSERT(!data->zcd_added);
6001716fd348SMartin Matuska 
6002716fd348SMartin Matuska 		/*
6003716fd348SMartin Matuska 		 * The private callback data should be destroyed here, but
6004716fd348SMartin Matuska 		 * since we are going to check the zcd_called field after
6005716fd348SMartin Matuska 		 * dmu_tx_abort(), we will destroy it there.
6006716fd348SMartin Matuska 		 */
6007716fd348SMartin Matuska 		return;
6008716fd348SMartin Matuska 	}
6009716fd348SMartin Matuska 
6010716fd348SMartin Matuska 	ASSERT(data->zcd_added);
6011716fd348SMartin Matuska 	ASSERT3U(data->zcd_txg, !=, 0);
6012716fd348SMartin Matuska 
6013716fd348SMartin Matuska 	(void) mutex_enter(&zcl.zcl_callbacks_lock);
6014716fd348SMartin Matuska 
6015716fd348SMartin Matuska 	/* See if this cb was called more quickly */
6016716fd348SMartin Matuska 	if ((synced_txg - data->zcd_txg) < zc_min_txg_delay)
6017716fd348SMartin Matuska 		zc_min_txg_delay = synced_txg - data->zcd_txg;
6018716fd348SMartin Matuska 
6019716fd348SMartin Matuska 	/* Remove our callback from the list */
6020716fd348SMartin Matuska 	list_remove(&zcl.zcl_callbacks, data);
6021716fd348SMartin Matuska 
6022716fd348SMartin Matuska 	(void) mutex_exit(&zcl.zcl_callbacks_lock);
6023716fd348SMartin Matuska 
6024716fd348SMartin Matuska 	umem_free(data, sizeof (ztest_cb_data_t));
6025716fd348SMartin Matuska }
6026716fd348SMartin Matuska 
6027716fd348SMartin Matuska /* Allocate and initialize callback data structure */
6028716fd348SMartin Matuska static ztest_cb_data_t *
6029716fd348SMartin Matuska ztest_create_cb_data(objset_t *os, uint64_t txg)
6030716fd348SMartin Matuska {
6031716fd348SMartin Matuska 	ztest_cb_data_t *cb_data;
6032716fd348SMartin Matuska 
6033716fd348SMartin Matuska 	cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL);
6034716fd348SMartin Matuska 
6035716fd348SMartin Matuska 	cb_data->zcd_txg = txg;
6036716fd348SMartin Matuska 	cb_data->zcd_spa = dmu_objset_spa(os);
6037716fd348SMartin Matuska 	list_link_init(&cb_data->zcd_node);
6038716fd348SMartin Matuska 
6039716fd348SMartin Matuska 	return (cb_data);
6040716fd348SMartin Matuska }
6041716fd348SMartin Matuska 
6042716fd348SMartin Matuska /*
6043716fd348SMartin Matuska  * Commit callback test.
6044716fd348SMartin Matuska  */
6045716fd348SMartin Matuska void
6046716fd348SMartin Matuska ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
6047716fd348SMartin Matuska {
6048716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
6049716fd348SMartin Matuska 	ztest_od_t *od;
6050716fd348SMartin Matuska 	dmu_tx_t *tx;
6051716fd348SMartin Matuska 	ztest_cb_data_t *cb_data[3], *tmp_cb;
6052716fd348SMartin Matuska 	uint64_t old_txg, txg;
6053716fd348SMartin Matuska 	int i, error = 0;
6054716fd348SMartin Matuska 
6055716fd348SMartin Matuska 	od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL);
6056716fd348SMartin Matuska 	ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
6057716fd348SMartin Matuska 
6058716fd348SMartin Matuska 	if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) {
6059716fd348SMartin Matuska 		umem_free(od, sizeof (ztest_od_t));
6060716fd348SMartin Matuska 		return;
6061716fd348SMartin Matuska 	}
6062716fd348SMartin Matuska 
6063716fd348SMartin Matuska 	tx = dmu_tx_create(os);
6064716fd348SMartin Matuska 
6065716fd348SMartin Matuska 	cb_data[0] = ztest_create_cb_data(os, 0);
6066716fd348SMartin Matuska 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]);
6067716fd348SMartin Matuska 
6068716fd348SMartin Matuska 	dmu_tx_hold_write(tx, od->od_object, 0, sizeof (uint64_t));
6069716fd348SMartin Matuska 
6070716fd348SMartin Matuska 	/* Every once in a while, abort the transaction on purpose */
6071716fd348SMartin Matuska 	if (ztest_random(100) == 0)
6072716fd348SMartin Matuska 		error = -1;
6073716fd348SMartin Matuska 
6074716fd348SMartin Matuska 	if (!error)
6075716fd348SMartin Matuska 		error = dmu_tx_assign(tx, TXG_NOWAIT);
6076716fd348SMartin Matuska 
6077716fd348SMartin Matuska 	txg = error ? 0 : dmu_tx_get_txg(tx);
6078716fd348SMartin Matuska 
6079716fd348SMartin Matuska 	cb_data[0]->zcd_txg = txg;
6080716fd348SMartin Matuska 	cb_data[1] = ztest_create_cb_data(os, txg);
6081716fd348SMartin Matuska 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]);
6082716fd348SMartin Matuska 
6083716fd348SMartin Matuska 	if (error) {
6084716fd348SMartin Matuska 		/*
6085716fd348SMartin Matuska 		 * It's not a strict requirement to call the registered
6086716fd348SMartin Matuska 		 * callbacks from inside dmu_tx_abort(), but that's what
6087716fd348SMartin Matuska 		 * it's supposed to happen in the current implementation
6088716fd348SMartin Matuska 		 * so we will check for that.
6089716fd348SMartin Matuska 		 */
6090716fd348SMartin Matuska 		for (i = 0; i < 2; i++) {
6091716fd348SMartin Matuska 			cb_data[i]->zcd_expected_err = ECANCELED;
6092716fd348SMartin Matuska 			VERIFY(!cb_data[i]->zcd_called);
6093716fd348SMartin Matuska 		}
6094716fd348SMartin Matuska 
6095716fd348SMartin Matuska 		dmu_tx_abort(tx);
6096716fd348SMartin Matuska 
6097716fd348SMartin Matuska 		for (i = 0; i < 2; i++) {
6098716fd348SMartin Matuska 			VERIFY(cb_data[i]->zcd_called);
6099716fd348SMartin Matuska 			umem_free(cb_data[i], sizeof (ztest_cb_data_t));
6100716fd348SMartin Matuska 		}
6101716fd348SMartin Matuska 
6102716fd348SMartin Matuska 		umem_free(od, sizeof (ztest_od_t));
6103716fd348SMartin Matuska 		return;
6104716fd348SMartin Matuska 	}
6105716fd348SMartin Matuska 
6106716fd348SMartin Matuska 	cb_data[2] = ztest_create_cb_data(os, txg);
6107716fd348SMartin Matuska 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]);
6108716fd348SMartin Matuska 
6109716fd348SMartin Matuska 	/*
6110716fd348SMartin Matuska 	 * Read existing data to make sure there isn't a future leak.
6111716fd348SMartin Matuska 	 */
6112716fd348SMartin Matuska 	VERIFY0(dmu_read(os, od->od_object, 0, sizeof (uint64_t),
6113716fd348SMartin Matuska 	    &old_txg, DMU_READ_PREFETCH));
6114716fd348SMartin Matuska 
6115716fd348SMartin Matuska 	if (old_txg > txg)
6116716fd348SMartin Matuska 		fatal(B_FALSE,
6117716fd348SMartin Matuska 		    "future leak: got %"PRIu64", open txg is %"PRIu64"",
6118716fd348SMartin Matuska 		    old_txg, txg);
6119716fd348SMartin Matuska 
6120716fd348SMartin Matuska 	dmu_write(os, od->od_object, 0, sizeof (uint64_t), &txg, tx);
6121716fd348SMartin Matuska 
6122716fd348SMartin Matuska 	(void) mutex_enter(&zcl.zcl_callbacks_lock);
6123716fd348SMartin Matuska 
6124716fd348SMartin Matuska 	/*
6125716fd348SMartin Matuska 	 * Since commit callbacks don't have any ordering requirement and since
6126716fd348SMartin Matuska 	 * it is theoretically possible for a commit callback to be called
6127716fd348SMartin Matuska 	 * after an arbitrary amount of time has elapsed since its txg has been
6128716fd348SMartin Matuska 	 * synced, it is difficult to reliably determine whether a commit
6129716fd348SMartin Matuska 	 * callback hasn't been called due to high load or due to a flawed
6130716fd348SMartin Matuska 	 * implementation.
6131716fd348SMartin Matuska 	 *
6132716fd348SMartin Matuska 	 * In practice, we will assume that if after a certain number of txgs a
6133716fd348SMartin Matuska 	 * commit callback hasn't been called, then most likely there's an
6134716fd348SMartin Matuska 	 * implementation bug..
6135716fd348SMartin Matuska 	 */
6136716fd348SMartin Matuska 	tmp_cb = list_head(&zcl.zcl_callbacks);
6137716fd348SMartin Matuska 	if (tmp_cb != NULL &&
6138716fd348SMartin Matuska 	    tmp_cb->zcd_txg + ZTEST_COMMIT_CB_THRESH < txg) {
6139716fd348SMartin Matuska 		fatal(B_FALSE,
6140716fd348SMartin Matuska 		    "Commit callback threshold exceeded, "
6141716fd348SMartin Matuska 		    "oldest txg: %"PRIu64", open txg: %"PRIu64"\n",
6142716fd348SMartin Matuska 		    tmp_cb->zcd_txg, txg);
6143716fd348SMartin Matuska 	}
6144716fd348SMartin Matuska 
6145716fd348SMartin Matuska 	/*
6146716fd348SMartin Matuska 	 * Let's find the place to insert our callbacks.
6147716fd348SMartin Matuska 	 *
6148716fd348SMartin Matuska 	 * Even though the list is ordered by txg, it is possible for the
6149716fd348SMartin Matuska 	 * insertion point to not be the end because our txg may already be
6150716fd348SMartin Matuska 	 * quiescing at this point and other callbacks in the open txg
6151716fd348SMartin Matuska 	 * (from other objsets) may have sneaked in.
6152716fd348SMartin Matuska 	 */
6153716fd348SMartin Matuska 	tmp_cb = list_tail(&zcl.zcl_callbacks);
6154716fd348SMartin Matuska 	while (tmp_cb != NULL && tmp_cb->zcd_txg > txg)
6155716fd348SMartin Matuska 		tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb);
6156716fd348SMartin Matuska 
6157716fd348SMartin Matuska 	/* Add the 3 callbacks to the list */
6158716fd348SMartin Matuska 	for (i = 0; i < 3; i++) {
6159716fd348SMartin Matuska 		if (tmp_cb == NULL)
6160716fd348SMartin Matuska 			list_insert_head(&zcl.zcl_callbacks, cb_data[i]);
6161716fd348SMartin Matuska 		else
6162716fd348SMartin Matuska 			list_insert_after(&zcl.zcl_callbacks, tmp_cb,
6163716fd348SMartin Matuska 			    cb_data[i]);
6164716fd348SMartin Matuska 
6165716fd348SMartin Matuska 		cb_data[i]->zcd_added = B_TRUE;
6166716fd348SMartin Matuska 		VERIFY(!cb_data[i]->zcd_called);
6167716fd348SMartin Matuska 
6168716fd348SMartin Matuska 		tmp_cb = cb_data[i];
6169716fd348SMartin Matuska 	}
6170716fd348SMartin Matuska 
6171716fd348SMartin Matuska 	zc_cb_counter += 3;
6172716fd348SMartin Matuska 
6173716fd348SMartin Matuska 	(void) mutex_exit(&zcl.zcl_callbacks_lock);
6174716fd348SMartin Matuska 
6175716fd348SMartin Matuska 	dmu_tx_commit(tx);
6176716fd348SMartin Matuska 
6177716fd348SMartin Matuska 	umem_free(od, sizeof (ztest_od_t));
6178716fd348SMartin Matuska }
6179716fd348SMartin Matuska 
6180716fd348SMartin Matuska /*
6181716fd348SMartin Matuska  * Visit each object in the dataset. Verify that its properties
6182716fd348SMartin Matuska  * are consistent what was stored in the block tag when it was created,
6183716fd348SMartin Matuska  * and that its unused bonus buffer space has not been overwritten.
6184716fd348SMartin Matuska  */
6185716fd348SMartin Matuska void
6186716fd348SMartin Matuska ztest_verify_dnode_bt(ztest_ds_t *zd, uint64_t id)
6187716fd348SMartin Matuska {
6188716fd348SMartin Matuska 	(void) id;
6189716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
6190716fd348SMartin Matuska 	uint64_t obj;
6191716fd348SMartin Matuska 	int err = 0;
6192716fd348SMartin Matuska 
6193716fd348SMartin Matuska 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
6194716fd348SMartin Matuska 		ztest_block_tag_t *bt = NULL;
6195716fd348SMartin Matuska 		dmu_object_info_t doi;
6196716fd348SMartin Matuska 		dmu_buf_t *db;
6197716fd348SMartin Matuska 
6198e716630dSMartin Matuska 		ztest_object_lock(zd, obj, ZTRL_READER);
6199716fd348SMartin Matuska 		if (dmu_bonus_hold(os, obj, FTAG, &db) != 0) {
6200716fd348SMartin Matuska 			ztest_object_unlock(zd, obj);
6201716fd348SMartin Matuska 			continue;
6202716fd348SMartin Matuska 		}
6203716fd348SMartin Matuska 
6204716fd348SMartin Matuska 		dmu_object_info_from_db(db, &doi);
6205716fd348SMartin Matuska 		if (doi.doi_bonus_size >= sizeof (*bt))
6206716fd348SMartin Matuska 			bt = ztest_bt_bonus(db);
6207716fd348SMartin Matuska 
6208716fd348SMartin Matuska 		if (bt && bt->bt_magic == BT_MAGIC) {
6209716fd348SMartin Matuska 			ztest_bt_verify(bt, os, obj, doi.doi_dnodesize,
6210716fd348SMartin Matuska 			    bt->bt_offset, bt->bt_gen, bt->bt_txg,
6211716fd348SMartin Matuska 			    bt->bt_crtxg);
6212716fd348SMartin Matuska 			ztest_verify_unused_bonus(db, bt, obj, os, bt->bt_gen);
6213716fd348SMartin Matuska 		}
6214716fd348SMartin Matuska 
6215716fd348SMartin Matuska 		dmu_buf_rele(db, FTAG);
6216716fd348SMartin Matuska 		ztest_object_unlock(zd, obj);
6217716fd348SMartin Matuska 	}
6218716fd348SMartin Matuska }
6219716fd348SMartin Matuska 
6220716fd348SMartin Matuska void
6221716fd348SMartin Matuska ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
6222716fd348SMartin Matuska {
6223716fd348SMartin Matuska 	(void) id;
6224716fd348SMartin Matuska 	zfs_prop_t proplist[] = {
6225716fd348SMartin Matuska 		ZFS_PROP_CHECKSUM,
6226716fd348SMartin Matuska 		ZFS_PROP_COMPRESSION,
6227716fd348SMartin Matuska 		ZFS_PROP_COPIES,
6228716fd348SMartin Matuska 		ZFS_PROP_DEDUP
6229716fd348SMartin Matuska 	};
6230716fd348SMartin Matuska 
6231716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
6232716fd348SMartin Matuska 
623315f0b8c3SMartin Matuska 	for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) {
623415f0b8c3SMartin Matuska 		int error = ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
6235716fd348SMartin Matuska 		    ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
623615f0b8c3SMartin Matuska 		ASSERT(error == 0 || error == ENOSPC);
623715f0b8c3SMartin Matuska 	}
6238716fd348SMartin Matuska 
623915f0b8c3SMartin Matuska 	int error = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_RECORDSIZE,
624015f0b8c3SMartin Matuska 	    ztest_random_blocksize(), (int)ztest_random(2));
624115f0b8c3SMartin Matuska 	ASSERT(error == 0 || error == ENOSPC);
6242716fd348SMartin Matuska 
6243716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
6244716fd348SMartin Matuska }
6245716fd348SMartin Matuska 
6246716fd348SMartin Matuska void
6247716fd348SMartin Matuska ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
6248716fd348SMartin Matuska {
6249716fd348SMartin Matuska 	(void) zd, (void) id;
6250716fd348SMartin Matuska 
6251716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
6252716fd348SMartin Matuska 
6253716fd348SMartin Matuska 	(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));
6254716fd348SMartin Matuska 
6255d59a7618SRob Norris 	nvlist_t *props = fnvlist_alloc();
6256d59a7618SRob Norris 
6257d59a7618SRob Norris 	VERIFY0(spa_prop_get(ztest_spa, props));
6258716fd348SMartin Matuska 
6259716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6)
6260716fd348SMartin Matuska 		dump_nvlist(props, 4);
6261716fd348SMartin Matuska 
6262716fd348SMartin Matuska 	fnvlist_free(props);
6263716fd348SMartin Matuska 
6264716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
6265716fd348SMartin Matuska }
6266716fd348SMartin Matuska 
6267716fd348SMartin Matuska static int
6268716fd348SMartin Matuska user_release_one(const char *snapname, const char *holdname)
6269716fd348SMartin Matuska {
6270716fd348SMartin Matuska 	nvlist_t *snaps, *holds;
6271716fd348SMartin Matuska 	int error;
6272716fd348SMartin Matuska 
6273716fd348SMartin Matuska 	snaps = fnvlist_alloc();
6274716fd348SMartin Matuska 	holds = fnvlist_alloc();
6275716fd348SMartin Matuska 	fnvlist_add_boolean(holds, holdname);
6276716fd348SMartin Matuska 	fnvlist_add_nvlist(snaps, snapname, holds);
6277716fd348SMartin Matuska 	fnvlist_free(holds);
6278716fd348SMartin Matuska 	error = dsl_dataset_user_release(snaps, NULL);
6279716fd348SMartin Matuska 	fnvlist_free(snaps);
6280716fd348SMartin Matuska 	return (error);
6281716fd348SMartin Matuska }
6282716fd348SMartin Matuska 
6283716fd348SMartin Matuska /*
6284716fd348SMartin Matuska  * Test snapshot hold/release and deferred destroy.
6285716fd348SMartin Matuska  */
6286716fd348SMartin Matuska void
6287716fd348SMartin Matuska ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
6288716fd348SMartin Matuska {
6289716fd348SMartin Matuska 	int error;
6290716fd348SMartin Matuska 	objset_t *os = zd->zd_os;
6291716fd348SMartin Matuska 	objset_t *origin;
6292716fd348SMartin Matuska 	char snapname[100];
6293716fd348SMartin Matuska 	char fullname[100];
6294716fd348SMartin Matuska 	char clonename[100];
6295716fd348SMartin Matuska 	char tag[100];
6296716fd348SMartin Matuska 	char osname[ZFS_MAX_DATASET_NAME_LEN];
6297716fd348SMartin Matuska 	nvlist_t *holds;
6298716fd348SMartin Matuska 
6299716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
6300716fd348SMartin Matuska 
6301716fd348SMartin Matuska 	dmu_objset_name(os, osname);
6302716fd348SMartin Matuska 
6303716fd348SMartin Matuska 	(void) snprintf(snapname, sizeof (snapname), "sh1_%"PRIu64"", id);
6304716fd348SMartin Matuska 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
6305716fd348SMartin Matuska 	(void) snprintf(clonename, sizeof (clonename), "%s/ch1_%"PRIu64"",
6306716fd348SMartin Matuska 	    osname, id);
6307716fd348SMartin Matuska 	(void) snprintf(tag, sizeof (tag), "tag_%"PRIu64"", id);
6308716fd348SMartin Matuska 
6309716fd348SMartin Matuska 	/*
6310716fd348SMartin Matuska 	 * Clean up from any previous run.
6311716fd348SMartin Matuska 	 */
6312716fd348SMartin Matuska 	error = dsl_destroy_head(clonename);
6313716fd348SMartin Matuska 	if (error != ENOENT)
6314716fd348SMartin Matuska 		ASSERT0(error);
6315716fd348SMartin Matuska 	error = user_release_one(fullname, tag);
6316716fd348SMartin Matuska 	if (error != ESRCH && error != ENOENT)
6317716fd348SMartin Matuska 		ASSERT0(error);
6318716fd348SMartin Matuska 	error = dsl_destroy_snapshot(fullname, B_FALSE);
6319716fd348SMartin Matuska 	if (error != ENOENT)
6320716fd348SMartin Matuska 		ASSERT0(error);
6321716fd348SMartin Matuska 
6322716fd348SMartin Matuska 	/*
6323716fd348SMartin Matuska 	 * Create snapshot, clone it, mark snap for deferred destroy,
6324716fd348SMartin Matuska 	 * destroy clone, verify snap was also destroyed.
6325716fd348SMartin Matuska 	 */
6326716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(osname, snapname);
6327716fd348SMartin Matuska 	if (error) {
6328716fd348SMartin Matuska 		if (error == ENOSPC) {
6329716fd348SMartin Matuska 			ztest_record_enospc("dmu_objset_snapshot");
6330716fd348SMartin Matuska 			goto out;
6331716fd348SMartin Matuska 		}
6332716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_snapshot(%s) = %d", fullname, error);
6333716fd348SMartin Matuska 	}
6334716fd348SMartin Matuska 
6335716fd348SMartin Matuska 	error = dmu_objset_clone(clonename, fullname);
6336716fd348SMartin Matuska 	if (error) {
6337716fd348SMartin Matuska 		if (error == ENOSPC) {
6338716fd348SMartin Matuska 			ztest_record_enospc("dmu_objset_clone");
6339716fd348SMartin Matuska 			goto out;
6340716fd348SMartin Matuska 		}
6341716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_clone(%s) = %d", clonename, error);
6342716fd348SMartin Matuska 	}
6343716fd348SMartin Matuska 
6344716fd348SMartin Matuska 	error = dsl_destroy_snapshot(fullname, B_TRUE);
6345716fd348SMartin Matuska 	if (error) {
6346716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
6347716fd348SMartin Matuska 		    fullname, error);
6348716fd348SMartin Matuska 	}
6349716fd348SMartin Matuska 
6350716fd348SMartin Matuska 	error = dsl_destroy_head(clonename);
6351716fd348SMartin Matuska 	if (error)
6352716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clonename, error);
6353716fd348SMartin Matuska 
6354716fd348SMartin Matuska 	error = dmu_objset_hold(fullname, FTAG, &origin);
6355716fd348SMartin Matuska 	if (error != ENOENT)
6356716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_hold(%s) = %d", fullname, error);
6357716fd348SMartin Matuska 
6358716fd348SMartin Matuska 	/*
6359716fd348SMartin Matuska 	 * Create snapshot, add temporary hold, verify that we can't
6360716fd348SMartin Matuska 	 * destroy a held snapshot, mark for deferred destroy,
6361716fd348SMartin Matuska 	 * release hold, verify snapshot was destroyed.
6362716fd348SMartin Matuska 	 */
6363716fd348SMartin Matuska 	error = dmu_objset_snapshot_one(osname, snapname);
6364716fd348SMartin Matuska 	if (error) {
6365716fd348SMartin Matuska 		if (error == ENOSPC) {
6366716fd348SMartin Matuska 			ztest_record_enospc("dmu_objset_snapshot");
6367716fd348SMartin Matuska 			goto out;
6368716fd348SMartin Matuska 		}
6369716fd348SMartin Matuska 		fatal(B_FALSE, "dmu_objset_snapshot(%s) = %d", fullname, error);
6370716fd348SMartin Matuska 	}
6371716fd348SMartin Matuska 
6372716fd348SMartin Matuska 	holds = fnvlist_alloc();
6373716fd348SMartin Matuska 	fnvlist_add_string(holds, fullname, tag);
6374716fd348SMartin Matuska 	error = dsl_dataset_user_hold(holds, 0, NULL);
6375716fd348SMartin Matuska 	fnvlist_free(holds);
6376716fd348SMartin Matuska 
6377716fd348SMartin Matuska 	if (error == ENOSPC) {
6378716fd348SMartin Matuska 		ztest_record_enospc("dsl_dataset_user_hold");
6379716fd348SMartin Matuska 		goto out;
6380716fd348SMartin Matuska 	} else if (error) {
6381716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_dataset_user_hold(%s, %s) = %u",
6382716fd348SMartin Matuska 		    fullname, tag, error);
6383716fd348SMartin Matuska 	}
6384716fd348SMartin Matuska 
6385716fd348SMartin Matuska 	error = dsl_destroy_snapshot(fullname, B_FALSE);
6386716fd348SMartin Matuska 	if (error != EBUSY) {
6387716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
6388716fd348SMartin Matuska 		    fullname, error);
6389716fd348SMartin Matuska 	}
6390716fd348SMartin Matuska 
6391716fd348SMartin Matuska 	error = dsl_destroy_snapshot(fullname, B_TRUE);
6392716fd348SMartin Matuska 	if (error) {
6393716fd348SMartin Matuska 		fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
6394716fd348SMartin Matuska 		    fullname, error);
6395716fd348SMartin Matuska 	}
6396716fd348SMartin Matuska 
6397716fd348SMartin Matuska 	error = user_release_one(fullname, tag);
6398716fd348SMartin Matuska 	if (error)
6399716fd348SMartin Matuska 		fatal(B_FALSE, "user_release_one(%s, %s) = %d",
6400716fd348SMartin Matuska 		    fullname, tag, error);
6401716fd348SMartin Matuska 
6402716fd348SMartin Matuska 	VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
6403716fd348SMartin Matuska 
6404716fd348SMartin Matuska out:
6405716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
6406716fd348SMartin Matuska }
6407716fd348SMartin Matuska 
6408716fd348SMartin Matuska /*
6409716fd348SMartin Matuska  * Inject random faults into the on-disk data.
6410716fd348SMartin Matuska  */
6411716fd348SMartin Matuska void
6412716fd348SMartin Matuska ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
6413716fd348SMartin Matuska {
6414716fd348SMartin Matuska 	(void) zd, (void) id;
6415716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
6416716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
6417716fd348SMartin Matuska 	int fd;
6418716fd348SMartin Matuska 	uint64_t offset;
6419716fd348SMartin Matuska 	uint64_t leaves;
6420716fd348SMartin Matuska 	uint64_t bad = 0x1990c0ffeedecadeull;
6421716fd348SMartin Matuska 	uint64_t top, leaf;
6422e716630dSMartin Matuska 	uint64_t raidz_children;
6423716fd348SMartin Matuska 	char *path0;
6424716fd348SMartin Matuska 	char *pathrand;
6425716fd348SMartin Matuska 	size_t fsize;
6426716fd348SMartin Matuska 	int bshift = SPA_MAXBLOCKSHIFT + 2;
6427716fd348SMartin Matuska 	int iters = 1000;
6428716fd348SMartin Matuska 	int maxfaults;
6429716fd348SMartin Matuska 	int mirror_save;
6430716fd348SMartin Matuska 	vdev_t *vd0 = NULL;
6431716fd348SMartin Matuska 	uint64_t guid0 = 0;
6432716fd348SMartin Matuska 	boolean_t islog = B_FALSE;
6433e716630dSMartin Matuska 	boolean_t injected = B_FALSE;
6434716fd348SMartin Matuska 
6435716fd348SMartin Matuska 	path0 = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
6436716fd348SMartin Matuska 	pathrand = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
6437716fd348SMartin Matuska 
6438716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
6439716fd348SMartin Matuska 
6440716fd348SMartin Matuska 	/*
6441716fd348SMartin Matuska 	 * Device removal is in progress, fault injection must be disabled
6442716fd348SMartin Matuska 	 * until it completes and the pool is scrubbed.  The fault injection
	 * strategy for damaging blocks does not take into account evacuated
6444716fd348SMartin Matuska 	 * blocks which may have already been damaged.
6445716fd348SMartin Matuska 	 */
6446e716630dSMartin Matuska 	if (ztest_device_removal_active)
6447716fd348SMartin Matuska 		goto out;
6448e716630dSMartin Matuska 
6449e716630dSMartin Matuska 	/*
6450e716630dSMartin Matuska 	 * The fault injection strategy for damaging blocks cannot be used
6451e716630dSMartin Matuska 	 * if raidz expansion is in progress. The leaves value
6452e716630dSMartin Matuska 	 * (attached raidz children) is variable and strategy for damaging
6453e716630dSMartin Matuska 	 * blocks will corrupt same data blocks on different child vdevs
6454e716630dSMartin Matuska 	 * because of the reflow process.
6455e716630dSMartin Matuska 	 */
6456e716630dSMartin Matuska 	if (spa->spa_raidz_expand != NULL)
6457e716630dSMartin Matuska 		goto out;
6458716fd348SMartin Matuska 
6459716fd348SMartin Matuska 	maxfaults = MAXFAULTS(zs);
6460e716630dSMartin Matuska 	raidz_children = ztest_get_raidz_children(spa);
6461e716630dSMartin Matuska 	leaves = MAX(zs->zs_mirrors, 1) * raidz_children;
6462716fd348SMartin Matuska 	mirror_save = zs->zs_mirrors;
6463716fd348SMartin Matuska 
6464716fd348SMartin Matuska 	ASSERT3U(leaves, >=, 1);
6465716fd348SMartin Matuska 
6466716fd348SMartin Matuska 	/*
6467716fd348SMartin Matuska 	 * While ztest is running the number of leaves will not change.  This
6468716fd348SMartin Matuska 	 * is critical for the fault injection logic as it determines where
6469716fd348SMartin Matuska 	 * errors can be safely injected such that they are always repairable.
6470716fd348SMartin Matuska 	 *
6471716fd348SMartin Matuska 	 * When restarting ztest a different number of leaves may be requested
6472716fd348SMartin Matuska 	 * which will shift the regions to be damaged.  This is fine as long
6473716fd348SMartin Matuska 	 * as the pool has been scrubbed prior to using the new mapping.
	 * Failure to do so can result in non-repairable damage being injected.
6475716fd348SMartin Matuska 	 */
6476716fd348SMartin Matuska 	if (ztest_pool_scrubbed == B_FALSE)
6477716fd348SMartin Matuska 		goto out;
6478716fd348SMartin Matuska 
6479716fd348SMartin Matuska 	/*
6480716fd348SMartin Matuska 	 * Grab the name lock as reader. There are some operations
6481716fd348SMartin Matuska 	 * which don't like to have their vdevs changed while
6482716fd348SMartin Matuska 	 * they are in progress (i.e. spa_change_guid). Those
6483716fd348SMartin Matuska 	 * operations will have grabbed the name lock as writer.
6484716fd348SMartin Matuska 	 */
6485716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
6486716fd348SMartin Matuska 
6487716fd348SMartin Matuska 	/*
6488716fd348SMartin Matuska 	 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
6489716fd348SMartin Matuska 	 */
6490716fd348SMartin Matuska 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
6491716fd348SMartin Matuska 
6492716fd348SMartin Matuska 	if (ztest_random(2) == 0) {
6493716fd348SMartin Matuska 		/*
6494716fd348SMartin Matuska 		 * Inject errors on a normal data device or slog device.
6495716fd348SMartin Matuska 		 */
6496716fd348SMartin Matuska 		top = ztest_random_vdev_top(spa, B_TRUE);
6497716fd348SMartin Matuska 		leaf = ztest_random(leaves) + zs->zs_splits;
6498716fd348SMartin Matuska 
6499716fd348SMartin Matuska 		/*
6500716fd348SMartin Matuska 		 * Generate paths to the first leaf in this top-level vdev,
6501716fd348SMartin Matuska 		 * and to the random leaf we selected.  We'll induce transient
6502716fd348SMartin Matuska 		 * write failures and random online/offline activity on leaf 0,
6503716fd348SMartin Matuska 		 * and we'll write random garbage to the randomly chosen leaf.
6504716fd348SMartin Matuska 		 */
6505716fd348SMartin Matuska 		(void) snprintf(path0, MAXPATHLEN, ztest_dev_template,
6506716fd348SMartin Matuska 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
6507716fd348SMartin Matuska 		    top * leaves + zs->zs_splits);
6508716fd348SMartin Matuska 		(void) snprintf(pathrand, MAXPATHLEN, ztest_dev_template,
6509716fd348SMartin Matuska 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
6510716fd348SMartin Matuska 		    top * leaves + leaf);
6511716fd348SMartin Matuska 
6512716fd348SMartin Matuska 		vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
6513716fd348SMartin Matuska 		if (vd0 != NULL && vd0->vdev_top->vdev_islog)
6514716fd348SMartin Matuska 			islog = B_TRUE;
6515716fd348SMartin Matuska 
6516716fd348SMartin Matuska 		/*
6517716fd348SMartin Matuska 		 * If the top-level vdev needs to be resilvered
6518716fd348SMartin Matuska 		 * then we only allow faults on the device that is
6519716fd348SMartin Matuska 		 * resilvering.
6520716fd348SMartin Matuska 		 */
6521716fd348SMartin Matuska 		if (vd0 != NULL && maxfaults != 1 &&
6522716fd348SMartin Matuska 		    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
6523716fd348SMartin Matuska 		    vd0->vdev_resilver_txg != 0)) {
6524716fd348SMartin Matuska 			/*
6525716fd348SMartin Matuska 			 * Make vd0 explicitly claim to be unreadable,
6526716fd348SMartin Matuska 			 * or unwritable, or reach behind its back
6527716fd348SMartin Matuska 			 * and close the underlying fd.  We can do this if
6528716fd348SMartin Matuska 			 * maxfaults == 0 because we'll fail and reexecute,
6529716fd348SMartin Matuska 			 * and we can do it if maxfaults >= 2 because we'll
6530716fd348SMartin Matuska 			 * have enough redundancy.  If maxfaults == 1, the
6531716fd348SMartin Matuska 			 * combination of this with injection of random data
6532716fd348SMartin Matuska 			 * corruption below exceeds the pool's fault tolerance.
6533716fd348SMartin Matuska 			 */
6534716fd348SMartin Matuska 			vdev_file_t *vf = vd0->vdev_tsd;
6535716fd348SMartin Matuska 
6536716fd348SMartin Matuska 			zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d",
6537716fd348SMartin Matuska 			    (long long)vd0->vdev_id, (int)maxfaults);
6538716fd348SMartin Matuska 
6539716fd348SMartin Matuska 			if (vf != NULL && ztest_random(3) == 0) {
6540716fd348SMartin Matuska 				(void) close(vf->vf_file->f_fd);
6541716fd348SMartin Matuska 				vf->vf_file->f_fd = -1;
6542716fd348SMartin Matuska 			} else if (ztest_random(2) == 0) {
6543716fd348SMartin Matuska 				vd0->vdev_cant_read = B_TRUE;
6544716fd348SMartin Matuska 			} else {
6545716fd348SMartin Matuska 				vd0->vdev_cant_write = B_TRUE;
6546716fd348SMartin Matuska 			}
6547716fd348SMartin Matuska 			guid0 = vd0->vdev_guid;
6548716fd348SMartin Matuska 		}
6549716fd348SMartin Matuska 	} else {
6550716fd348SMartin Matuska 		/*
6551716fd348SMartin Matuska 		 * Inject errors on an l2cache device.
6552716fd348SMartin Matuska 		 */
6553716fd348SMartin Matuska 		spa_aux_vdev_t *sav = &spa->spa_l2cache;
6554716fd348SMartin Matuska 
6555716fd348SMartin Matuska 		if (sav->sav_count == 0) {
6556716fd348SMartin Matuska 			spa_config_exit(spa, SCL_STATE, FTAG);
6557716fd348SMartin Matuska 			(void) pthread_rwlock_unlock(&ztest_name_lock);
6558716fd348SMartin Matuska 			goto out;
6559716fd348SMartin Matuska 		}
6560716fd348SMartin Matuska 		vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
6561716fd348SMartin Matuska 		guid0 = vd0->vdev_guid;
6562be181ee2SMartin Matuska 		(void) strlcpy(path0, vd0->vdev_path, MAXPATHLEN);
6563be181ee2SMartin Matuska 		(void) strlcpy(pathrand, vd0->vdev_path, MAXPATHLEN);
6564716fd348SMartin Matuska 
6565716fd348SMartin Matuska 		leaf = 0;
6566716fd348SMartin Matuska 		leaves = 1;
6567716fd348SMartin Matuska 		maxfaults = INT_MAX;	/* no limit on cache devices */
6568716fd348SMartin Matuska 	}
6569716fd348SMartin Matuska 
6570716fd348SMartin Matuska 	spa_config_exit(spa, SCL_STATE, FTAG);
6571716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
6572716fd348SMartin Matuska 
6573716fd348SMartin Matuska 	/*
6574716fd348SMartin Matuska 	 * If we can tolerate two or more faults, or we're dealing
6575716fd348SMartin Matuska 	 * with a slog, randomly online/offline vd0.
6576716fd348SMartin Matuska 	 */
6577716fd348SMartin Matuska 	if ((maxfaults >= 2 || islog) && guid0 != 0) {
6578716fd348SMartin Matuska 		if (ztest_random(10) < 6) {
6579716fd348SMartin Matuska 			int flags = (ztest_random(2) == 0 ?
6580716fd348SMartin Matuska 			    ZFS_OFFLINE_TEMPORARY : 0);
6581716fd348SMartin Matuska 
6582716fd348SMartin Matuska 			/*
6583716fd348SMartin Matuska 			 * We have to grab the zs_name_lock as writer to
6584716fd348SMartin Matuska 			 * prevent a race between offlining a slog and
6585716fd348SMartin Matuska 			 * destroying a dataset. Offlining the slog will
6586716fd348SMartin Matuska 			 * grab a reference on the dataset which may cause
6587716fd348SMartin Matuska 			 * dsl_destroy_head() to fail with EBUSY thus
6588716fd348SMartin Matuska 			 * leaving the dataset in an inconsistent state.
6589716fd348SMartin Matuska 			 */
6590716fd348SMartin Matuska 			if (islog)
6591716fd348SMartin Matuska 				(void) pthread_rwlock_wrlock(&ztest_name_lock);
6592716fd348SMartin Matuska 
6593716fd348SMartin Matuska 			VERIFY3U(vdev_offline(spa, guid0, flags), !=, EBUSY);
6594716fd348SMartin Matuska 
6595716fd348SMartin Matuska 			if (islog)
6596716fd348SMartin Matuska 				(void) pthread_rwlock_unlock(&ztest_name_lock);
6597716fd348SMartin Matuska 		} else {
6598716fd348SMartin Matuska 			/*
6599716fd348SMartin Matuska 			 * Ideally we would like to be able to randomly
6600716fd348SMartin Matuska 			 * call vdev_[on|off]line without holding locks
6601716fd348SMartin Matuska 			 * to force unpredictable failures but the side
6602716fd348SMartin Matuska 			 * effects of vdev_[on|off]line prevent us from
6603e716630dSMartin Matuska 			 * doing so.
6604716fd348SMartin Matuska 			 */
6605716fd348SMartin Matuska 			(void) vdev_online(spa, guid0, 0, NULL);
6606716fd348SMartin Matuska 		}
6607716fd348SMartin Matuska 	}
6608716fd348SMartin Matuska 
6609716fd348SMartin Matuska 	if (maxfaults == 0)
6610716fd348SMartin Matuska 		goto out;
6611716fd348SMartin Matuska 
6612716fd348SMartin Matuska 	/*
6613716fd348SMartin Matuska 	 * We have at least single-fault tolerance, so inject data corruption.
6614716fd348SMartin Matuska 	 */
6615716fd348SMartin Matuska 	fd = open(pathrand, O_RDWR);
6616716fd348SMartin Matuska 
6617716fd348SMartin Matuska 	if (fd == -1) /* we hit a gap in the device namespace */
6618716fd348SMartin Matuska 		goto out;
6619716fd348SMartin Matuska 
6620716fd348SMartin Matuska 	fsize = lseek(fd, 0, SEEK_END);
6621716fd348SMartin Matuska 
6622716fd348SMartin Matuska 	while (--iters != 0) {
6623716fd348SMartin Matuska 		/*
6624716fd348SMartin Matuska 		 * The offset must be chosen carefully to ensure that
6625716fd348SMartin Matuska 		 * we do not inject a given logical block with errors
6626716fd348SMartin Matuska 		 * on two different leaf devices, because ZFS can not
6627716fd348SMartin Matuska 		 * tolerate that (if maxfaults==1).
6628716fd348SMartin Matuska 		 *
6629716fd348SMartin Matuska 		 * To achieve this we divide each leaf device into
6630716fd348SMartin Matuska 		 * chunks of size (# leaves * SPA_MAXBLOCKSIZE * 4).
6631716fd348SMartin Matuska 		 * Each chunk is further divided into error-injection
6632716fd348SMartin Matuska 		 * ranges (can accept errors) and clear ranges (we do
6633716fd348SMartin Matuska 		 * not inject errors in those). Each error-injection
6634716fd348SMartin Matuska 		 * range can accept errors only for a single leaf vdev.
6635716fd348SMartin Matuska 		 * Error-injection ranges are separated by clear ranges.
6636716fd348SMartin Matuska 		 *
6637716fd348SMartin Matuska 		 * For example, with 3 leaves, each chunk looks like:
6638716fd348SMartin Matuska 		 *    0 to  32M: injection range for leaf 0
6639716fd348SMartin Matuska 		 *  32M to  64M: clear range - no injection allowed
6640716fd348SMartin Matuska 		 *  64M to  96M: injection range for leaf 1
6641716fd348SMartin Matuska 		 *  96M to 128M: clear range - no injection allowed
6642716fd348SMartin Matuska 		 * 128M to 160M: injection range for leaf 2
6643716fd348SMartin Matuska 		 * 160M to 192M: clear range - no injection allowed
6644716fd348SMartin Matuska 		 *
6645716fd348SMartin Matuska 		 * Each clear range must be large enough such that a
6646716fd348SMartin Matuska 		 * single block cannot straddle it. This way a block
6647716fd348SMartin Matuska 		 * can't be a target in two different injection ranges
6648716fd348SMartin Matuska 		 * (on different leaf vdevs).
6649716fd348SMartin Matuska 		 */
6650716fd348SMartin Matuska 		offset = ztest_random(fsize / (leaves << bshift)) *
6651716fd348SMartin Matuska 		    (leaves << bshift) + (leaf << bshift) +
6652716fd348SMartin Matuska 		    (ztest_random(1ULL << (bshift - 1)) & -8ULL);
6653716fd348SMartin Matuska 
6654716fd348SMartin Matuska 		/*
6655716fd348SMartin Matuska 		 * Only allow damage to the labels at one end of the vdev.
6656716fd348SMartin Matuska 		 *
6657716fd348SMartin Matuska 		 * If all labels are damaged, the device will be totally
6658716fd348SMartin Matuska 		 * inaccessible, which will result in loss of data,
6659716fd348SMartin Matuska 		 * because we also damage (parts of) the other side of
6660716fd348SMartin Matuska 		 * the mirror/raidz.
6661716fd348SMartin Matuska 		 *
6662716fd348SMartin Matuska 		 * Additionally, we will always have both an even and an
6663716fd348SMartin Matuska 		 * odd label, so that we can handle crashes in the
6664716fd348SMartin Matuska 		 * middle of vdev_config_sync().
6665716fd348SMartin Matuska 		 */
6666716fd348SMartin Matuska 		if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
6667716fd348SMartin Matuska 			continue;
6668716fd348SMartin Matuska 
6669716fd348SMartin Matuska 		/*
6670716fd348SMartin Matuska 		 * The two end labels are stored at the "end" of the disk, but
6671716fd348SMartin Matuska 		 * the end of the disk (vdev_psize) is aligned to
6672716fd348SMartin Matuska 		 * sizeof (vdev_label_t).
6673716fd348SMartin Matuska 		 */
6674aca928a5SMartin Matuska 		uint64_t psize = P2ALIGN_TYPED(fsize, sizeof (vdev_label_t),
6675aca928a5SMartin Matuska 		    uint64_t);
6676716fd348SMartin Matuska 		if ((leaf & 1) == 1 &&
6677716fd348SMartin Matuska 		    offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
6678716fd348SMartin Matuska 			continue;
6679716fd348SMartin Matuska 
6680716fd348SMartin Matuska 		if (mirror_save != zs->zs_mirrors) {
6681716fd348SMartin Matuska 			(void) close(fd);
6682716fd348SMartin Matuska 			goto out;
6683716fd348SMartin Matuska 		}
6684716fd348SMartin Matuska 
6685716fd348SMartin Matuska 		if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
6686716fd348SMartin Matuska 			fatal(B_TRUE,
6687716fd348SMartin Matuska 			    "can't inject bad word at 0x%"PRIx64" in %s",
6688716fd348SMartin Matuska 			    offset, pathrand);
6689716fd348SMartin Matuska 
6690716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 7)
6691716fd348SMartin Matuska 			(void) printf("injected bad word into %s,"
6692716fd348SMartin Matuska 			    " offset 0x%"PRIx64"\n", pathrand, offset);
6693e716630dSMartin Matuska 
6694e716630dSMartin Matuska 		injected = B_TRUE;
6695716fd348SMartin Matuska 	}
6696716fd348SMartin Matuska 
6697716fd348SMartin Matuska 	(void) close(fd);
6698716fd348SMartin Matuska out:
6699e716630dSMartin Matuska 	mutex_exit(&ztest_vdev_lock);
6700e716630dSMartin Matuska 
6701e716630dSMartin Matuska 	if (injected && ztest_opts.zo_raid_do_expand) {
6702e716630dSMartin Matuska 		int error = spa_scan(spa, POOL_SCAN_SCRUB);
6703e716630dSMartin Matuska 		if (error == 0) {
6704e716630dSMartin Matuska 			while (dsl_scan_scrubbing(spa_get_dsl(spa)))
6705e716630dSMartin Matuska 				txg_wait_synced(spa_get_dsl(spa), 0);
6706e716630dSMartin Matuska 		}
6707e716630dSMartin Matuska 	}
6708e716630dSMartin Matuska 
6709716fd348SMartin Matuska 	umem_free(path0, MAXPATHLEN);
6710716fd348SMartin Matuska 	umem_free(pathrand, MAXPATHLEN);
6711716fd348SMartin Matuska }
6712716fd348SMartin Matuska 
6713716fd348SMartin Matuska /*
6714716fd348SMartin Matuska  * By design ztest will never inject uncorrectable damage in to the pool.
6715716fd348SMartin Matuska  * Issue a scrub, wait for it to complete, and verify there is never any
6716716fd348SMartin Matuska  * persistent damage.
6717716fd348SMartin Matuska  *
6718716fd348SMartin Matuska  * Only after a full scrub has been completed is it safe to start injecting
6719716fd348SMartin Matuska  * data corruption.  See the comment in zfs_fault_inject().
6720*87bf66d4SMartin Matuska  *
6721*87bf66d4SMartin Matuska  * EBUSY may be returned for the following six cases.  It's the callers
6722*87bf66d4SMartin Matuska  * responsibility to handle them accordingly.
6723*87bf66d4SMartin Matuska  *
6724*87bf66d4SMartin Matuska  * Current state                Requested
6725*87bf66d4SMartin Matuska  * 1. Normal Scrub Running      Normal Scrub or Error Scrub
6726*87bf66d4SMartin Matuska  * 2. Normal Scrub Paused       Error Scrub
6727*87bf66d4SMartin Matuska  * 3. Normal Scrub Paused       Pause Normal Scrub
6728*87bf66d4SMartin Matuska  * 4. Error Scrub Running       Normal Scrub or Error Scrub
6729*87bf66d4SMartin Matuska  * 5. Error Scrub Paused        Pause Error Scrub
6730*87bf66d4SMartin Matuska  * 6. Resilvering               Anything else
6731716fd348SMartin Matuska  */
6732716fd348SMartin Matuska static int
6733716fd348SMartin Matuska ztest_scrub_impl(spa_t *spa)
6734716fd348SMartin Matuska {
6735716fd348SMartin Matuska 	int error = spa_scan(spa, POOL_SCAN_SCRUB);
6736716fd348SMartin Matuska 	if (error)
6737716fd348SMartin Matuska 		return (error);
6738716fd348SMartin Matuska 
6739716fd348SMartin Matuska 	while (dsl_scan_scrubbing(spa_get_dsl(spa)))
6740716fd348SMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
6741716fd348SMartin Matuska 
674215f0b8c3SMartin Matuska 	if (spa_approx_errlog_size(spa) > 0)
6743716fd348SMartin Matuska 		return (ECKSUM);
6744716fd348SMartin Matuska 
6745716fd348SMartin Matuska 	ztest_pool_scrubbed = B_TRUE;
6746716fd348SMartin Matuska 
6747716fd348SMartin Matuska 	return (0);
6748716fd348SMartin Matuska }
6749716fd348SMartin Matuska 
6750716fd348SMartin Matuska /*
6751716fd348SMartin Matuska  * Scrub the pool.
6752716fd348SMartin Matuska  */
6753716fd348SMartin Matuska void
6754716fd348SMartin Matuska ztest_scrub(ztest_ds_t *zd, uint64_t id)
6755716fd348SMartin Matuska {
6756716fd348SMartin Matuska 	(void) zd, (void) id;
6757716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
6758716fd348SMartin Matuska 	int error;
6759716fd348SMartin Matuska 
6760716fd348SMartin Matuska 	/*
6761716fd348SMartin Matuska 	 * Scrub in progress by device removal.
6762716fd348SMartin Matuska 	 */
6763716fd348SMartin Matuska 	if (ztest_device_removal_active)
6764716fd348SMartin Matuska 		return;
6765716fd348SMartin Matuska 
6766716fd348SMartin Matuska 	/*
6767716fd348SMartin Matuska 	 * Start a scrub, wait a moment, then force a restart.
6768716fd348SMartin Matuska 	 */
6769716fd348SMartin Matuska 	(void) spa_scan(spa, POOL_SCAN_SCRUB);
6770716fd348SMartin Matuska 	(void) poll(NULL, 0, 100);
6771716fd348SMartin Matuska 
6772716fd348SMartin Matuska 	error = ztest_scrub_impl(spa);
6773716fd348SMartin Matuska 	if (error == EBUSY)
6774716fd348SMartin Matuska 		error = 0;
6775716fd348SMartin Matuska 	ASSERT0(error);
6776716fd348SMartin Matuska }
6777716fd348SMartin Matuska 
6778716fd348SMartin Matuska /*
6779716fd348SMartin Matuska  * Change the guid for the pool.
6780716fd348SMartin Matuska  */
6781716fd348SMartin Matuska void
6782716fd348SMartin Matuska ztest_reguid(ztest_ds_t *zd, uint64_t id)
6783716fd348SMartin Matuska {
6784716fd348SMartin Matuska 	(void) zd, (void) id;
6785716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
6786716fd348SMartin Matuska 	uint64_t orig, load;
6787716fd348SMartin Matuska 	int error;
6788abcdc1b9SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
6789716fd348SMartin Matuska 
6790716fd348SMartin Matuska 	if (ztest_opts.zo_mmp_test)
6791716fd348SMartin Matuska 		return;
6792716fd348SMartin Matuska 
6793716fd348SMartin Matuska 	orig = spa_guid(spa);
6794716fd348SMartin Matuska 	load = spa_load_guid(spa);
6795716fd348SMartin Matuska 
6796716fd348SMartin Matuska 	(void) pthread_rwlock_wrlock(&ztest_name_lock);
6797e2df9bb4SMartin Matuska 	error = spa_change_guid(spa, NULL);
6798abcdc1b9SMartin Matuska 	zs->zs_guid = spa_guid(spa);
6799716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
6800716fd348SMartin Matuska 
6801716fd348SMartin Matuska 	if (error != 0)
6802716fd348SMartin Matuska 		return;
6803716fd348SMartin Matuska 
6804716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 4) {
6805716fd348SMartin Matuska 		(void) printf("Changed guid old %"PRIu64" -> %"PRIu64"\n",
6806716fd348SMartin Matuska 		    orig, spa_guid(spa));
6807716fd348SMartin Matuska 	}
6808716fd348SMartin Matuska 
6809716fd348SMartin Matuska 	VERIFY3U(orig, !=, spa_guid(spa));
6810716fd348SMartin Matuska 	VERIFY3U(load, ==, spa_load_guid(spa));
6811716fd348SMartin Matuska }
6812716fd348SMartin Matuska 
6813716fd348SMartin Matuska void
68141f1e2261SMartin Matuska ztest_blake3(ztest_ds_t *zd, uint64_t id)
68151f1e2261SMartin Matuska {
68161f1e2261SMartin Matuska 	(void) zd, (void) id;
68171f1e2261SMartin Matuska 	hrtime_t end = gethrtime() + NANOSEC;
68181f1e2261SMartin Matuska 	zio_cksum_salt_t salt;
68191f1e2261SMartin Matuska 	void *salt_ptr = &salt.zcs_bytes;
68201f1e2261SMartin Matuska 	struct abd *abd_data, *abd_meta;
68211f1e2261SMartin Matuska 	void *buf, *templ;
68221f1e2261SMartin Matuska 	int i, *ptr;
68231f1e2261SMartin Matuska 	uint32_t size;
68241f1e2261SMartin Matuska 	BLAKE3_CTX ctx;
68252a58b312SMartin Matuska 	const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
68261f1e2261SMartin Matuska 
68271f1e2261SMartin Matuska 	size = ztest_random_blocksize();
68281f1e2261SMartin Matuska 	buf = umem_alloc(size, UMEM_NOFAIL);
68291f1e2261SMartin Matuska 	abd_data = abd_alloc(size, B_FALSE);
68301f1e2261SMartin Matuska 	abd_meta = abd_alloc(size, B_TRUE);
68311f1e2261SMartin Matuska 
68321f1e2261SMartin Matuska 	for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
68331f1e2261SMartin Matuska 		*ptr = ztest_random(UINT_MAX);
68341f1e2261SMartin Matuska 	memset(salt_ptr, 'A', 32);
68351f1e2261SMartin Matuska 
68361f1e2261SMartin Matuska 	abd_copy_from_buf_off(abd_data, buf, 0, size);
68371f1e2261SMartin Matuska 	abd_copy_from_buf_off(abd_meta, buf, 0, size);
68381f1e2261SMartin Matuska 
68391f1e2261SMartin Matuska 	while (gethrtime() <= end) {
68401f1e2261SMartin Matuska 		int run_count = 100;
68411f1e2261SMartin Matuska 		zio_cksum_t zc_ref1, zc_ref2;
68421f1e2261SMartin Matuska 		zio_cksum_t zc_res1, zc_res2;
68431f1e2261SMartin Matuska 
68441f1e2261SMartin Matuska 		void *ref1 = &zc_ref1;
68451f1e2261SMartin Matuska 		void *ref2 = &zc_ref2;
68461f1e2261SMartin Matuska 		void *res1 = &zc_res1;
68471f1e2261SMartin Matuska 		void *res2 = &zc_res2;
68481f1e2261SMartin Matuska 
68491f1e2261SMartin Matuska 		/* BLAKE3_KEY_LEN = 32 */
68502a58b312SMartin Matuska 		VERIFY0(blake3->setname("generic"));
68511f1e2261SMartin Matuska 		templ = abd_checksum_blake3_tmpl_init(&salt);
68521f1e2261SMartin Matuska 		Blake3_InitKeyed(&ctx, salt_ptr);
68531f1e2261SMartin Matuska 		Blake3_Update(&ctx, buf, size);
68541f1e2261SMartin Matuska 		Blake3_Final(&ctx, ref1);
68551f1e2261SMartin Matuska 		zc_ref2 = zc_ref1;
68561f1e2261SMartin Matuska 		ZIO_CHECKSUM_BSWAP(&zc_ref2);
68571f1e2261SMartin Matuska 		abd_checksum_blake3_tmpl_free(templ);
68581f1e2261SMartin Matuska 
68592a58b312SMartin Matuska 		VERIFY0(blake3->setname("cycle"));
68601f1e2261SMartin Matuska 		while (run_count-- > 0) {
68611f1e2261SMartin Matuska 
68621f1e2261SMartin Matuska 			/* Test current implementation */
68631f1e2261SMartin Matuska 			Blake3_InitKeyed(&ctx, salt_ptr);
68641f1e2261SMartin Matuska 			Blake3_Update(&ctx, buf, size);
68651f1e2261SMartin Matuska 			Blake3_Final(&ctx, res1);
68661f1e2261SMartin Matuska 			zc_res2 = zc_res1;
68671f1e2261SMartin Matuska 			ZIO_CHECKSUM_BSWAP(&zc_res2);
68681f1e2261SMartin Matuska 
68691f1e2261SMartin Matuska 			VERIFY0(memcmp(ref1, res1, 32));
68701f1e2261SMartin Matuska 			VERIFY0(memcmp(ref2, res2, 32));
68711f1e2261SMartin Matuska 
68721f1e2261SMartin Matuska 			/* Test ABD - data */
68731f1e2261SMartin Matuska 			templ = abd_checksum_blake3_tmpl_init(&salt);
68741f1e2261SMartin Matuska 			abd_checksum_blake3_native(abd_data, size,
68751f1e2261SMartin Matuska 			    templ, &zc_res1);
68761f1e2261SMartin Matuska 			abd_checksum_blake3_byteswap(abd_data, size,
68771f1e2261SMartin Matuska 			    templ, &zc_res2);
68781f1e2261SMartin Matuska 
68791f1e2261SMartin Matuska 			VERIFY0(memcmp(ref1, res1, 32));
68801f1e2261SMartin Matuska 			VERIFY0(memcmp(ref2, res2, 32));
68811f1e2261SMartin Matuska 
68821f1e2261SMartin Matuska 			/* Test ABD - metadata */
68831f1e2261SMartin Matuska 			abd_checksum_blake3_native(abd_meta, size,
68841f1e2261SMartin Matuska 			    templ, &zc_res1);
68851f1e2261SMartin Matuska 			abd_checksum_blake3_byteswap(abd_meta, size,
68861f1e2261SMartin Matuska 			    templ, &zc_res2);
68871f1e2261SMartin Matuska 			abd_checksum_blake3_tmpl_free(templ);
68881f1e2261SMartin Matuska 
68891f1e2261SMartin Matuska 			VERIFY0(memcmp(ref1, res1, 32));
68901f1e2261SMartin Matuska 			VERIFY0(memcmp(ref2, res2, 32));
68911f1e2261SMartin Matuska 
68921f1e2261SMartin Matuska 		}
68931f1e2261SMartin Matuska 	}
68941f1e2261SMartin Matuska 
68951f1e2261SMartin Matuska 	abd_free(abd_data);
68961f1e2261SMartin Matuska 	abd_free(abd_meta);
68971f1e2261SMartin Matuska 	umem_free(buf, size);
68981f1e2261SMartin Matuska }
68991f1e2261SMartin Matuska 
69001f1e2261SMartin Matuska void
6901716fd348SMartin Matuska ztest_fletcher(ztest_ds_t *zd, uint64_t id)
6902716fd348SMartin Matuska {
6903716fd348SMartin Matuska 	(void) zd, (void) id;
6904716fd348SMartin Matuska 	hrtime_t end = gethrtime() + NANOSEC;
6905716fd348SMartin Matuska 
6906716fd348SMartin Matuska 	while (gethrtime() <= end) {
6907716fd348SMartin Matuska 		int run_count = 100;
6908716fd348SMartin Matuska 		void *buf;
6909716fd348SMartin Matuska 		struct abd *abd_data, *abd_meta;
6910716fd348SMartin Matuska 		uint32_t size;
6911716fd348SMartin Matuska 		int *ptr;
6912716fd348SMartin Matuska 		int i;
6913716fd348SMartin Matuska 		zio_cksum_t zc_ref;
6914716fd348SMartin Matuska 		zio_cksum_t zc_ref_byteswap;
6915716fd348SMartin Matuska 
6916716fd348SMartin Matuska 		size = ztest_random_blocksize();
6917716fd348SMartin Matuska 
6918716fd348SMartin Matuska 		buf = umem_alloc(size, UMEM_NOFAIL);
6919716fd348SMartin Matuska 		abd_data = abd_alloc(size, B_FALSE);
6920716fd348SMartin Matuska 		abd_meta = abd_alloc(size, B_TRUE);
6921716fd348SMartin Matuska 
6922716fd348SMartin Matuska 		for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
6923716fd348SMartin Matuska 			*ptr = ztest_random(UINT_MAX);
6924716fd348SMartin Matuska 
6925716fd348SMartin Matuska 		abd_copy_from_buf_off(abd_data, buf, 0, size);
6926716fd348SMartin Matuska 		abd_copy_from_buf_off(abd_meta, buf, 0, size);
6927716fd348SMartin Matuska 
6928716fd348SMartin Matuska 		VERIFY0(fletcher_4_impl_set("scalar"));
6929716fd348SMartin Matuska 		fletcher_4_native(buf, size, NULL, &zc_ref);
6930716fd348SMartin Matuska 		fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap);
6931716fd348SMartin Matuska 
6932716fd348SMartin Matuska 		VERIFY0(fletcher_4_impl_set("cycle"));
6933716fd348SMartin Matuska 		while (run_count-- > 0) {
6934716fd348SMartin Matuska 			zio_cksum_t zc;
6935716fd348SMartin Matuska 			zio_cksum_t zc_byteswap;
6936716fd348SMartin Matuska 
6937716fd348SMartin Matuska 			fletcher_4_byteswap(buf, size, NULL, &zc_byteswap);
6938716fd348SMartin Matuska 			fletcher_4_native(buf, size, NULL, &zc);
6939716fd348SMartin Matuska 
6940716fd348SMartin Matuska 			VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc)));
6941716fd348SMartin Matuska 			VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap,
6942716fd348SMartin Matuska 			    sizeof (zc_byteswap)));
6943716fd348SMartin Matuska 
6944716fd348SMartin Matuska 			/* Test ABD - data */
6945716fd348SMartin Matuska 			abd_fletcher_4_byteswap(abd_data, size, NULL,
6946716fd348SMartin Matuska 			    &zc_byteswap);
6947716fd348SMartin Matuska 			abd_fletcher_4_native(abd_data, size, NULL, &zc);
6948716fd348SMartin Matuska 
6949716fd348SMartin Matuska 			VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc)));
6950716fd348SMartin Matuska 			VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap,
6951716fd348SMartin Matuska 			    sizeof (zc_byteswap)));
6952716fd348SMartin Matuska 
6953716fd348SMartin Matuska 			/* Test ABD - metadata */
6954716fd348SMartin Matuska 			abd_fletcher_4_byteswap(abd_meta, size, NULL,
6955716fd348SMartin Matuska 			    &zc_byteswap);
6956716fd348SMartin Matuska 			abd_fletcher_4_native(abd_meta, size, NULL, &zc);
6957716fd348SMartin Matuska 
6958716fd348SMartin Matuska 			VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc)));
6959716fd348SMartin Matuska 			VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap,
6960716fd348SMartin Matuska 			    sizeof (zc_byteswap)));
6961716fd348SMartin Matuska 
6962716fd348SMartin Matuska 		}
6963716fd348SMartin Matuska 
6964716fd348SMartin Matuska 		umem_free(buf, size);
6965716fd348SMartin Matuska 		abd_free(abd_data);
6966716fd348SMartin Matuska 		abd_free(abd_meta);
6967716fd348SMartin Matuska 	}
6968716fd348SMartin Matuska }
6969716fd348SMartin Matuska 
6970716fd348SMartin Matuska void
6971716fd348SMartin Matuska ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id)
6972716fd348SMartin Matuska {
6973716fd348SMartin Matuska 	(void) zd, (void) id;
6974716fd348SMartin Matuska 	void *buf;
6975716fd348SMartin Matuska 	size_t size;
6976716fd348SMartin Matuska 	int *ptr;
6977716fd348SMartin Matuska 	int i;
6978716fd348SMartin Matuska 	zio_cksum_t zc_ref;
6979716fd348SMartin Matuska 	zio_cksum_t zc_ref_bswap;
6980716fd348SMartin Matuska 
6981716fd348SMartin Matuska 	hrtime_t end = gethrtime() + NANOSEC;
6982716fd348SMartin Matuska 
6983716fd348SMartin Matuska 	while (gethrtime() <= end) {
6984716fd348SMartin Matuska 		int run_count = 100;
6985716fd348SMartin Matuska 
6986716fd348SMartin Matuska 		size = ztest_random_blocksize();
6987716fd348SMartin Matuska 		buf = umem_alloc(size, UMEM_NOFAIL);
6988716fd348SMartin Matuska 
6989716fd348SMartin Matuska 		for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
6990716fd348SMartin Matuska 			*ptr = ztest_random(UINT_MAX);
6991716fd348SMartin Matuska 
6992716fd348SMartin Matuska 		VERIFY0(fletcher_4_impl_set("scalar"));
6993716fd348SMartin Matuska 		fletcher_4_native(buf, size, NULL, &zc_ref);
6994716fd348SMartin Matuska 		fletcher_4_byteswap(buf, size, NULL, &zc_ref_bswap);
6995716fd348SMartin Matuska 
6996716fd348SMartin Matuska 		VERIFY0(fletcher_4_impl_set("cycle"));
6997716fd348SMartin Matuska 
6998716fd348SMartin Matuska 		while (run_count-- > 0) {
6999716fd348SMartin Matuska 			zio_cksum_t zc;
7000716fd348SMartin Matuska 			zio_cksum_t zc_bswap;
7001716fd348SMartin Matuska 			size_t pos = 0;
7002716fd348SMartin Matuska 
7003716fd348SMartin Matuska 			ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
7004716fd348SMartin Matuska 			ZIO_SET_CHECKSUM(&zc_bswap, 0, 0, 0, 0);
7005716fd348SMartin Matuska 
7006716fd348SMartin Matuska 			while (pos < size) {
7007716fd348SMartin Matuska 				size_t inc = 64 * ztest_random(size / 67);
7008716fd348SMartin Matuska 				/* sometimes add few bytes to test non-simd */
7009716fd348SMartin Matuska 				if (ztest_random(100) < 10)
7010aca928a5SMartin Matuska 					inc += P2ALIGN_TYPED(ztest_random(64),
7011aca928a5SMartin Matuska 					    sizeof (uint32_t), uint64_t);
7012716fd348SMartin Matuska 
7013716fd348SMartin Matuska 				if (inc > (size - pos))
7014716fd348SMartin Matuska 					inc = size - pos;
7015716fd348SMartin Matuska 
7016716fd348SMartin Matuska 				fletcher_4_incremental_native(buf + pos, inc,
7017716fd348SMartin Matuska 				    &zc);
7018716fd348SMartin Matuska 				fletcher_4_incremental_byteswap(buf + pos, inc,
7019716fd348SMartin Matuska 				    &zc_bswap);
7020716fd348SMartin Matuska 
7021716fd348SMartin Matuska 				pos += inc;
7022716fd348SMartin Matuska 			}
7023716fd348SMartin Matuska 
7024716fd348SMartin Matuska 			VERIFY3U(pos, ==, size);
7025716fd348SMartin Matuska 
7026716fd348SMartin Matuska 			VERIFY(ZIO_CHECKSUM_EQUAL(zc, zc_ref));
7027716fd348SMartin Matuska 			VERIFY(ZIO_CHECKSUM_EQUAL(zc_bswap, zc_ref_bswap));
7028716fd348SMartin Matuska 
7029716fd348SMartin Matuska 			/*
7030716fd348SMartin Matuska 			 * verify if incremental on the whole buffer is
7031716fd348SMartin Matuska 			 * equivalent to non-incremental version
7032716fd348SMartin Matuska 			 */
7033716fd348SMartin Matuska 			ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
7034716fd348SMartin Matuska 			ZIO_SET_CHECKSUM(&zc_bswap, 0, 0, 0, 0);
7035716fd348SMartin Matuska 
7036716fd348SMartin Matuska 			fletcher_4_incremental_native(buf, size, &zc);
7037716fd348SMartin Matuska 			fletcher_4_incremental_byteswap(buf, size, &zc_bswap);
7038716fd348SMartin Matuska 
7039716fd348SMartin Matuska 			VERIFY(ZIO_CHECKSUM_EQUAL(zc, zc_ref));
7040716fd348SMartin Matuska 			VERIFY(ZIO_CHECKSUM_EQUAL(zc_bswap, zc_ref_bswap));
7041716fd348SMartin Matuska 		}
7042716fd348SMartin Matuska 
7043716fd348SMartin Matuska 		umem_free(buf, size);
7044716fd348SMartin Matuska 	}
7045716fd348SMartin Matuska }
7046716fd348SMartin Matuska 
7047ce4dcb97SMartin Matuska void
7048ce4dcb97SMartin Matuska ztest_pool_prefetch_ddt(ztest_ds_t *zd, uint64_t id)
7049ce4dcb97SMartin Matuska {
7050ce4dcb97SMartin Matuska 	(void) zd, (void) id;
7051ce4dcb97SMartin Matuska 	spa_t *spa;
7052ce4dcb97SMartin Matuska 
7053ce4dcb97SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
7054ce4dcb97SMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
7055ce4dcb97SMartin Matuska 
7056ce4dcb97SMartin Matuska 	ddt_prefetch_all(spa);
7057ce4dcb97SMartin Matuska 
7058ce4dcb97SMartin Matuska 	spa_close(spa, FTAG);
7059ce4dcb97SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
7060ce4dcb97SMartin Matuska }
7061ce4dcb97SMartin Matuska 
7062716fd348SMartin Matuska static int
7063716fd348SMartin Matuska ztest_set_global_vars(void)
7064716fd348SMartin Matuska {
7065716fd348SMartin Matuska 	for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) {
7066716fd348SMartin Matuska 		char *kv = ztest_opts.zo_gvars[i];
7067716fd348SMartin Matuska 		VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN);
7068716fd348SMartin Matuska 		VERIFY3U(strlen(kv), >, 0);
7069716fd348SMartin Matuska 		int err = set_global_var(kv);
7070716fd348SMartin Matuska 		if (ztest_opts.zo_verbose > 0) {
7071716fd348SMartin Matuska 			(void) printf("setting global var %s ... %s\n", kv,
7072716fd348SMartin Matuska 			    err ? "failed" : "ok");
7073716fd348SMartin Matuska 		}
7074716fd348SMartin Matuska 		if (err != 0) {
7075716fd348SMartin Matuska 			(void) fprintf(stderr,
7076716fd348SMartin Matuska 			    "failed to set global var '%s'\n", kv);
7077716fd348SMartin Matuska 			return (err);
7078716fd348SMartin Matuska 		}
7079716fd348SMartin Matuska 	}
7080716fd348SMartin Matuska 	return (0);
7081716fd348SMartin Matuska }
7082716fd348SMartin Matuska 
7083716fd348SMartin Matuska static char **
7084716fd348SMartin Matuska ztest_global_vars_to_zdb_args(void)
7085716fd348SMartin Matuska {
7086716fd348SMartin Matuska 	char **args = calloc(2*ztest_opts.zo_gvars_count + 1, sizeof (char *));
7087716fd348SMartin Matuska 	char **cur = args;
7088be181ee2SMartin Matuska 	if (args == NULL)
7089be181ee2SMartin Matuska 		return (NULL);
7090716fd348SMartin Matuska 	for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) {
7091a0b956f5SMartin Matuska 		*cur++ = (char *)"-o";
7092a0b956f5SMartin Matuska 		*cur++ = ztest_opts.zo_gvars[i];
7093716fd348SMartin Matuska 	}
7094716fd348SMartin Matuska 	ASSERT3P(cur, ==, &args[2*ztest_opts.zo_gvars_count]);
7095716fd348SMartin Matuska 	*cur = NULL;
7096716fd348SMartin Matuska 	return (args);
7097716fd348SMartin Matuska }
7098716fd348SMartin Matuska 
7099716fd348SMartin Matuska /* The end of strings is indicated by a NULL element */
7100716fd348SMartin Matuska static char *
7101716fd348SMartin Matuska join_strings(char **strings, const char *sep)
7102716fd348SMartin Matuska {
7103716fd348SMartin Matuska 	size_t totallen = 0;
7104716fd348SMartin Matuska 	for (char **sp = strings; *sp != NULL; sp++) {
7105716fd348SMartin Matuska 		totallen += strlen(*sp);
7106716fd348SMartin Matuska 		totallen += strlen(sep);
7107716fd348SMartin Matuska 	}
7108716fd348SMartin Matuska 	if (totallen > 0) {
7109716fd348SMartin Matuska 		ASSERT(totallen >= strlen(sep));
7110716fd348SMartin Matuska 		totallen -= strlen(sep);
7111716fd348SMartin Matuska 	}
7112716fd348SMartin Matuska 
7113716fd348SMartin Matuska 	size_t buflen = totallen + 1;
7114dbd5678dSMartin Matuska 	char *o = umem_alloc(buflen, UMEM_NOFAIL); /* trailing 0 byte */
7115716fd348SMartin Matuska 	o[0] = '\0';
7116716fd348SMartin Matuska 	for (char **sp = strings; *sp != NULL; sp++) {
7117716fd348SMartin Matuska 		size_t would;
7118716fd348SMartin Matuska 		would = strlcat(o, *sp, buflen);
7119716fd348SMartin Matuska 		VERIFY3U(would, <, buflen);
7120716fd348SMartin Matuska 		if (*(sp+1) == NULL) {
7121716fd348SMartin Matuska 			break;
7122716fd348SMartin Matuska 		}
7123716fd348SMartin Matuska 		would = strlcat(o, sep, buflen);
7124716fd348SMartin Matuska 		VERIFY3U(would, <, buflen);
7125716fd348SMartin Matuska 	}
7126716fd348SMartin Matuska 	ASSERT3S(strlen(o), ==, totallen);
7127716fd348SMartin Matuska 	return (o);
7128716fd348SMartin Matuska }
7129716fd348SMartin Matuska 
/*
 * Report whether 'path' can be stat()ed.
 * Returns 1 when stat() succeeds and 0 when it fails.
 */
static int
ztest_check_path(char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return (0);
	return (1);
}
7137716fd348SMartin Matuska 
7138716fd348SMartin Matuska static void
7139716fd348SMartin Matuska ztest_get_zdb_bin(char *bin, int len)
7140716fd348SMartin Matuska {
7141716fd348SMartin Matuska 	char *zdb_path;
7142716fd348SMartin Matuska 	/*
7143716fd348SMartin Matuska 	 * Try to use $ZDB and in-tree zdb path. If not successful, just
7144716fd348SMartin Matuska 	 * let popen to search through PATH.
7145716fd348SMartin Matuska 	 */
7146716fd348SMartin Matuska 	if ((zdb_path = getenv("ZDB"))) {
7147716fd348SMartin Matuska 		strlcpy(bin, zdb_path, len); /* In env */
7148716fd348SMartin Matuska 		if (!ztest_check_path(bin)) {
7149716fd348SMartin Matuska 			ztest_dump_core = 0;
7150716fd348SMartin Matuska 			fatal(B_TRUE, "invalid ZDB '%s'", bin);
7151716fd348SMartin Matuska 		}
7152716fd348SMartin Matuska 		return;
7153716fd348SMartin Matuska 	}
7154716fd348SMartin Matuska 
7155716fd348SMartin Matuska 	VERIFY3P(realpath(getexecname(), bin), !=, NULL);
7156716fd348SMartin Matuska 	if (strstr(bin, ".libs/ztest")) {
7157716fd348SMartin Matuska 		strstr(bin, ".libs/ztest")[0] = '\0'; /* In-tree */
7158716fd348SMartin Matuska 		strcat(bin, "zdb");
7159716fd348SMartin Matuska 		if (ztest_check_path(bin))
7160716fd348SMartin Matuska 			return;
7161716fd348SMartin Matuska 	}
7162716fd348SMartin Matuska 	strcpy(bin, "zdb");
7163716fd348SMartin Matuska }
7164716fd348SMartin Matuska 
7165716fd348SMartin Matuska static vdev_t *
7166716fd348SMartin Matuska ztest_random_concrete_vdev_leaf(vdev_t *vd)
7167716fd348SMartin Matuska {
7168716fd348SMartin Matuska 	if (vd == NULL)
7169716fd348SMartin Matuska 		return (NULL);
7170716fd348SMartin Matuska 
7171716fd348SMartin Matuska 	if (vd->vdev_children == 0)
7172716fd348SMartin Matuska 		return (vd);
7173716fd348SMartin Matuska 
7174716fd348SMartin Matuska 	vdev_t *eligible[vd->vdev_children];
7175716fd348SMartin Matuska 	int eligible_idx = 0, i;
7176716fd348SMartin Matuska 	for (i = 0; i < vd->vdev_children; i++) {
7177716fd348SMartin Matuska 		vdev_t *cvd = vd->vdev_child[i];
7178716fd348SMartin Matuska 		if (cvd->vdev_top->vdev_removing)
7179716fd348SMartin Matuska 			continue;
7180716fd348SMartin Matuska 		if (cvd->vdev_children > 0 ||
7181716fd348SMartin Matuska 		    (vdev_is_concrete(cvd) && !cvd->vdev_detached)) {
7182716fd348SMartin Matuska 			eligible[eligible_idx++] = cvd;
7183716fd348SMartin Matuska 		}
7184716fd348SMartin Matuska 	}
7185716fd348SMartin Matuska 	VERIFY3S(eligible_idx, >, 0);
7186716fd348SMartin Matuska 
7187716fd348SMartin Matuska 	uint64_t child_no = ztest_random(eligible_idx);
7188716fd348SMartin Matuska 	return (ztest_random_concrete_vdev_leaf(eligible[child_no]));
7189716fd348SMartin Matuska }
7190716fd348SMartin Matuska 
7191716fd348SMartin Matuska void
7192716fd348SMartin Matuska ztest_initialize(ztest_ds_t *zd, uint64_t id)
7193716fd348SMartin Matuska {
7194716fd348SMartin Matuska 	(void) zd, (void) id;
7195716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
7196716fd348SMartin Matuska 	int error = 0;
7197716fd348SMartin Matuska 
7198716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
7199716fd348SMartin Matuska 
7200716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
7201716fd348SMartin Matuska 
7202716fd348SMartin Matuska 	/* Random leaf vdev */
7203716fd348SMartin Matuska 	vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev);
7204716fd348SMartin Matuska 	if (rand_vd == NULL) {
7205716fd348SMartin Matuska 		spa_config_exit(spa, SCL_VDEV, FTAG);
7206716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
7207716fd348SMartin Matuska 		return;
7208716fd348SMartin Matuska 	}
7209716fd348SMartin Matuska 
7210716fd348SMartin Matuska 	/*
7211716fd348SMartin Matuska 	 * The random vdev we've selected may change as soon as we
7212716fd348SMartin Matuska 	 * drop the spa_config_lock. We create local copies of things
7213716fd348SMartin Matuska 	 * we're interested in.
7214716fd348SMartin Matuska 	 */
7215716fd348SMartin Matuska 	uint64_t guid = rand_vd->vdev_guid;
7216716fd348SMartin Matuska 	char *path = strdup(rand_vd->vdev_path);
7217716fd348SMartin Matuska 	boolean_t active = rand_vd->vdev_initialize_thread != NULL;
7218716fd348SMartin Matuska 
7219716fd348SMartin Matuska 	zfs_dbgmsg("vd %px, guid %llu", rand_vd, (u_longlong_t)guid);
7220716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
7221716fd348SMartin Matuska 
7222716fd348SMartin Matuska 	uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS);
7223716fd348SMartin Matuska 
7224716fd348SMartin Matuska 	nvlist_t *vdev_guids = fnvlist_alloc();
7225716fd348SMartin Matuska 	nvlist_t *vdev_errlist = fnvlist_alloc();
7226716fd348SMartin Matuska 	fnvlist_add_uint64(vdev_guids, path, guid);
7227716fd348SMartin Matuska 	error = spa_vdev_initialize(spa, vdev_guids, cmd, vdev_errlist);
7228716fd348SMartin Matuska 	fnvlist_free(vdev_guids);
7229716fd348SMartin Matuska 	fnvlist_free(vdev_errlist);
7230716fd348SMartin Matuska 
7231716fd348SMartin Matuska 	switch (cmd) {
7232716fd348SMartin Matuska 	case POOL_INITIALIZE_CANCEL:
7233716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7234716fd348SMartin Matuska 			(void) printf("Cancel initialize %s", path);
7235716fd348SMartin Matuska 			if (!active)
7236716fd348SMartin Matuska 				(void) printf(" failed (no initialize active)");
7237716fd348SMartin Matuska 			(void) printf("\n");
7238716fd348SMartin Matuska 		}
7239716fd348SMartin Matuska 		break;
7240716fd348SMartin Matuska 	case POOL_INITIALIZE_START:
7241716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7242716fd348SMartin Matuska 			(void) printf("Start initialize %s", path);
7243716fd348SMartin Matuska 			if (active && error == 0)
7244716fd348SMartin Matuska 				(void) printf(" failed (already active)");
7245716fd348SMartin Matuska 			else if (error != 0)
7246716fd348SMartin Matuska 				(void) printf(" failed (error %d)", error);
7247716fd348SMartin Matuska 			(void) printf("\n");
7248716fd348SMartin Matuska 		}
7249716fd348SMartin Matuska 		break;
7250716fd348SMartin Matuska 	case POOL_INITIALIZE_SUSPEND:
7251716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7252716fd348SMartin Matuska 			(void) printf("Suspend initialize %s", path);
7253716fd348SMartin Matuska 			if (!active)
7254716fd348SMartin Matuska 				(void) printf(" failed (no initialize active)");
7255716fd348SMartin Matuska 			(void) printf("\n");
7256716fd348SMartin Matuska 		}
7257716fd348SMartin Matuska 		break;
7258716fd348SMartin Matuska 	}
7259716fd348SMartin Matuska 	free(path);
7260716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
7261716fd348SMartin Matuska }
7262716fd348SMartin Matuska 
7263716fd348SMartin Matuska void
7264716fd348SMartin Matuska ztest_trim(ztest_ds_t *zd, uint64_t id)
7265716fd348SMartin Matuska {
7266716fd348SMartin Matuska 	(void) zd, (void) id;
7267716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
7268716fd348SMartin Matuska 	int error = 0;
7269716fd348SMartin Matuska 
7270716fd348SMartin Matuska 	mutex_enter(&ztest_vdev_lock);
7271716fd348SMartin Matuska 
7272716fd348SMartin Matuska 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
7273716fd348SMartin Matuska 
7274716fd348SMartin Matuska 	/* Random leaf vdev */
7275716fd348SMartin Matuska 	vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev);
7276716fd348SMartin Matuska 	if (rand_vd == NULL) {
7277716fd348SMartin Matuska 		spa_config_exit(spa, SCL_VDEV, FTAG);
7278716fd348SMartin Matuska 		mutex_exit(&ztest_vdev_lock);
7279716fd348SMartin Matuska 		return;
7280716fd348SMartin Matuska 	}
7281716fd348SMartin Matuska 
7282716fd348SMartin Matuska 	/*
7283716fd348SMartin Matuska 	 * The random vdev we've selected may change as soon as we
7284716fd348SMartin Matuska 	 * drop the spa_config_lock. We create local copies of things
7285716fd348SMartin Matuska 	 * we're interested in.
7286716fd348SMartin Matuska 	 */
7287716fd348SMartin Matuska 	uint64_t guid = rand_vd->vdev_guid;
7288716fd348SMartin Matuska 	char *path = strdup(rand_vd->vdev_path);
7289716fd348SMartin Matuska 	boolean_t active = rand_vd->vdev_trim_thread != NULL;
7290716fd348SMartin Matuska 
7291716fd348SMartin Matuska 	zfs_dbgmsg("vd %p, guid %llu", rand_vd, (u_longlong_t)guid);
7292716fd348SMartin Matuska 	spa_config_exit(spa, SCL_VDEV, FTAG);
7293716fd348SMartin Matuska 
7294716fd348SMartin Matuska 	uint64_t cmd = ztest_random(POOL_TRIM_FUNCS);
7295716fd348SMartin Matuska 	uint64_t rate = 1 << ztest_random(30);
7296716fd348SMartin Matuska 	boolean_t partial = (ztest_random(5) > 0);
7297716fd348SMartin Matuska 	boolean_t secure = (ztest_random(5) > 0);
7298716fd348SMartin Matuska 
7299716fd348SMartin Matuska 	nvlist_t *vdev_guids = fnvlist_alloc();
7300716fd348SMartin Matuska 	nvlist_t *vdev_errlist = fnvlist_alloc();
7301716fd348SMartin Matuska 	fnvlist_add_uint64(vdev_guids, path, guid);
7302716fd348SMartin Matuska 	error = spa_vdev_trim(spa, vdev_guids, cmd, rate, partial,
7303716fd348SMartin Matuska 	    secure, vdev_errlist);
7304716fd348SMartin Matuska 	fnvlist_free(vdev_guids);
7305716fd348SMartin Matuska 	fnvlist_free(vdev_errlist);
7306716fd348SMartin Matuska 
7307716fd348SMartin Matuska 	switch (cmd) {
7308716fd348SMartin Matuska 	case POOL_TRIM_CANCEL:
7309716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7310716fd348SMartin Matuska 			(void) printf("Cancel TRIM %s", path);
7311716fd348SMartin Matuska 			if (!active)
7312716fd348SMartin Matuska 				(void) printf(" failed (no TRIM active)");
7313716fd348SMartin Matuska 			(void) printf("\n");
7314716fd348SMartin Matuska 		}
7315716fd348SMartin Matuska 		break;
7316716fd348SMartin Matuska 	case POOL_TRIM_START:
7317716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7318716fd348SMartin Matuska 			(void) printf("Start TRIM %s", path);
7319716fd348SMartin Matuska 			if (active && error == 0)
7320716fd348SMartin Matuska 				(void) printf(" failed (already active)");
7321716fd348SMartin Matuska 			else if (error != 0)
7322716fd348SMartin Matuska 				(void) printf(" failed (error %d)", error);
7323716fd348SMartin Matuska 			(void) printf("\n");
7324716fd348SMartin Matuska 		}
7325716fd348SMartin Matuska 		break;
7326716fd348SMartin Matuska 	case POOL_TRIM_SUSPEND:
7327716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 4) {
7328716fd348SMartin Matuska 			(void) printf("Suspend TRIM %s", path);
7329716fd348SMartin Matuska 			if (!active)
7330716fd348SMartin Matuska 				(void) printf(" failed (no TRIM active)");
7331716fd348SMartin Matuska 			(void) printf("\n");
7332716fd348SMartin Matuska 		}
7333716fd348SMartin Matuska 		break;
7334716fd348SMartin Matuska 	}
7335716fd348SMartin Matuska 	free(path);
7336716fd348SMartin Matuska 	mutex_exit(&ztest_vdev_lock);
7337716fd348SMartin Matuska }
7338716fd348SMartin Matuska 
7339e2df9bb4SMartin Matuska void
7340e2df9bb4SMartin Matuska ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
7341e2df9bb4SMartin Matuska {
7342e2df9bb4SMartin Matuska 	(void) zd, (void) id;
7343e2df9bb4SMartin Matuska 
7344e2df9bb4SMartin Matuska 	spa_t *spa = ztest_spa;
7345e2df9bb4SMartin Matuska 	uint64_t pct = ztest_random(15) + 1;
7346e2df9bb4SMartin Matuska 
7347e2df9bb4SMartin Matuska 	(void) ddt_prune_unique_entries(spa, ZPOOL_DDT_PRUNE_PERCENTAGE, pct);
7348e2df9bb4SMartin Matuska }
7349e2df9bb4SMartin Matuska 
7350716fd348SMartin Matuska /*
7351716fd348SMartin Matuska  * Verify pool integrity by running zdb.
7352716fd348SMartin Matuska  */
7353716fd348SMartin Matuska static void
7354abcdc1b9SMartin Matuska ztest_run_zdb(uint64_t guid)
7355716fd348SMartin Matuska {
7356716fd348SMartin Matuska 	int status;
7357716fd348SMartin Matuska 	char *bin;
7358716fd348SMartin Matuska 	char *zdb;
7359716fd348SMartin Matuska 	char *zbuf;
7360716fd348SMartin Matuska 	const int len = MAXPATHLEN + MAXNAMELEN + 20;
7361716fd348SMartin Matuska 	FILE *fp;
7362716fd348SMartin Matuska 
7363716fd348SMartin Matuska 	bin = umem_alloc(len, UMEM_NOFAIL);
7364716fd348SMartin Matuska 	zdb = umem_alloc(len, UMEM_NOFAIL);
7365716fd348SMartin Matuska 	zbuf = umem_alloc(1024, UMEM_NOFAIL);
7366716fd348SMartin Matuska 
7367716fd348SMartin Matuska 	ztest_get_zdb_bin(bin, len);
7368716fd348SMartin Matuska 
7369716fd348SMartin Matuska 	char **set_gvars_args = ztest_global_vars_to_zdb_args();
7370be181ee2SMartin Matuska 	if (set_gvars_args == NULL) {
7371be181ee2SMartin Matuska 		fatal(B_FALSE, "Failed to allocate memory in "
7372be181ee2SMartin Matuska 		    "ztest_global_vars_to_zdb_args(). Cannot run zdb.\n");
7373be181ee2SMartin Matuska 	}
7374716fd348SMartin Matuska 	char *set_gvars_args_joined = join_strings(set_gvars_args, " ");
7375716fd348SMartin Matuska 	free(set_gvars_args);
7376716fd348SMartin Matuska 
7377716fd348SMartin Matuska 	size_t would = snprintf(zdb, len,
7378abcdc1b9SMartin Matuska 	    "%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64,
7379716fd348SMartin Matuska 	    bin,
7380716fd348SMartin Matuska 	    ztest_opts.zo_verbose >= 3 ? "s" : "",
7381716fd348SMartin Matuska 	    ztest_opts.zo_verbose >= 4 ? "v" : "",
7382716fd348SMartin Matuska 	    set_gvars_args_joined,
7383716fd348SMartin Matuska 	    ztest_opts.zo_dir,
7384abcdc1b9SMartin Matuska 	    guid);
7385716fd348SMartin Matuska 	ASSERT3U(would, <, len);
7386716fd348SMartin Matuska 
7387dbd5678dSMartin Matuska 	umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1);
7388716fd348SMartin Matuska 
7389716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 5)
7390716fd348SMartin Matuska 		(void) printf("Executing %s\n", zdb);
7391716fd348SMartin Matuska 
7392716fd348SMartin Matuska 	fp = popen(zdb, "r");
7393716fd348SMartin Matuska 
7394716fd348SMartin Matuska 	while (fgets(zbuf, 1024, fp) != NULL)
7395716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 3)
7396716fd348SMartin Matuska 			(void) printf("%s", zbuf);
7397716fd348SMartin Matuska 
7398716fd348SMartin Matuska 	status = pclose(fp);
7399716fd348SMartin Matuska 
7400716fd348SMartin Matuska 	if (status == 0)
7401716fd348SMartin Matuska 		goto out;
7402716fd348SMartin Matuska 
7403716fd348SMartin Matuska 	ztest_dump_core = 0;
7404716fd348SMartin Matuska 	if (WIFEXITED(status))
7405716fd348SMartin Matuska 		fatal(B_FALSE, "'%s' exit code %d", zdb, WEXITSTATUS(status));
7406716fd348SMartin Matuska 	else
7407716fd348SMartin Matuska 		fatal(B_FALSE, "'%s' died with signal %d",
7408716fd348SMartin Matuska 		    zdb, WTERMSIG(status));
7409716fd348SMartin Matuska out:
7410716fd348SMartin Matuska 	umem_free(bin, len);
7411716fd348SMartin Matuska 	umem_free(zdb, len);
7412716fd348SMartin Matuska 	umem_free(zbuf, 1024);
7413716fd348SMartin Matuska }
7414716fd348SMartin Matuska 
7415716fd348SMartin Matuska static void
7416a0b956f5SMartin Matuska ztest_walk_pool_directory(const char *header)
7417716fd348SMartin Matuska {
7418716fd348SMartin Matuska 	spa_t *spa = NULL;
7419716fd348SMartin Matuska 
7420716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6)
7421a0b956f5SMartin Matuska 		(void) puts(header);
7422716fd348SMartin Matuska 
7423716fd348SMartin Matuska 	mutex_enter(&spa_namespace_lock);
7424716fd348SMartin Matuska 	while ((spa = spa_next(spa)) != NULL)
7425716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 6)
7426716fd348SMartin Matuska 			(void) printf("\t%s\n", spa_name(spa));
7427716fd348SMartin Matuska 	mutex_exit(&spa_namespace_lock);
7428716fd348SMartin Matuska }
7429716fd348SMartin Matuska 
7430716fd348SMartin Matuska static void
7431716fd348SMartin Matuska ztest_spa_import_export(char *oldname, char *newname)
7432716fd348SMartin Matuska {
7433716fd348SMartin Matuska 	nvlist_t *config, *newconfig;
7434716fd348SMartin Matuska 	uint64_t pool_guid;
7435716fd348SMartin Matuska 	spa_t *spa;
7436716fd348SMartin Matuska 	int error;
7437716fd348SMartin Matuska 
7438716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 4) {
7439716fd348SMartin Matuska 		(void) printf("import/export: old = %s, new = %s\n",
7440716fd348SMartin Matuska 		    oldname, newname);
7441716fd348SMartin Matuska 	}
7442716fd348SMartin Matuska 
7443716fd348SMartin Matuska 	/*
7444716fd348SMartin Matuska 	 * Clean up from previous runs.
7445716fd348SMartin Matuska 	 */
7446716fd348SMartin Matuska 	(void) spa_destroy(newname);
7447716fd348SMartin Matuska 
7448716fd348SMartin Matuska 	/*
7449716fd348SMartin Matuska 	 * Get the pool's configuration and guid.
7450716fd348SMartin Matuska 	 */
7451716fd348SMartin Matuska 	VERIFY0(spa_open(oldname, &spa, FTAG));
7452716fd348SMartin Matuska 
7453716fd348SMartin Matuska 	/*
7454716fd348SMartin Matuska 	 * Kick off a scrub to tickle scrub/export races.
7455716fd348SMartin Matuska 	 */
7456716fd348SMartin Matuska 	if (ztest_random(2) == 0)
7457716fd348SMartin Matuska 		(void) spa_scan(spa, POOL_SCAN_SCRUB);
7458716fd348SMartin Matuska 
7459716fd348SMartin Matuska 	pool_guid = spa_guid(spa);
7460716fd348SMartin Matuska 	spa_close(spa, FTAG);
7461716fd348SMartin Matuska 
7462716fd348SMartin Matuska 	ztest_walk_pool_directory("pools before export");
7463716fd348SMartin Matuska 
7464716fd348SMartin Matuska 	/*
7465716fd348SMartin Matuska 	 * Export it.
7466716fd348SMartin Matuska 	 */
7467716fd348SMartin Matuska 	VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE));
7468716fd348SMartin Matuska 
7469716fd348SMartin Matuska 	ztest_walk_pool_directory("pools after export");
7470716fd348SMartin Matuska 
7471716fd348SMartin Matuska 	/*
7472716fd348SMartin Matuska 	 * Try to import it.
7473716fd348SMartin Matuska 	 */
7474716fd348SMartin Matuska 	newconfig = spa_tryimport(config);
7475716fd348SMartin Matuska 	ASSERT3P(newconfig, !=, NULL);
7476716fd348SMartin Matuska 	fnvlist_free(newconfig);
7477716fd348SMartin Matuska 
7478716fd348SMartin Matuska 	/*
7479716fd348SMartin Matuska 	 * Import it under the new name.
7480716fd348SMartin Matuska 	 */
7481716fd348SMartin Matuska 	error = spa_import(newname, config, NULL, 0);
7482716fd348SMartin Matuska 	if (error != 0) {
7483716fd348SMartin Matuska 		dump_nvlist(config, 0);
7484716fd348SMartin Matuska 		fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
7485716fd348SMartin Matuska 		    oldname, newname, error);
7486716fd348SMartin Matuska 	}
7487716fd348SMartin Matuska 
7488716fd348SMartin Matuska 	ztest_walk_pool_directory("pools after import");
7489716fd348SMartin Matuska 
7490716fd348SMartin Matuska 	/*
7491716fd348SMartin Matuska 	 * Try to import it again -- should fail with EEXIST.
7492716fd348SMartin Matuska 	 */
7493716fd348SMartin Matuska 	VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0));
7494716fd348SMartin Matuska 
7495716fd348SMartin Matuska 	/*
7496716fd348SMartin Matuska 	 * Try to import it under a different name -- should fail with EEXIST.
7497716fd348SMartin Matuska 	 */
7498716fd348SMartin Matuska 	VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0));
7499716fd348SMartin Matuska 
7500716fd348SMartin Matuska 	/*
7501716fd348SMartin Matuska 	 * Verify that the pool is no longer visible under the old name.
7502716fd348SMartin Matuska 	 */
7503716fd348SMartin Matuska 	VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
7504716fd348SMartin Matuska 
7505716fd348SMartin Matuska 	/*
7506716fd348SMartin Matuska 	 * Verify that we can open and close the pool using the new name.
7507716fd348SMartin Matuska 	 */
7508716fd348SMartin Matuska 	VERIFY0(spa_open(newname, &spa, FTAG));
7509716fd348SMartin Matuska 	ASSERT3U(pool_guid, ==, spa_guid(spa));
7510716fd348SMartin Matuska 	spa_close(spa, FTAG);
7511716fd348SMartin Matuska 
7512716fd348SMartin Matuska 	fnvlist_free(config);
7513716fd348SMartin Matuska }
7514716fd348SMartin Matuska 
7515716fd348SMartin Matuska static void
7516716fd348SMartin Matuska ztest_resume(spa_t *spa)
7517716fd348SMartin Matuska {
7518716fd348SMartin Matuska 	if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6)
7519716fd348SMartin Matuska 		(void) printf("resuming from suspended state\n");
7520716fd348SMartin Matuska 	spa_vdev_state_enter(spa, SCL_NONE);
7521716fd348SMartin Matuska 	vdev_clear(spa, NULL);
7522716fd348SMartin Matuska 	(void) spa_vdev_state_exit(spa, NULL, 0);
7523716fd348SMartin Matuska 	(void) zio_resume(spa);
7524716fd348SMartin Matuska }
7525716fd348SMartin Matuska 
7526716fd348SMartin Matuska static __attribute__((noreturn)) void
7527716fd348SMartin Matuska ztest_resume_thread(void *arg)
7528716fd348SMartin Matuska {
7529716fd348SMartin Matuska 	spa_t *spa = arg;
7530716fd348SMartin Matuska 
7531e2df9bb4SMartin Matuska 	/*
7532e2df9bb4SMartin Matuska 	 * Synthesize aged DDT entries for ddt prune testing
7533e2df9bb4SMartin Matuska 	 */
7534e2df9bb4SMartin Matuska 	ddt_prune_artificial_age = B_TRUE;
7535e2df9bb4SMartin Matuska 	if (ztest_opts.zo_verbose >= 3)
7536e2df9bb4SMartin Matuska 		ddt_dump_prune_histogram = B_TRUE;
7537e2df9bb4SMartin Matuska 
7538716fd348SMartin Matuska 	while (!ztest_exiting) {
7539716fd348SMartin Matuska 		if (spa_suspended(spa))
7540716fd348SMartin Matuska 			ztest_resume(spa);
7541716fd348SMartin Matuska 		(void) poll(NULL, 0, 100);
7542716fd348SMartin Matuska 
7543716fd348SMartin Matuska 		/*
7544716fd348SMartin Matuska 		 * Periodically change the zfs_compressed_arc_enabled setting.
7545716fd348SMartin Matuska 		 */
7546716fd348SMartin Matuska 		if (ztest_random(10) == 0)
7547716fd348SMartin Matuska 			zfs_compressed_arc_enabled = ztest_random(2);
7548716fd348SMartin Matuska 
7549716fd348SMartin Matuska 		/*
7550716fd348SMartin Matuska 		 * Periodically change the zfs_abd_scatter_enabled setting.
7551716fd348SMartin Matuska 		 */
7552716fd348SMartin Matuska 		if (ztest_random(10) == 0)
7553716fd348SMartin Matuska 			zfs_abd_scatter_enabled = ztest_random(2);
7554716fd348SMartin Matuska 	}
7555716fd348SMartin Matuska 
7556716fd348SMartin Matuska 	thread_exit();
7557716fd348SMartin Matuska }
7558716fd348SMartin Matuska 
7559716fd348SMartin Matuska static __attribute__((noreturn)) void
7560716fd348SMartin Matuska ztest_deadman_thread(void *arg)
7561716fd348SMartin Matuska {
7562716fd348SMartin Matuska 	ztest_shared_t *zs = arg;
7563716fd348SMartin Matuska 	spa_t *spa = ztest_spa;
7564716fd348SMartin Matuska 	hrtime_t delay, overdue, last_run = gethrtime();
7565716fd348SMartin Matuska 
7566716fd348SMartin Matuska 	delay = (zs->zs_thread_stop - zs->zs_thread_start) +
7567716fd348SMartin Matuska 	    MSEC2NSEC(zfs_deadman_synctime_ms);
7568716fd348SMartin Matuska 
7569716fd348SMartin Matuska 	while (!ztest_exiting) {
7570716fd348SMartin Matuska 		/*
7571716fd348SMartin Matuska 		 * Wait for the delay timer while checking occasionally
7572716fd348SMartin Matuska 		 * if we should stop.
7573716fd348SMartin Matuska 		 */
7574716fd348SMartin Matuska 		if (gethrtime() < last_run + delay) {
7575716fd348SMartin Matuska 			(void) poll(NULL, 0, 1000);
7576716fd348SMartin Matuska 			continue;
7577716fd348SMartin Matuska 		}
7578716fd348SMartin Matuska 
7579716fd348SMartin Matuska 		/*
7580716fd348SMartin Matuska 		 * If the pool is suspended then fail immediately. Otherwise,
7581716fd348SMartin Matuska 		 * check to see if the pool is making any progress. If
7582716fd348SMartin Matuska 		 * vdev_deadman() discovers that there hasn't been any recent
7583716fd348SMartin Matuska 		 * I/Os then it will end up aborting the tests.
7584716fd348SMartin Matuska 		 */
7585716fd348SMartin Matuska 		if (spa_suspended(spa) || spa->spa_root_vdev == NULL) {
7586716fd348SMartin Matuska 			fatal(B_FALSE,
7587dbd5678dSMartin Matuska 			    "aborting test after %llu seconds because "
7588716fd348SMartin Matuska 			    "pool has transitioned to a suspended state.",
7589dbd5678dSMartin Matuska 			    (u_longlong_t)zfs_deadman_synctime_ms / 1000);
7590716fd348SMartin Matuska 		}
7591716fd348SMartin Matuska 		vdev_deadman(spa->spa_root_vdev, FTAG);
7592716fd348SMartin Matuska 
7593716fd348SMartin Matuska 		/*
7594716fd348SMartin Matuska 		 * If the process doesn't complete within a grace period of
7595716fd348SMartin Matuska 		 * zfs_deadman_synctime_ms over the expected finish time,
7596716fd348SMartin Matuska 		 * then it may be hung and is terminated.
7597716fd348SMartin Matuska 		 */
7598716fd348SMartin Matuska 		overdue = zs->zs_proc_stop + MSEC2NSEC(zfs_deadman_synctime_ms);
7599716fd348SMartin Matuska 		if (gethrtime() > overdue) {
7600716fd348SMartin Matuska 			fatal(B_FALSE,
7601716fd348SMartin Matuska 			    "aborting test after %llu seconds because "
7602716fd348SMartin Matuska 			    "the process is overdue for termination.",
7603716fd348SMartin Matuska 			    (gethrtime() - zs->zs_proc_start) / NANOSEC);
7604716fd348SMartin Matuska 		}
7605716fd348SMartin Matuska 
7606716fd348SMartin Matuska 		(void) printf("ztest has been running for %lld seconds\n",
7607716fd348SMartin Matuska 		    (gethrtime() - zs->zs_proc_start) / NANOSEC);
7608716fd348SMartin Matuska 
7609716fd348SMartin Matuska 		last_run = gethrtime();
7610716fd348SMartin Matuska 		delay = MSEC2NSEC(zfs_deadman_checktime_ms);
7611716fd348SMartin Matuska 	}
7612716fd348SMartin Matuska 
7613716fd348SMartin Matuska 	thread_exit();
7614716fd348SMartin Matuska }
7615716fd348SMartin Matuska 
7616716fd348SMartin Matuska static void
7617716fd348SMartin Matuska ztest_execute(int test, ztest_info_t *zi, uint64_t id)
7618716fd348SMartin Matuska {
7619716fd348SMartin Matuska 	ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets];
7620716fd348SMartin Matuska 	ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test);
7621716fd348SMartin Matuska 	hrtime_t functime = gethrtime();
7622716fd348SMartin Matuska 	int i;
7623716fd348SMartin Matuska 
7624716fd348SMartin Matuska 	for (i = 0; i < zi->zi_iters; i++)
7625716fd348SMartin Matuska 		zi->zi_func(zd, id);
7626716fd348SMartin Matuska 
7627716fd348SMartin Matuska 	functime = gethrtime() - functime;
7628716fd348SMartin Matuska 
7629716fd348SMartin Matuska 	atomic_add_64(&zc->zc_count, 1);
7630716fd348SMartin Matuska 	atomic_add_64(&zc->zc_time, functime);
7631716fd348SMartin Matuska 
7632716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 4)
7633716fd348SMartin Matuska 		(void) printf("%6.2f sec in %s\n",
7634716fd348SMartin Matuska 		    (double)functime / NANOSEC, zi->zi_funcname);
7635716fd348SMartin Matuska }
7636716fd348SMartin Matuska 
7637e716630dSMartin Matuska typedef struct ztest_raidz_expand_io {
7638e716630dSMartin Matuska 	uint64_t	rzx_id;
7639e716630dSMartin Matuska 	uint64_t	rzx_amount;
7640e716630dSMartin Matuska 	uint64_t	rzx_bufsize;
7641e716630dSMartin Matuska 	const void	*rzx_buffer;
7642e716630dSMartin Matuska 	uint64_t	rzx_alloc_max;
7643e716630dSMartin Matuska 	spa_t		*rzx_spa;
7644e716630dSMartin Matuska } ztest_expand_io_t;
7645e716630dSMartin Matuska 
7646e716630dSMartin Matuska #undef OD_ARRAY_SIZE
7647e716630dSMartin Matuska #define	OD_ARRAY_SIZE	10
7648e716630dSMartin Matuska 
7649e716630dSMartin Matuska /*
7650e716630dSMartin Matuska  * Write a request amount of data to some dataset objects.
7651e716630dSMartin Matuska  * There will be ztest_opts.zo_threads count of these running in parallel.
7652e716630dSMartin Matuska  */
7653e716630dSMartin Matuska static __attribute__((noreturn)) void
7654e716630dSMartin Matuska ztest_rzx_thread(void *arg)
7655e716630dSMartin Matuska {
7656e716630dSMartin Matuska 	ztest_expand_io_t *info = (ztest_expand_io_t *)arg;
7657e716630dSMartin Matuska 	ztest_od_t *od;
7658e716630dSMartin Matuska 	int batchsize;
7659e716630dSMartin Matuska 	int od_size;
7660e716630dSMartin Matuska 	ztest_ds_t *zd = &ztest_ds[info->rzx_id % ztest_opts.zo_datasets];
7661e716630dSMartin Matuska 	spa_t *spa = info->rzx_spa;
7662e716630dSMartin Matuska 
7663e716630dSMartin Matuska 	od_size = sizeof (ztest_od_t) * OD_ARRAY_SIZE;
7664e716630dSMartin Matuska 	od = umem_alloc(od_size, UMEM_NOFAIL);
7665e716630dSMartin Matuska 	batchsize = OD_ARRAY_SIZE;
7666e716630dSMartin Matuska 
7667e716630dSMartin Matuska 	/* Create objects to write to */
7668e716630dSMartin Matuska 	for (int b = 0; b < batchsize; b++) {
7669e716630dSMartin Matuska 		ztest_od_init(od + b, info->rzx_id, FTAG, b,
7670e716630dSMartin Matuska 		    DMU_OT_UINT64_OTHER, 0, 0, 0);
7671e716630dSMartin Matuska 	}
7672e716630dSMartin Matuska 	if (ztest_object_init(zd, od, od_size, B_FALSE) != 0) {
7673e716630dSMartin Matuska 		umem_free(od, od_size);
7674e716630dSMartin Matuska 		thread_exit();
7675e716630dSMartin Matuska 	}
7676e716630dSMartin Matuska 
7677e716630dSMartin Matuska 	for (uint64_t offset = 0, written = 0; written < info->rzx_amount;
7678e716630dSMartin Matuska 	    offset += info->rzx_bufsize) {
7679e716630dSMartin Matuska 		/* write to 10 objects */
7680e716630dSMartin Matuska 		for (int i = 0; i < batchsize && written < info->rzx_amount;
7681e716630dSMartin Matuska 		    i++) {
7682e716630dSMartin Matuska 			(void) pthread_rwlock_rdlock(&zd->zd_zilog_lock);
7683e716630dSMartin Matuska 			ztest_write(zd, od[i].od_object, offset,
7684e716630dSMartin Matuska 			    info->rzx_bufsize, info->rzx_buffer);
7685e716630dSMartin Matuska 			(void) pthread_rwlock_unlock(&zd->zd_zilog_lock);
7686e716630dSMartin Matuska 			written += info->rzx_bufsize;
7687e716630dSMartin Matuska 		}
7688e716630dSMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
7689e716630dSMartin Matuska 		/* due to inflation, we'll typically bail here */
7690e716630dSMartin Matuska 		if (metaslab_class_get_alloc(spa_normal_class(spa)) >
7691e716630dSMartin Matuska 		    info->rzx_alloc_max) {
7692e716630dSMartin Matuska 			break;
7693e716630dSMartin Matuska 		}
7694e716630dSMartin Matuska 	}
7695e716630dSMartin Matuska 
7696e716630dSMartin Matuska 	/* Remove a few objects to leave some holes in allocation space */
7697e716630dSMartin Matuska 	mutex_enter(&zd->zd_dirobj_lock);
7698e716630dSMartin Matuska 	(void) ztest_remove(zd, od, 2);
7699e716630dSMartin Matuska 	mutex_exit(&zd->zd_dirobj_lock);
7700e716630dSMartin Matuska 
7701e716630dSMartin Matuska 	umem_free(od, od_size);
7702e716630dSMartin Matuska 
7703e716630dSMartin Matuska 	thread_exit();
7704e716630dSMartin Matuska }
7705e716630dSMartin Matuska 
7706716fd348SMartin Matuska static __attribute__((noreturn)) void
7707716fd348SMartin Matuska ztest_thread(void *arg)
7708716fd348SMartin Matuska {
7709716fd348SMartin Matuska 	int rand;
7710716fd348SMartin Matuska 	uint64_t id = (uintptr_t)arg;
7711716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
7712716fd348SMartin Matuska 	uint64_t call_next;
7713716fd348SMartin Matuska 	hrtime_t now;
7714716fd348SMartin Matuska 	ztest_info_t *zi;
7715716fd348SMartin Matuska 	ztest_shared_callstate_t *zc;
7716716fd348SMartin Matuska 
7717716fd348SMartin Matuska 	while ((now = gethrtime()) < zs->zs_thread_stop) {
7718716fd348SMartin Matuska 		/*
7719716fd348SMartin Matuska 		 * See if it's time to force a crash.
7720716fd348SMartin Matuska 		 */
7721e716630dSMartin Matuska 		if (now > zs->zs_thread_kill &&
7722e716630dSMartin Matuska 		    raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE) {
7723716fd348SMartin Matuska 			ztest_kill(zs);
7724e716630dSMartin Matuska 		}
7725716fd348SMartin Matuska 
7726716fd348SMartin Matuska 		/*
7727716fd348SMartin Matuska 		 * If we're getting ENOSPC with some regularity, stop.
7728716fd348SMartin Matuska 		 */
7729716fd348SMartin Matuska 		if (zs->zs_enospc_count > 10)
7730716fd348SMartin Matuska 			break;
7731716fd348SMartin Matuska 
7732716fd348SMartin Matuska 		/*
7733716fd348SMartin Matuska 		 * Pick a random function to execute.
7734716fd348SMartin Matuska 		 */
7735716fd348SMartin Matuska 		rand = ztest_random(ZTEST_FUNCS);
7736716fd348SMartin Matuska 		zi = &ztest_info[rand];
7737716fd348SMartin Matuska 		zc = ZTEST_GET_SHARED_CALLSTATE(rand);
7738716fd348SMartin Matuska 		call_next = zc->zc_next;
7739716fd348SMartin Matuska 
7740716fd348SMartin Matuska 		if (now >= call_next &&
7741716fd348SMartin Matuska 		    atomic_cas_64(&zc->zc_next, call_next, call_next +
7742716fd348SMartin Matuska 		    ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) {
7743716fd348SMartin Matuska 			ztest_execute(rand, zi, id);
7744716fd348SMartin Matuska 		}
7745716fd348SMartin Matuska 	}
7746716fd348SMartin Matuska 
7747716fd348SMartin Matuska 	thread_exit();
7748716fd348SMartin Matuska }
7749716fd348SMartin Matuska 
7750716fd348SMartin Matuska static void
7751a0b956f5SMartin Matuska ztest_dataset_name(char *dsname, const char *pool, int d)
7752716fd348SMartin Matuska {
7753716fd348SMartin Matuska 	(void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d);
7754716fd348SMartin Matuska }
7755716fd348SMartin Matuska 
7756716fd348SMartin Matuska static void
7757716fd348SMartin Matuska ztest_dataset_destroy(int d)
7758716fd348SMartin Matuska {
7759716fd348SMartin Matuska 	char name[ZFS_MAX_DATASET_NAME_LEN];
7760716fd348SMartin Matuska 	int t;
7761716fd348SMartin Matuska 
7762716fd348SMartin Matuska 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
7763716fd348SMartin Matuska 
7764716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 3)
7765716fd348SMartin Matuska 		(void) printf("Destroying %s to free up space\n", name);
7766716fd348SMartin Matuska 
7767716fd348SMartin Matuska 	/*
7768716fd348SMartin Matuska 	 * Cleanup any non-standard clones and snapshots.  In general,
7769716fd348SMartin Matuska 	 * ztest thread t operates on dataset (t % zopt_datasets),
7770716fd348SMartin Matuska 	 * so there may be more than one thing to clean up.
7771716fd348SMartin Matuska 	 */
7772716fd348SMartin Matuska 	for (t = d; t < ztest_opts.zo_threads;
7773716fd348SMartin Matuska 	    t += ztest_opts.zo_datasets)
7774716fd348SMartin Matuska 		ztest_dsl_dataset_cleanup(name, t);
7775716fd348SMartin Matuska 
7776716fd348SMartin Matuska 	(void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
7777716fd348SMartin Matuska 	    DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
7778716fd348SMartin Matuska }
7779716fd348SMartin Matuska 
7780716fd348SMartin Matuska static void
7781716fd348SMartin Matuska ztest_dataset_dirobj_verify(ztest_ds_t *zd)
7782716fd348SMartin Matuska {
7783716fd348SMartin Matuska 	uint64_t usedobjs, dirobjs, scratch;
7784716fd348SMartin Matuska 
7785716fd348SMartin Matuska 	/*
7786716fd348SMartin Matuska 	 * ZTEST_DIROBJ is the object directory for the entire dataset.
7787716fd348SMartin Matuska 	 * Therefore, the number of objects in use should equal the
7788716fd348SMartin Matuska 	 * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself.
7789716fd348SMartin Matuska 	 * If not, we have an object leak.
7790716fd348SMartin Matuska 	 *
7791716fd348SMartin Matuska 	 * Note that we can only check this in ztest_dataset_open(),
7792716fd348SMartin Matuska 	 * when the open-context and syncing-context values agree.
7793716fd348SMartin Matuska 	 * That's because zap_count() returns the open-context value,
7794716fd348SMartin Matuska 	 * while dmu_objset_space() returns the rootbp fill count.
7795716fd348SMartin Matuska 	 */
7796716fd348SMartin Matuska 	VERIFY0(zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
7797716fd348SMartin Matuska 	dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
7798716fd348SMartin Matuska 	ASSERT3U(dirobjs + 1, ==, usedobjs);
7799716fd348SMartin Matuska }
7800716fd348SMartin Matuska 
7801716fd348SMartin Matuska static int
7802716fd348SMartin Matuska ztest_dataset_open(int d)
7803716fd348SMartin Matuska {
7804716fd348SMartin Matuska 	ztest_ds_t *zd = &ztest_ds[d];
7805716fd348SMartin Matuska 	uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
7806716fd348SMartin Matuska 	objset_t *os;
7807716fd348SMartin Matuska 	zilog_t *zilog;
7808716fd348SMartin Matuska 	char name[ZFS_MAX_DATASET_NAME_LEN];
7809716fd348SMartin Matuska 	int error;
7810716fd348SMartin Matuska 
7811716fd348SMartin Matuska 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
7812716fd348SMartin Matuska 
7813716fd348SMartin Matuska 	(void) pthread_rwlock_rdlock(&ztest_name_lock);
7814716fd348SMartin Matuska 
7815716fd348SMartin Matuska 	error = ztest_dataset_create(name);
7816716fd348SMartin Matuska 	if (error == ENOSPC) {
7817716fd348SMartin Matuska 		(void) pthread_rwlock_unlock(&ztest_name_lock);
7818716fd348SMartin Matuska 		ztest_record_enospc(FTAG);
7819716fd348SMartin Matuska 		return (error);
7820716fd348SMartin Matuska 	}
7821716fd348SMartin Matuska 	ASSERT(error == 0 || error == EEXIST);
7822716fd348SMartin Matuska 
7823716fd348SMartin Matuska 	VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE,
7824716fd348SMartin Matuska 	    B_TRUE, zd, &os));
7825716fd348SMartin Matuska 	(void) pthread_rwlock_unlock(&ztest_name_lock);
7826716fd348SMartin Matuska 
7827716fd348SMartin Matuska 	ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
7828716fd348SMartin Matuska 
7829716fd348SMartin Matuska 	zilog = zd->zd_zilog;
7830716fd348SMartin Matuska 
7831716fd348SMartin Matuska 	if (zilog->zl_header->zh_claim_lr_seq != 0 &&
7832716fd348SMartin Matuska 	    zilog->zl_header->zh_claim_lr_seq < committed_seq)
7833716fd348SMartin Matuska 		fatal(B_FALSE, "missing log records: "
7834716fd348SMartin Matuska 		    "claimed %"PRIu64" < committed %"PRIu64"",
7835716fd348SMartin Matuska 		    zilog->zl_header->zh_claim_lr_seq, committed_seq);
7836716fd348SMartin Matuska 
7837716fd348SMartin Matuska 	ztest_dataset_dirobj_verify(zd);
7838716fd348SMartin Matuska 
7839716fd348SMartin Matuska 	zil_replay(os, zd, ztest_replay_vector);
7840716fd348SMartin Matuska 
7841716fd348SMartin Matuska 	ztest_dataset_dirobj_verify(zd);
7842716fd348SMartin Matuska 
7843716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 6)
7844716fd348SMartin Matuska 		(void) printf("%s replay %"PRIu64" blocks, "
7845716fd348SMartin Matuska 		    "%"PRIu64" records, seq %"PRIu64"\n",
7846716fd348SMartin Matuska 		    zd->zd_name,
7847716fd348SMartin Matuska 		    zilog->zl_parse_blk_count,
7848716fd348SMartin Matuska 		    zilog->zl_parse_lr_count,
7849716fd348SMartin Matuska 		    zilog->zl_replaying_seq);
7850716fd348SMartin Matuska 
7851271171e0SMartin Matuska 	zilog = zil_open(os, ztest_get_data, NULL);
7852716fd348SMartin Matuska 
7853716fd348SMartin Matuska 	if (zilog->zl_replaying_seq != 0 &&
7854716fd348SMartin Matuska 	    zilog->zl_replaying_seq < committed_seq)
7855716fd348SMartin Matuska 		fatal(B_FALSE, "missing log records: "
7856716fd348SMartin Matuska 		    "replayed %"PRIu64" < committed %"PRIu64"",
7857716fd348SMartin Matuska 		    zilog->zl_replaying_seq, committed_seq);
7858716fd348SMartin Matuska 
7859716fd348SMartin Matuska 	return (0);
7860716fd348SMartin Matuska }
7861716fd348SMartin Matuska 
7862716fd348SMartin Matuska static void
7863716fd348SMartin Matuska ztest_dataset_close(int d)
7864716fd348SMartin Matuska {
7865716fd348SMartin Matuska 	ztest_ds_t *zd = &ztest_ds[d];
7866716fd348SMartin Matuska 
7867716fd348SMartin Matuska 	zil_close(zd->zd_zilog);
7868716fd348SMartin Matuska 	dmu_objset_disown(zd->zd_os, B_TRUE, zd);
7869716fd348SMartin Matuska 
7870716fd348SMartin Matuska 	ztest_zd_fini(zd);
7871716fd348SMartin Matuska }
7872716fd348SMartin Matuska 
7873716fd348SMartin Matuska static int
7874716fd348SMartin Matuska ztest_replay_zil_cb(const char *name, void *arg)
7875716fd348SMartin Matuska {
7876716fd348SMartin Matuska 	(void) arg;
7877716fd348SMartin Matuska 	objset_t *os;
7878716fd348SMartin Matuska 	ztest_ds_t *zdtmp;
7879716fd348SMartin Matuska 
7880716fd348SMartin Matuska 	VERIFY0(ztest_dmu_objset_own(name, DMU_OST_ANY, B_TRUE,
7881716fd348SMartin Matuska 	    B_TRUE, FTAG, &os));
7882716fd348SMartin Matuska 
7883716fd348SMartin Matuska 	zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL);
7884716fd348SMartin Matuska 
7885716fd348SMartin Matuska 	ztest_zd_init(zdtmp, NULL, os);
7886716fd348SMartin Matuska 	zil_replay(os, zdtmp, ztest_replay_vector);
7887716fd348SMartin Matuska 	ztest_zd_fini(zdtmp);
7888716fd348SMartin Matuska 
7889716fd348SMartin Matuska 	if (dmu_objset_zil(os)->zl_parse_lr_count != 0 &&
7890716fd348SMartin Matuska 	    ztest_opts.zo_verbose >= 6) {
7891716fd348SMartin Matuska 		zilog_t *zilog = dmu_objset_zil(os);
7892716fd348SMartin Matuska 
7893716fd348SMartin Matuska 		(void) printf("%s replay %"PRIu64" blocks, "
7894716fd348SMartin Matuska 		    "%"PRIu64" records, seq %"PRIu64"\n",
7895716fd348SMartin Matuska 		    name,
7896716fd348SMartin Matuska 		    zilog->zl_parse_blk_count,
7897716fd348SMartin Matuska 		    zilog->zl_parse_lr_count,
7898716fd348SMartin Matuska 		    zilog->zl_replaying_seq);
7899716fd348SMartin Matuska 	}
7900716fd348SMartin Matuska 
7901716fd348SMartin Matuska 	umem_free(zdtmp, sizeof (ztest_ds_t));
7902716fd348SMartin Matuska 
7903716fd348SMartin Matuska 	dmu_objset_disown(os, B_TRUE, FTAG);
7904716fd348SMartin Matuska 	return (0);
7905716fd348SMartin Matuska }
7906716fd348SMartin Matuska 
7907716fd348SMartin Matuska static void
7908716fd348SMartin Matuska ztest_freeze(void)
7909716fd348SMartin Matuska {
7910716fd348SMartin Matuska 	ztest_ds_t *zd = &ztest_ds[0];
7911716fd348SMartin Matuska 	spa_t *spa;
7912716fd348SMartin Matuska 	int numloops = 0;
7913716fd348SMartin Matuska 
7914e716630dSMartin Matuska 	/* freeze not supported during RAIDZ expansion */
7915e716630dSMartin Matuska 	if (ztest_opts.zo_raid_do_expand)
7916e716630dSMartin Matuska 		return;
7917e716630dSMartin Matuska 
7918716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 3)
7919716fd348SMartin Matuska 		(void) printf("testing spa_freeze()...\n");
7920716fd348SMartin Matuska 
7921e716630dSMartin Matuska 	raidz_scratch_verify();
7922716fd348SMartin Matuska 	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
7923716fd348SMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
7924716fd348SMartin Matuska 	VERIFY0(ztest_dataset_open(0));
7925716fd348SMartin Matuska 	ztest_spa = spa;
7926716fd348SMartin Matuska 
7927716fd348SMartin Matuska 	/*
7928716fd348SMartin Matuska 	 * Force the first log block to be transactionally allocated.
7929716fd348SMartin Matuska 	 * We have to do this before we freeze the pool -- otherwise
7930716fd348SMartin Matuska 	 * the log chain won't be anchored.
7931716fd348SMartin Matuska 	 */
7932716fd348SMartin Matuska 	while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
7933716fd348SMartin Matuska 		ztest_dmu_object_alloc_free(zd, 0);
7934716fd348SMartin Matuska 		zil_commit(zd->zd_zilog, 0);
7935716fd348SMartin Matuska 	}
7936716fd348SMartin Matuska 
7937716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
7938716fd348SMartin Matuska 
7939716fd348SMartin Matuska 	/*
7940716fd348SMartin Matuska 	 * Freeze the pool.  This stops spa_sync() from doing anything,
7941716fd348SMartin Matuska 	 * so that the only way to record changes from now on is the ZIL.
7942716fd348SMartin Matuska 	 */
7943716fd348SMartin Matuska 	spa_freeze(spa);
7944716fd348SMartin Matuska 
7945716fd348SMartin Matuska 	/*
7946716fd348SMartin Matuska 	 * Because it is hard to predict how much space a write will actually
7947716fd348SMartin Matuska 	 * require beforehand, we leave ourselves some fudge space to write over
7948716fd348SMartin Matuska 	 * capacity.
7949716fd348SMartin Matuska 	 */
7950716fd348SMartin Matuska 	uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2;
7951716fd348SMartin Matuska 
7952716fd348SMartin Matuska 	/*
7953716fd348SMartin Matuska 	 * Run tests that generate log records but don't alter the pool config
7954716fd348SMartin Matuska 	 * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
7955716fd348SMartin Matuska 	 * We do a txg_wait_synced() after each iteration to force the txg
7956716fd348SMartin Matuska 	 * to increase well beyond the last synced value in the uberblock.
7957716fd348SMartin Matuska 	 * The ZIL should be OK with that.
7958716fd348SMartin Matuska 	 *
7959716fd348SMartin Matuska 	 * Run a random number of times less than zo_maxloops and ensure we do
7960716fd348SMartin Matuska 	 * not run out of space on the pool.
7961716fd348SMartin Matuska 	 */
7962716fd348SMartin Matuska 	while (ztest_random(10) != 0 &&
7963716fd348SMartin Matuska 	    numloops++ < ztest_opts.zo_maxloops &&
7964716fd348SMartin Matuska 	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
7965716fd348SMartin Matuska 		ztest_od_t od;
7966716fd348SMartin Matuska 		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
7967716fd348SMartin Matuska 		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
7968716fd348SMartin Matuska 		ztest_io(zd, od.od_object,
7969716fd348SMartin Matuska 		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
7970716fd348SMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
7971716fd348SMartin Matuska 	}
7972716fd348SMartin Matuska 
7973716fd348SMartin Matuska 	/*
7974716fd348SMartin Matuska 	 * Commit all of the changes we just generated.
7975716fd348SMartin Matuska 	 */
7976716fd348SMartin Matuska 	zil_commit(zd->zd_zilog, 0);
7977716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
7978716fd348SMartin Matuska 
7979716fd348SMartin Matuska 	/*
7980716fd348SMartin Matuska 	 * Close our dataset and close the pool.
7981716fd348SMartin Matuska 	 */
7982716fd348SMartin Matuska 	ztest_dataset_close(0);
7983716fd348SMartin Matuska 	spa_close(spa, FTAG);
7984716fd348SMartin Matuska 	kernel_fini();
7985716fd348SMartin Matuska 
7986716fd348SMartin Matuska 	/*
7987716fd348SMartin Matuska 	 * Open and close the pool and dataset to induce log replay.
7988716fd348SMartin Matuska 	 */
7989e716630dSMartin Matuska 	raidz_scratch_verify();
7990716fd348SMartin Matuska 	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
7991716fd348SMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
7992716fd348SMartin Matuska 	ASSERT3U(spa_freeze_txg(spa), ==, UINT64_MAX);
7993716fd348SMartin Matuska 	VERIFY0(ztest_dataset_open(0));
7994716fd348SMartin Matuska 	ztest_spa = spa;
7995716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
7996716fd348SMartin Matuska 	ztest_dataset_close(0);
7997716fd348SMartin Matuska 	ztest_reguid(NULL, 0);
7998716fd348SMartin Matuska 
7999716fd348SMartin Matuska 	spa_close(spa, FTAG);
8000716fd348SMartin Matuska 	kernel_fini();
8001716fd348SMartin Matuska }
8002716fd348SMartin Matuska 
8003716fd348SMartin Matuska static void
8004716fd348SMartin Matuska ztest_import_impl(void)
8005716fd348SMartin Matuska {
8006716fd348SMartin Matuska 	importargs_t args = { 0 };
8007716fd348SMartin Matuska 	nvlist_t *cfg = NULL;
8008716fd348SMartin Matuska 	int nsearch = 1;
8009716fd348SMartin Matuska 	char *searchdirs[nsearch];
8010716fd348SMartin Matuska 	int flags = ZFS_IMPORT_MISSING_LOG;
8011716fd348SMartin Matuska 
8012716fd348SMartin Matuska 	searchdirs[0] = ztest_opts.zo_dir;
8013716fd348SMartin Matuska 	args.paths = nsearch;
8014716fd348SMartin Matuska 	args.path = searchdirs;
8015716fd348SMartin Matuska 	args.can_be_active = B_FALSE;
8016716fd348SMartin Matuska 
8017dbd5678dSMartin Matuska 	libpc_handle_t lpch = {
8018dbd5678dSMartin Matuska 		.lpc_lib_handle = NULL,
8019dbd5678dSMartin Matuska 		.lpc_ops = &libzpool_config_ops,
8020dbd5678dSMartin Matuska 		.lpc_printerr = B_TRUE
8021dbd5678dSMartin Matuska 	};
8022dbd5678dSMartin Matuska 	VERIFY0(zpool_find_config(&lpch, ztest_opts.zo_pool, &cfg, &args));
8023716fd348SMartin Matuska 	VERIFY0(spa_import(ztest_opts.zo_pool, cfg, NULL, flags));
8024716fd348SMartin Matuska 	fnvlist_free(cfg);
8025716fd348SMartin Matuska }
8026716fd348SMartin Matuska 
8027716fd348SMartin Matuska /*
8028716fd348SMartin Matuska  * Import a storage pool with the given name.
8029716fd348SMartin Matuska  */
8030716fd348SMartin Matuska static void
8031716fd348SMartin Matuska ztest_import(ztest_shared_t *zs)
8032716fd348SMartin Matuska {
8033716fd348SMartin Matuska 	spa_t *spa;
8034716fd348SMartin Matuska 
8035716fd348SMartin Matuska 	mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
8036716fd348SMartin Matuska 	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
8037716fd348SMartin Matuska 	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
8038716fd348SMartin Matuska 
8039e716630dSMartin Matuska 	raidz_scratch_verify();
8040716fd348SMartin Matuska 	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
8041716fd348SMartin Matuska 
8042716fd348SMartin Matuska 	ztest_import_impl();
8043716fd348SMartin Matuska 
8044716fd348SMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
8045716fd348SMartin Matuska 	zs->zs_metaslab_sz =
8046716fd348SMartin Matuska 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
8047abcdc1b9SMartin Matuska 	zs->zs_guid = spa_guid(spa);
8048716fd348SMartin Matuska 	spa_close(spa, FTAG);
8049716fd348SMartin Matuska 
8050716fd348SMartin Matuska 	kernel_fini();
8051716fd348SMartin Matuska 
8052716fd348SMartin Matuska 	if (!ztest_opts.zo_mmp_test) {
8053abcdc1b9SMartin Matuska 		ztest_run_zdb(zs->zs_guid);
8054716fd348SMartin Matuska 		ztest_freeze();
8055abcdc1b9SMartin Matuska 		ztest_run_zdb(zs->zs_guid);
8056716fd348SMartin Matuska 	}
8057716fd348SMartin Matuska 
8058716fd348SMartin Matuska 	(void) pthread_rwlock_destroy(&ztest_name_lock);
8059716fd348SMartin Matuska 	mutex_destroy(&ztest_vdev_lock);
8060716fd348SMartin Matuska 	mutex_destroy(&ztest_checkpoint_lock);
8061716fd348SMartin Matuska }
8062716fd348SMartin Matuska 
8063716fd348SMartin Matuska /*
8064e716630dSMartin Matuska  * After the expansion was killed, check that the pool is healthy
8065716fd348SMartin Matuska  */
8066716fd348SMartin Matuska static void
8067e716630dSMartin Matuska ztest_raidz_expand_check(spa_t *spa)
8068716fd348SMartin Matuska {
8069e716630dSMartin Matuska 	ASSERT3U(ztest_opts.zo_raidz_expand_test, ==, RAIDZ_EXPAND_KILLED);
8070e716630dSMartin Matuska 	/*
8071e716630dSMartin Matuska 	 * Set pool check done flag, main program will run a zdb check
8072e716630dSMartin Matuska 	 * of the pool when we exit.
8073e716630dSMartin Matuska 	 */
8074e716630dSMartin Matuska 	ztest_shared_opts->zo_raidz_expand_test = RAIDZ_EXPAND_CHECKED;
8075e716630dSMartin Matuska 
8076e716630dSMartin Matuska 	/* Wait for reflow to finish */
8077e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8078e716630dSMartin Matuska 		(void) printf("\nwaiting for reflow to finish ...\n");
8079e716630dSMartin Matuska 	}
8080e716630dSMartin Matuska 	pool_raidz_expand_stat_t rzx_stats;
8081e716630dSMartin Matuska 	pool_raidz_expand_stat_t *pres = &rzx_stats;
8082e716630dSMartin Matuska 	do {
8083e716630dSMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
8084e716630dSMartin Matuska 		(void) poll(NULL, 0, 500); /* wait 1/2 second */
8085e716630dSMartin Matuska 
8086e716630dSMartin Matuska 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
8087e716630dSMartin Matuska 		(void) spa_raidz_expand_get_stats(spa, pres);
8088e716630dSMartin Matuska 		spa_config_exit(spa, SCL_CONFIG, FTAG);
8089e716630dSMartin Matuska 	} while (pres->pres_state != DSS_FINISHED &&
8090e716630dSMartin Matuska 	    pres->pres_reflowed < pres->pres_to_reflow);
8091e716630dSMartin Matuska 
8092e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8093e716630dSMartin Matuska 		(void) printf("verifying an interrupted raidz "
8094e716630dSMartin Matuska 		    "expansion using a pool scrub ...\n");
8095e716630dSMartin Matuska 	}
8096*87bf66d4SMartin Matuska 
8097e716630dSMartin Matuska 	/* Will fail here if there is non-recoverable corruption detected */
8098*87bf66d4SMartin Matuska 	int error = ztest_scrub_impl(spa);
8099*87bf66d4SMartin Matuska 	if (error == EBUSY)
8100*87bf66d4SMartin Matuska 		error = 0;
8101*87bf66d4SMartin Matuska 
8102*87bf66d4SMartin Matuska 	VERIFY0(error);
8103*87bf66d4SMartin Matuska 
8104e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8105e716630dSMartin Matuska 		(void) printf("raidz expansion scrub check complete\n");
8106e716630dSMartin Matuska 	}
8107e716630dSMartin Matuska }
8108e716630dSMartin Matuska 
8109e716630dSMartin Matuska /*
8110e716630dSMartin Matuska  * Start a raidz expansion test.  We run some I/O on the pool for a while
8111e716630dSMartin Matuska  * to get some data in the pool.  Then we grow the raidz and
8112e716630dSMartin Matuska  * kill the test at the requested offset into the reflow, verifying that
8113e716630dSMartin Matuska  * doing such does not lead to pool corruption.
8114e716630dSMartin Matuska  */
8115e716630dSMartin Matuska static void
8116e716630dSMartin Matuska ztest_raidz_expand_run(ztest_shared_t *zs, spa_t *spa)
8117e716630dSMartin Matuska {
8118e716630dSMartin Matuska 	nvlist_t *root;
8119e716630dSMartin Matuska 	pool_raidz_expand_stat_t rzx_stats;
8120e716630dSMartin Matuska 	pool_raidz_expand_stat_t *pres = &rzx_stats;
8121716fd348SMartin Matuska 	kthread_t **run_threads;
8122e716630dSMartin Matuska 	vdev_t *cvd, *rzvd = spa->spa_root_vdev->vdev_child[0];
8123e716630dSMartin Matuska 	int total_disks = rzvd->vdev_children;
8124e716630dSMartin Matuska 	int data_disks = total_disks - vdev_get_nparity(rzvd);
8125e716630dSMartin Matuska 	uint64_t alloc_goal;
8126e716630dSMartin Matuska 	uint64_t csize;
8127e716630dSMartin Matuska 	int error, t;
8128e716630dSMartin Matuska 	int threads = ztest_opts.zo_threads;
8129e716630dSMartin Matuska 	ztest_expand_io_t *thread_args;
8130716fd348SMartin Matuska 
8131e716630dSMartin Matuska 	ASSERT3U(ztest_opts.zo_raidz_expand_test, !=, RAIDZ_EXPAND_NONE);
813204e0313aSBrooks Davis 	ASSERT3P(rzvd->vdev_ops, ==, &vdev_raidz_ops);
8133e716630dSMartin Matuska 	ztest_opts.zo_raidz_expand_test = RAIDZ_EXPAND_STARTED;
8134716fd348SMartin Matuska 
8135e716630dSMartin Matuska 	/* Setup a 1 MiB buffer of random data */
8136e716630dSMartin Matuska 	uint64_t bufsize = 1024 * 1024;
8137e716630dSMartin Matuska 	void *buffer = umem_alloc(bufsize, UMEM_NOFAIL);
8138716fd348SMartin Matuska 
8139e716630dSMartin Matuska 	if (read(ztest_fd_rand, buffer, bufsize) != bufsize) {
8140e716630dSMartin Matuska 		fatal(B_TRUE, "short read from /dev/urandom");
8141716fd348SMartin Matuska 	}
8142716fd348SMartin Matuska 	/*
8143e716630dSMartin Matuska 	 * Put some data in the pool and then attach a vdev to initiate
8144e716630dSMartin Matuska 	 * reflow.
8145716fd348SMartin Matuska 	 */
8146e716630dSMartin Matuska 	run_threads = umem_zalloc(threads * sizeof (kthread_t *), UMEM_NOFAIL);
8147e716630dSMartin Matuska 	thread_args = umem_zalloc(threads * sizeof (ztest_expand_io_t),
8148e716630dSMartin Matuska 	    UMEM_NOFAIL);
8149e716630dSMartin Matuska 	/* Aim for roughly 25% of allocatable space up to 1GB */
8150e716630dSMartin Matuska 	alloc_goal = (vdev_get_min_asize(rzvd) * data_disks) / total_disks;
8151e716630dSMartin Matuska 	alloc_goal = MIN(alloc_goal >> 2, 1024*1024*1024);
8152e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8153e716630dSMartin Matuska 		(void) printf("adding data to pool '%s', goal %llu bytes\n",
8154e716630dSMartin Matuska 		    ztest_opts.zo_pool, (u_longlong_t)alloc_goal);
8155716fd348SMartin Matuska 	}
8156716fd348SMartin Matuska 
8157716fd348SMartin Matuska 	/*
8158e716630dSMartin Matuska 	 * Kick off all the I/O generators that run in parallel.
8159716fd348SMartin Matuska 	 */
8160e716630dSMartin Matuska 	for (t = 0; t < threads; t++) {
8161e716630dSMartin Matuska 		if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) {
8162e716630dSMartin Matuska 			umem_free(run_threads, threads * sizeof (kthread_t *));
8163e716630dSMartin Matuska 			umem_free(buffer, bufsize);
8164e716630dSMartin Matuska 			return;
8165716fd348SMartin Matuska 		}
8166e716630dSMartin Matuska 		thread_args[t].rzx_id = t;
8167e716630dSMartin Matuska 		thread_args[t].rzx_amount = alloc_goal / threads;
8168e716630dSMartin Matuska 		thread_args[t].rzx_bufsize = bufsize;
8169e716630dSMartin Matuska 		thread_args[t].rzx_buffer = buffer;
8170e716630dSMartin Matuska 		thread_args[t].rzx_alloc_max = alloc_goal;
8171e716630dSMartin Matuska 		thread_args[t].rzx_spa = spa;
8172e716630dSMartin Matuska 		run_threads[t] = thread_create(NULL, 0, ztest_rzx_thread,
8173e716630dSMartin Matuska 		    &thread_args[t], 0, NULL, TS_RUN | TS_JOINABLE,
8174e716630dSMartin Matuska 		    defclsyspri);
8175e716630dSMartin Matuska 	}
8176716fd348SMartin Matuska 
8177716fd348SMartin Matuska 	/*
8178e716630dSMartin Matuska 	 * Wait for all of the writers to complete.
8179716fd348SMartin Matuska 	 */
8180e716630dSMartin Matuska 	for (t = 0; t < threads; t++)
8181e716630dSMartin Matuska 		VERIFY0(thread_join(run_threads[t]));
8182e716630dSMartin Matuska 
8183e716630dSMartin Matuska 	/*
8184e716630dSMartin Matuska 	 * Close all datasets. This must be done after all the threads
8185e716630dSMartin Matuska 	 * are joined so we can be sure none of the datasets are in-use
8186e716630dSMartin Matuska 	 * by any of the threads.
8187e716630dSMartin Matuska 	 */
8188e716630dSMartin Matuska 	for (t = 0; t < ztest_opts.zo_threads; t++) {
8189e716630dSMartin Matuska 		if (t < ztest_opts.zo_datasets)
8190e716630dSMartin Matuska 			ztest_dataset_close(t);
8191e716630dSMartin Matuska 	}
8192e716630dSMartin Matuska 
8193716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
8194716fd348SMartin Matuska 
8195e716630dSMartin Matuska 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
8196e716630dSMartin Matuska 	zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
8197e716630dSMartin Matuska 
8198e716630dSMartin Matuska 	umem_free(buffer, bufsize);
8199e716630dSMartin Matuska 	umem_free(run_threads, threads * sizeof (kthread_t *));
8200e716630dSMartin Matuska 	umem_free(thread_args, threads * sizeof (ztest_expand_io_t));
8201e716630dSMartin Matuska 
8202e716630dSMartin Matuska 	/* Set our reflow target to 25%, 50% or 75% of allocated size */
8203e716630dSMartin Matuska 	uint_t multiple = ztest_random(3) + 1;
8204e716630dSMartin Matuska 	uint64_t reflow_max = (rzvd->vdev_stat.vs_alloc * multiple) / 4;
8205e716630dSMartin Matuska 	raidz_expand_max_reflow_bytes = reflow_max;
8206e716630dSMartin Matuska 
8207e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8208e716630dSMartin Matuska 		(void) printf("running raidz expansion test, killing when "
8209e716630dSMartin Matuska 		    "reflow reaches %llu bytes (%u/4 of allocated space)\n",
8210e716630dSMartin Matuska 		    (u_longlong_t)reflow_max, multiple);
8211716fd348SMartin Matuska 	}
8212716fd348SMartin Matuska 
8213e716630dSMartin Matuska 	/* XXX - do we want some I/O load during the reflow? */
8214e716630dSMartin Matuska 
8215e716630dSMartin Matuska 	/*
8216e716630dSMartin Matuska 	 * Use a disk size that is larger than existing ones
8217e716630dSMartin Matuska 	 */
8218e716630dSMartin Matuska 	cvd = rzvd->vdev_child[0];
8219e716630dSMartin Matuska 	csize = vdev_get_min_asize(cvd);
8220e716630dSMartin Matuska 	csize += csize / 10;
8221e716630dSMartin Matuska 	/*
8222e716630dSMartin Matuska 	 * Path to vdev to be attached
8223e716630dSMartin Matuska 	 */
8224e716630dSMartin Matuska 	char *newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
8225e716630dSMartin Matuska 	(void) snprintf(newpath, MAXPATHLEN, ztest_dev_template,
8226e716630dSMartin Matuska 	    ztest_opts.zo_dir, ztest_opts.zo_pool, rzvd->vdev_children);
8227e716630dSMartin Matuska 	/*
8228e716630dSMartin Matuska 	 * Build the nvlist describing newpath.
8229e716630dSMartin Matuska 	 */
8230e716630dSMartin Matuska 	root = make_vdev_root(newpath, NULL, NULL, csize, ztest_get_ashift(),
8231e716630dSMartin Matuska 	    NULL, 0, 0, 1);
8232e716630dSMartin Matuska 	/*
8233e716630dSMartin Matuska 	 * Expand the raidz vdev by attaching the new disk
8234e716630dSMartin Matuska 	 */
8235e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8236e716630dSMartin Matuska 		(void) printf("expanding raidz: %d wide to %d wide with '%s'\n",
8237e716630dSMartin Matuska 		    (int)rzvd->vdev_children, (int)rzvd->vdev_children + 1,
8238e716630dSMartin Matuska 		    newpath);
8239e716630dSMartin Matuska 	}
8240e716630dSMartin Matuska 	error = spa_vdev_attach(spa, rzvd->vdev_guid, root, B_FALSE, B_FALSE);
8241e716630dSMartin Matuska 	nvlist_free(root);
8242e716630dSMartin Matuska 	if (error != 0) {
8243e716630dSMartin Matuska 		fatal(0, "raidz expand: attach (%s %llu) returned %d",
8244e716630dSMartin Matuska 		    newpath, (long long)csize, error);
8245e716630dSMartin Matuska 	}
8246e716630dSMartin Matuska 
8247e716630dSMartin Matuska 	/*
8248e716630dSMartin Matuska 	 * Wait for reflow to begin
8249e716630dSMartin Matuska 	 */
8250e716630dSMartin Matuska 	while (spa->spa_raidz_expand == NULL) {
8251e716630dSMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
8252e716630dSMartin Matuska 		(void) poll(NULL, 0, 100); /* wait 1/10 second */
8253e716630dSMartin Matuska 	}
8254e716630dSMartin Matuska 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
8255e716630dSMartin Matuska 	(void) spa_raidz_expand_get_stats(spa, pres);
8256e716630dSMartin Matuska 	spa_config_exit(spa, SCL_CONFIG, FTAG);
8257e716630dSMartin Matuska 	while (pres->pres_state != DSS_SCANNING) {
8258e716630dSMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
8259e716630dSMartin Matuska 		(void) poll(NULL, 0, 100); /* wait 1/10 second */
8260e716630dSMartin Matuska 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
8261e716630dSMartin Matuska 		(void) spa_raidz_expand_get_stats(spa, pres);
8262e716630dSMartin Matuska 		spa_config_exit(spa, SCL_CONFIG, FTAG);
8263e716630dSMartin Matuska 	}
8264e716630dSMartin Matuska 
8265e716630dSMartin Matuska 	ASSERT3U(pres->pres_state, ==, DSS_SCANNING);
8266e716630dSMartin Matuska 	ASSERT3U(pres->pres_to_reflow, !=, 0);
8267e716630dSMartin Matuska 	/*
8268e716630dSMartin Matuska 	 * Set so when we are killed we go to raidz checking rather than
8269e716630dSMartin Matuska 	 * restarting test.
8270e716630dSMartin Matuska 	 */
8271e716630dSMartin Matuska 	ztest_shared_opts->zo_raidz_expand_test = RAIDZ_EXPAND_KILLED;
8272e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8273e716630dSMartin Matuska 		(void) printf("raidz expansion reflow started, waiting for "
8274e716630dSMartin Matuska 		    "%llu bytes to be copied\n", (u_longlong_t)reflow_max);
8275e716630dSMartin Matuska 	}
8276e716630dSMartin Matuska 
8277e716630dSMartin Matuska 	/*
8278e716630dSMartin Matuska 	 * Wait for reflow maximum to be reached and then kill the test
8279e716630dSMartin Matuska 	 */
8280e716630dSMartin Matuska 	while (pres->pres_reflowed < reflow_max) {
8281e716630dSMartin Matuska 		txg_wait_synced(spa_get_dsl(spa), 0);
8282e716630dSMartin Matuska 		(void) poll(NULL, 0, 100); /* wait 1/10 second */
8283e716630dSMartin Matuska 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
8284e716630dSMartin Matuska 		(void) spa_raidz_expand_get_stats(spa, pres);
8285e716630dSMartin Matuska 		spa_config_exit(spa, SCL_CONFIG, FTAG);
8286e716630dSMartin Matuska 	}
8287e716630dSMartin Matuska 
8288e716630dSMartin Matuska 	/* Reset the reflow pause before killing */
8289e716630dSMartin Matuska 	raidz_expand_max_reflow_bytes = 0;
8290e716630dSMartin Matuska 
8291e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8292e716630dSMartin Matuska 		(void) printf("killing raidz expansion test after reflow "
8293e716630dSMartin Matuska 		    "reached %llu bytes\n", (u_longlong_t)pres->pres_reflowed);
8294e716630dSMartin Matuska 	}
8295e716630dSMartin Matuska 
8296e716630dSMartin Matuska 	/*
8297e716630dSMartin Matuska 	 * Kill ourself to simulate a panic during a reflow.  Our parent will
8298e716630dSMartin Matuska 	 * restart the test and the changed flag value will drive the test
8299e716630dSMartin Matuska 	 * through the scrub/check code to verify the pool is not corrupted.
8300e716630dSMartin Matuska 	 */
8301e716630dSMartin Matuska 	ztest_kill(zs);
8302e716630dSMartin Matuska }
8303e716630dSMartin Matuska 
8304e716630dSMartin Matuska static void
8305e716630dSMartin Matuska ztest_generic_run(ztest_shared_t *zs, spa_t *spa)
8306e716630dSMartin Matuska {
8307e716630dSMartin Matuska 	kthread_t **run_threads;
8308e716630dSMartin Matuska 	int t;
8309e716630dSMartin Matuska 
8310716fd348SMartin Matuska 	run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *),
8311716fd348SMartin Matuska 	    UMEM_NOFAIL);
8312716fd348SMartin Matuska 
8313716fd348SMartin Matuska 	/*
8314716fd348SMartin Matuska 	 * Kick off all the tests that run in parallel.
8315716fd348SMartin Matuska 	 */
8316716fd348SMartin Matuska 	for (t = 0; t < ztest_opts.zo_threads; t++) {
8317716fd348SMartin Matuska 		if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) {
8318716fd348SMartin Matuska 			umem_free(run_threads, ztest_opts.zo_threads *
8319716fd348SMartin Matuska 			    sizeof (kthread_t *));
8320716fd348SMartin Matuska 			return;
8321716fd348SMartin Matuska 		}
8322716fd348SMartin Matuska 
8323716fd348SMartin Matuska 		run_threads[t] = thread_create(NULL, 0, ztest_thread,
8324716fd348SMartin Matuska 		    (void *)(uintptr_t)t, 0, NULL, TS_RUN | TS_JOINABLE,
8325716fd348SMartin Matuska 		    defclsyspri);
8326716fd348SMartin Matuska 	}
8327716fd348SMartin Matuska 
8328716fd348SMartin Matuska 	/*
8329716fd348SMartin Matuska 	 * Wait for all of the tests to complete.
8330716fd348SMartin Matuska 	 */
8331716fd348SMartin Matuska 	for (t = 0; t < ztest_opts.zo_threads; t++)
8332716fd348SMartin Matuska 		VERIFY0(thread_join(run_threads[t]));
8333716fd348SMartin Matuska 
8334716fd348SMartin Matuska 	/*
8335716fd348SMartin Matuska 	 * Close all datasets. This must be done after all the threads
8336716fd348SMartin Matuska 	 * are joined so we can be sure none of the datasets are in-use
8337716fd348SMartin Matuska 	 * by any of the threads.
8338716fd348SMartin Matuska 	 */
8339716fd348SMartin Matuska 	for (t = 0; t < ztest_opts.zo_threads; t++) {
8340716fd348SMartin Matuska 		if (t < ztest_opts.zo_datasets)
8341716fd348SMartin Matuska 			ztest_dataset_close(t);
8342716fd348SMartin Matuska 	}
8343716fd348SMartin Matuska 
8344716fd348SMartin Matuska 	txg_wait_synced(spa_get_dsl(spa), 0);
8345716fd348SMartin Matuska 
8346716fd348SMartin Matuska 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
8347716fd348SMartin Matuska 	zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
8348716fd348SMartin Matuska 
8349716fd348SMartin Matuska 	umem_free(run_threads, ztest_opts.zo_threads * sizeof (kthread_t *));
8350e716630dSMartin Matuska }
8351e716630dSMartin Matuska 
8352e716630dSMartin Matuska /*
8353e716630dSMartin Matuska  * Setup our test context and kick off threads to run tests on all datasets
8354e716630dSMartin Matuska  * in parallel.
8355e716630dSMartin Matuska  */
8356e716630dSMartin Matuska static void
8357e716630dSMartin Matuska ztest_run(ztest_shared_t *zs)
8358e716630dSMartin Matuska {
8359e716630dSMartin Matuska 	spa_t *spa;
8360e716630dSMartin Matuska 	objset_t *os;
8361e716630dSMartin Matuska 	kthread_t *resume_thread, *deadman_thread;
8362e716630dSMartin Matuska 	uint64_t object;
8363e716630dSMartin Matuska 	int error;
8364e716630dSMartin Matuska 	int t, d;
8365e716630dSMartin Matuska 
8366e716630dSMartin Matuska 	ztest_exiting = B_FALSE;
8367e716630dSMartin Matuska 
8368e716630dSMartin Matuska 	/*
8369e716630dSMartin Matuska 	 * Initialize parent/child shared state.
8370e716630dSMartin Matuska 	 */
8371e716630dSMartin Matuska 	mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
8372e716630dSMartin Matuska 	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
8373e716630dSMartin Matuska 	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
8374e716630dSMartin Matuska 
8375e716630dSMartin Matuska 	zs->zs_thread_start = gethrtime();
8376e716630dSMartin Matuska 	zs->zs_thread_stop =
8377e716630dSMartin Matuska 	    zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
8378e716630dSMartin Matuska 	zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
8379e716630dSMartin Matuska 	zs->zs_thread_kill = zs->zs_thread_stop;
8380e716630dSMartin Matuska 	if (ztest_random(100) < ztest_opts.zo_killrate) {
8381e716630dSMartin Matuska 		zs->zs_thread_kill -=
8382e716630dSMartin Matuska 		    ztest_random(ztest_opts.zo_passtime * NANOSEC);
8383e716630dSMartin Matuska 	}
8384e716630dSMartin Matuska 
8385e716630dSMartin Matuska 	mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL);
8386e716630dSMartin Matuska 
8387e716630dSMartin Matuska 	list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
8388e716630dSMartin Matuska 	    offsetof(ztest_cb_data_t, zcd_node));
8389e716630dSMartin Matuska 
8390e716630dSMartin Matuska 	/*
8391e716630dSMartin Matuska 	 * Open our pool.  It may need to be imported first depending on
8392e716630dSMartin Matuska 	 * what tests were running when the previous pass was terminated.
8393e716630dSMartin Matuska 	 */
8394e716630dSMartin Matuska 	raidz_scratch_verify();
8395e716630dSMartin Matuska 	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
8396e716630dSMartin Matuska 	error = spa_open(ztest_opts.zo_pool, &spa, FTAG);
8397e716630dSMartin Matuska 	if (error) {
8398e716630dSMartin Matuska 		VERIFY3S(error, ==, ENOENT);
8399e716630dSMartin Matuska 		ztest_import_impl();
8400e716630dSMartin Matuska 		VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
8401e716630dSMartin Matuska 		zs->zs_metaslab_sz =
8402e716630dSMartin Matuska 		    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
8403e716630dSMartin Matuska 	}
8404e716630dSMartin Matuska 
8405e716630dSMartin Matuska 	metaslab_preload_limit = ztest_random(20) + 1;
8406e716630dSMartin Matuska 	ztest_spa = spa;
8407e716630dSMartin Matuska 
8408e716630dSMartin Matuska 	/*
8409e716630dSMartin Matuska 	 * XXX - BUGBUG raidz expansion do not run this for generic for now
8410e716630dSMartin Matuska 	 */
8411e716630dSMartin Matuska 	if (ztest_opts.zo_raidz_expand_test != RAIDZ_EXPAND_NONE)
8412e716630dSMartin Matuska 		VERIFY0(vdev_raidz_impl_set("cycle"));
8413e716630dSMartin Matuska 
8414e716630dSMartin Matuska 	dmu_objset_stats_t dds;
8415e716630dSMartin Matuska 	VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool,
8416e716630dSMartin Matuska 	    DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
8417e716630dSMartin Matuska 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
8418e716630dSMartin Matuska 	dmu_objset_fast_stat(os, &dds);
8419e716630dSMartin Matuska 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
8420e716630dSMartin Matuska 	dmu_objset_disown(os, B_TRUE, FTAG);
8421e716630dSMartin Matuska 
8422e716630dSMartin Matuska 	/* Give the dedicated raidz expansion test more grace time */
8423e716630dSMartin Matuska 	if (ztest_opts.zo_raidz_expand_test != RAIDZ_EXPAND_NONE)
8424e716630dSMartin Matuska 		zfs_deadman_synctime_ms *= 2;
8425e716630dSMartin Matuska 
8426e716630dSMartin Matuska 	/*
8427e716630dSMartin Matuska 	 * Create a thread to periodically resume suspended I/O.
8428e716630dSMartin Matuska 	 */
8429e716630dSMartin Matuska 	resume_thread = thread_create(NULL, 0, ztest_resume_thread,
8430e716630dSMartin Matuska 	    spa, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
8431e716630dSMartin Matuska 
8432e716630dSMartin Matuska 	/*
8433e716630dSMartin Matuska 	 * Create a deadman thread and set to panic if we hang.
8434e716630dSMartin Matuska 	 */
8435e716630dSMartin Matuska 	deadman_thread = thread_create(NULL, 0, ztest_deadman_thread,
8436e716630dSMartin Matuska 	    zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
8437e716630dSMartin Matuska 
8438e716630dSMartin Matuska 	spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
8439e716630dSMartin Matuska 
8440e716630dSMartin Matuska 	/*
8441e716630dSMartin Matuska 	 * Verify that we can safely inquire about any object,
8442e716630dSMartin Matuska 	 * whether it's allocated or not.  To make it interesting,
8443e716630dSMartin Matuska 	 * we probe a 5-wide window around each power of two.
8444e716630dSMartin Matuska 	 * This hits all edge cases, including zero and the max.
8445e716630dSMartin Matuska 	 */
8446e716630dSMartin Matuska 	for (t = 0; t < 64; t++) {
8447e716630dSMartin Matuska 		for (d = -5; d <= 5; d++) {
8448e716630dSMartin Matuska 			error = dmu_object_info(spa->spa_meta_objset,
8449e716630dSMartin Matuska 			    (1ULL << t) + d, NULL);
8450e716630dSMartin Matuska 			ASSERT(error == 0 || error == ENOENT ||
8451e716630dSMartin Matuska 			    error == EINVAL);
8452e716630dSMartin Matuska 		}
8453e716630dSMartin Matuska 	}
8454e716630dSMartin Matuska 
8455e716630dSMartin Matuska 	/*
8456e716630dSMartin Matuska 	 * If we got any ENOSPC errors on the previous run, destroy something.
8457e716630dSMartin Matuska 	 */
8458e716630dSMartin Matuska 	if (zs->zs_enospc_count != 0) {
8459e716630dSMartin Matuska 		/* Not expecting ENOSPC errors during raidz expansion tests */
8460e716630dSMartin Matuska 		ASSERT3U(ztest_opts.zo_raidz_expand_test, ==,
8461e716630dSMartin Matuska 		    RAIDZ_EXPAND_NONE);
8462e716630dSMartin Matuska 
8463e716630dSMartin Matuska 		int d = ztest_random(ztest_opts.zo_datasets);
8464e716630dSMartin Matuska 		ztest_dataset_destroy(d);
8465e716630dSMartin Matuska 	}
8466e716630dSMartin Matuska 	zs->zs_enospc_count = 0;
8467e716630dSMartin Matuska 
8468e716630dSMartin Matuska 	/*
8469e716630dSMartin Matuska 	 * If we were in the middle of ztest_device_removal() and were killed
8470e716630dSMartin Matuska 	 * we need to ensure the removal and scrub complete before running
8471e716630dSMartin Matuska 	 * any tests that check ztest_device_removal_active. The removal will
8472e716630dSMartin Matuska 	 * be restarted automatically when the spa is opened, but we need to
8473e716630dSMartin Matuska 	 * initiate the scrub manually if it is not already in progress. Note
8474e716630dSMartin Matuska 	 * that we always run the scrub whenever an indirect vdev exists
8475e716630dSMartin Matuska 	 * because we have no way of knowing for sure if ztest_device_removal()
8476e716630dSMartin Matuska 	 * fully completed its scrub before the pool was reimported.
8477e716630dSMartin Matuska 	 *
8478e716630dSMartin Matuska 	 * Does not apply for the RAIDZ expansion specific test runs
8479e716630dSMartin Matuska 	 */
8480e716630dSMartin Matuska 	if (ztest_opts.zo_raidz_expand_test == RAIDZ_EXPAND_NONE &&
8481e716630dSMartin Matuska 	    (spa->spa_removing_phys.sr_state == DSS_SCANNING ||
8482e716630dSMartin Matuska 	    spa->spa_removing_phys.sr_prev_indirect_vdev != -1)) {
8483e716630dSMartin Matuska 		while (spa->spa_removing_phys.sr_state == DSS_SCANNING)
8484e716630dSMartin Matuska 			txg_wait_synced(spa_get_dsl(spa), 0);
8485e716630dSMartin Matuska 
8486e716630dSMartin Matuska 		error = ztest_scrub_impl(spa);
8487e716630dSMartin Matuska 		if (error == EBUSY)
8488e716630dSMartin Matuska 			error = 0;
8489e716630dSMartin Matuska 		ASSERT0(error);
8490e716630dSMartin Matuska 	}
8491e716630dSMartin Matuska 
8492e716630dSMartin Matuska 	if (ztest_opts.zo_verbose >= 4)
8493e716630dSMartin Matuska 		(void) printf("starting main threads...\n");
8494e716630dSMartin Matuska 
8495e716630dSMartin Matuska 	/*
8496e716630dSMartin Matuska 	 * Replay all logs of all datasets in the pool. This is primarily for
8497e716630dSMartin Matuska 	 * temporary datasets which wouldn't otherwise get replayed, which
8498e716630dSMartin Matuska 	 * can trigger failures when attempting to offline a SLOG in
8499e716630dSMartin Matuska 	 * ztest_fault_inject().
8500e716630dSMartin Matuska 	 */
8501e716630dSMartin Matuska 	(void) dmu_objset_find(ztest_opts.zo_pool, ztest_replay_zil_cb,
8502e716630dSMartin Matuska 	    NULL, DS_FIND_CHILDREN);
8503e716630dSMartin Matuska 
8504e716630dSMartin Matuska 	if (ztest_opts.zo_raidz_expand_test == RAIDZ_EXPAND_REQUESTED)
8505e716630dSMartin Matuska 		ztest_raidz_expand_run(zs, spa);
8506e716630dSMartin Matuska 	else if (ztest_opts.zo_raidz_expand_test == RAIDZ_EXPAND_KILLED)
8507e716630dSMartin Matuska 		ztest_raidz_expand_check(spa);
8508e716630dSMartin Matuska 	else
8509e716630dSMartin Matuska 		ztest_generic_run(zs, spa);
8510716fd348SMartin Matuska 
8511716fd348SMartin Matuska 	/* Kill the resume and deadman threads */
8512716fd348SMartin Matuska 	ztest_exiting = B_TRUE;
8513716fd348SMartin Matuska 	VERIFY0(thread_join(resume_thread));
8514716fd348SMartin Matuska 	VERIFY0(thread_join(deadman_thread));
8515716fd348SMartin Matuska 	ztest_resume(spa);
8516716fd348SMartin Matuska 
8517716fd348SMartin Matuska 	/*
8518716fd348SMartin Matuska 	 * Right before closing the pool, kick off a bunch of async I/O;
8519716fd348SMartin Matuska 	 * spa_close() should wait for it to complete.
8520716fd348SMartin Matuska 	 */
8521716fd348SMartin Matuska 	for (object = 1; object < 50; object++) {
8522716fd348SMartin Matuska 		dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
8523716fd348SMartin Matuska 		    ZIO_PRIORITY_SYNC_READ);
8524716fd348SMartin Matuska 	}
8525716fd348SMartin Matuska 
8526716fd348SMartin Matuska 	/* Verify that at least one commit cb was called in a timely fashion */
8527716fd348SMartin Matuska 	if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG)
8528716fd348SMartin Matuska 		VERIFY0(zc_min_txg_delay);
8529716fd348SMartin Matuska 
8530716fd348SMartin Matuska 	spa_close(spa, FTAG);
8531716fd348SMartin Matuska 
8532716fd348SMartin Matuska 	/*
8533716fd348SMartin Matuska 	 * Verify that we can loop over all pools.
8534716fd348SMartin Matuska 	 */
8535716fd348SMartin Matuska 	mutex_enter(&spa_namespace_lock);
8536716fd348SMartin Matuska 	for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
8537716fd348SMartin Matuska 		if (ztest_opts.zo_verbose > 3)
8538716fd348SMartin Matuska 			(void) printf("spa_next: found %s\n", spa_name(spa));
8539716fd348SMartin Matuska 	mutex_exit(&spa_namespace_lock);
8540716fd348SMartin Matuska 
8541716fd348SMartin Matuska 	/*
8542716fd348SMartin Matuska 	 * Verify that we can export the pool and reimport it under a
8543716fd348SMartin Matuska 	 * different name.
8544716fd348SMartin Matuska 	 */
8545716fd348SMartin Matuska 	if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) {
8546716fd348SMartin Matuska 		char name[ZFS_MAX_DATASET_NAME_LEN];
8547716fd348SMartin Matuska 		(void) snprintf(name, sizeof (name), "%s_import",
8548716fd348SMartin Matuska 		    ztest_opts.zo_pool);
8549716fd348SMartin Matuska 		ztest_spa_import_export(ztest_opts.zo_pool, name);
8550716fd348SMartin Matuska 		ztest_spa_import_export(name, ztest_opts.zo_pool);
8551716fd348SMartin Matuska 	}
8552716fd348SMartin Matuska 
8553716fd348SMartin Matuska 	kernel_fini();
8554716fd348SMartin Matuska 
8555716fd348SMartin Matuska 	list_destroy(&zcl.zcl_callbacks);
8556716fd348SMartin Matuska 	mutex_destroy(&zcl.zcl_callbacks_lock);
8557716fd348SMartin Matuska 	(void) pthread_rwlock_destroy(&ztest_name_lock);
8558716fd348SMartin Matuska 	mutex_destroy(&ztest_vdev_lock);
8559716fd348SMartin Matuska 	mutex_destroy(&ztest_checkpoint_lock);
8560716fd348SMartin Matuska }
8561716fd348SMartin Matuska 
8562716fd348SMartin Matuska static void
8563716fd348SMartin Matuska print_time(hrtime_t t, char *timebuf)
8564716fd348SMartin Matuska {
8565716fd348SMartin Matuska 	hrtime_t s = t / NANOSEC;
8566716fd348SMartin Matuska 	hrtime_t m = s / 60;
8567716fd348SMartin Matuska 	hrtime_t h = m / 60;
8568716fd348SMartin Matuska 	hrtime_t d = h / 24;
8569716fd348SMartin Matuska 
8570716fd348SMartin Matuska 	s -= m * 60;
8571716fd348SMartin Matuska 	m -= h * 60;
8572716fd348SMartin Matuska 	h -= d * 24;
8573716fd348SMartin Matuska 
8574716fd348SMartin Matuska 	timebuf[0] = '\0';
8575716fd348SMartin Matuska 
8576716fd348SMartin Matuska 	if (d)
8577716fd348SMartin Matuska 		(void) sprintf(timebuf,
8578716fd348SMartin Matuska 		    "%llud%02lluh%02llum%02llus", d, h, m, s);
8579716fd348SMartin Matuska 	else if (h)
8580716fd348SMartin Matuska 		(void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
8581716fd348SMartin Matuska 	else if (m)
8582716fd348SMartin Matuska 		(void) sprintf(timebuf, "%llum%02llus", m, s);
8583716fd348SMartin Matuska 	else
8584716fd348SMartin Matuska 		(void) sprintf(timebuf, "%llus", s);
8585716fd348SMartin Matuska }
8586716fd348SMartin Matuska 
8587716fd348SMartin Matuska static nvlist_t *
8588ce4dcb97SMartin Matuska make_random_pool_props(void)
8589716fd348SMartin Matuska {
8590716fd348SMartin Matuska 	nvlist_t *props;
8591716fd348SMartin Matuska 
8592716fd348SMartin Matuska 	props = fnvlist_alloc();
8593716fd348SMartin Matuska 
8594ce4dcb97SMartin Matuska 	/* Twenty percent of the time enable ZPOOL_PROP_DEDUP_TABLE_QUOTA */
8595ce4dcb97SMartin Matuska 	if (ztest_random(5) == 0) {
8596ce4dcb97SMartin Matuska 		fnvlist_add_uint64(props,
8597ce4dcb97SMartin Matuska 		    zpool_prop_to_name(ZPOOL_PROP_DEDUP_TABLE_QUOTA),
8598ce4dcb97SMartin Matuska 		    2 * 1024 * 1024);
8599ce4dcb97SMartin Matuska 	}
8600716fd348SMartin Matuska 
8601ce4dcb97SMartin Matuska 	/* Fifty percent of the time enable ZPOOL_PROP_AUTOREPLACE */
8602ce4dcb97SMartin Matuska 	if (ztest_random(2) == 0) {
8603716fd348SMartin Matuska 		fnvlist_add_uint64(props,
8604716fd348SMartin Matuska 		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1);
8605ce4dcb97SMartin Matuska 	}
8606716fd348SMartin Matuska 
8607716fd348SMartin Matuska 	return (props);
8608716fd348SMartin Matuska }
8609716fd348SMartin Matuska 
8610716fd348SMartin Matuska /*
8611716fd348SMartin Matuska  * Create a storage pool with the given name and initial vdev size.
8612716fd348SMartin Matuska  * Then test spa_freeze() functionality.
8613716fd348SMartin Matuska  */
8614716fd348SMartin Matuska static void
8615716fd348SMartin Matuska ztest_init(ztest_shared_t *zs)
8616716fd348SMartin Matuska {
8617716fd348SMartin Matuska 	spa_t *spa;
8618716fd348SMartin Matuska 	nvlist_t *nvroot, *props;
8619716fd348SMartin Matuska 	int i;
8620716fd348SMartin Matuska 
8621716fd348SMartin Matuska 	mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
8622716fd348SMartin Matuska 	mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL);
8623716fd348SMartin Matuska 	VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL));
8624716fd348SMartin Matuska 
8625e716630dSMartin Matuska 	raidz_scratch_verify();
8626716fd348SMartin Matuska 	kernel_init(SPA_MODE_READ | SPA_MODE_WRITE);
8627716fd348SMartin Matuska 
8628716fd348SMartin Matuska 	/*
8629716fd348SMartin Matuska 	 * Create the storage pool.
8630716fd348SMartin Matuska 	 */
8631716fd348SMartin Matuska 	(void) spa_destroy(ztest_opts.zo_pool);
8632716fd348SMartin Matuska 	ztest_shared->zs_vdev_next_leaf = 0;
8633716fd348SMartin Matuska 	zs->zs_splits = 0;
8634716fd348SMartin Matuska 	zs->zs_mirrors = ztest_opts.zo_mirrors;
8635716fd348SMartin Matuska 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
8636716fd348SMartin Matuska 	    NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1);
8637ce4dcb97SMartin Matuska 	props = make_random_pool_props();
8638716fd348SMartin Matuska 
8639716fd348SMartin Matuska 	/*
8640716fd348SMartin Matuska 	 * We don't expect the pool to suspend unless maxfaults == 0,
8641716fd348SMartin Matuska 	 * in which case ztest_fault_inject() temporarily takes away
8642716fd348SMartin Matuska 	 * the only valid replica.
8643716fd348SMartin Matuska 	 */
8644716fd348SMartin Matuska 	fnvlist_add_uint64(props,
8645716fd348SMartin Matuska 	    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
8646716fd348SMartin Matuska 	    MAXFAULTS(zs) ? ZIO_FAILURE_MODE_PANIC : ZIO_FAILURE_MODE_WAIT);
8647716fd348SMartin Matuska 
8648716fd348SMartin Matuska 	for (i = 0; i < SPA_FEATURES; i++) {
8649716fd348SMartin Matuska 		char *buf;
8650716fd348SMartin Matuska 
8651716fd348SMartin Matuska 		if (!spa_feature_table[i].fi_zfs_mod_supported)
8652716fd348SMartin Matuska 			continue;
8653716fd348SMartin Matuska 
8654716fd348SMartin Matuska 		/*
8655716fd348SMartin Matuska 		 * 75% chance of using the log space map feature. We want ztest
8656716fd348SMartin Matuska 		 * to exercise both the code paths that use the log space map
8657716fd348SMartin Matuska 		 * feature and the ones that don't.
8658716fd348SMartin Matuska 		 */
8659716fd348SMartin Matuska 		if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
8660716fd348SMartin Matuska 			continue;
8661716fd348SMartin Matuska 
8662e2df9bb4SMartin Matuska 		/*
8663e2df9bb4SMartin Matuska 		 * split 50/50 between legacy and fast dedup
8664e2df9bb4SMartin Matuska 		 */
8665e2df9bb4SMartin Matuska 		if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
8666e2df9bb4SMartin Matuska 			continue;
8667e2df9bb4SMartin Matuska 
8668716fd348SMartin Matuska 		VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
8669716fd348SMartin Matuska 		    spa_feature_table[i].fi_uname));
8670716fd348SMartin Matuska 		fnvlist_add_uint64(props, buf, 0);
8671716fd348SMartin Matuska 		free(buf);
8672716fd348SMartin Matuska 	}
8673716fd348SMartin Matuska 
8674716fd348SMartin Matuska 	VERIFY0(spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
8675716fd348SMartin Matuska 	fnvlist_free(nvroot);
8676716fd348SMartin Matuska 	fnvlist_free(props);
8677716fd348SMartin Matuska 
8678716fd348SMartin Matuska 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
8679716fd348SMartin Matuska 	zs->zs_metaslab_sz =
8680716fd348SMartin Matuska 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
8681abcdc1b9SMartin Matuska 	zs->zs_guid = spa_guid(spa);
8682716fd348SMartin Matuska 	spa_close(spa, FTAG);
8683716fd348SMartin Matuska 
8684716fd348SMartin Matuska 	kernel_fini();
8685716fd348SMartin Matuska 
8686716fd348SMartin Matuska 	if (!ztest_opts.zo_mmp_test) {
8687abcdc1b9SMartin Matuska 		ztest_run_zdb(zs->zs_guid);
8688716fd348SMartin Matuska 		ztest_freeze();
8689abcdc1b9SMartin Matuska 		ztest_run_zdb(zs->zs_guid);
8690716fd348SMartin Matuska 	}
8691716fd348SMartin Matuska 
8692716fd348SMartin Matuska 	(void) pthread_rwlock_destroy(&ztest_name_lock);
8693716fd348SMartin Matuska 	mutex_destroy(&ztest_vdev_lock);
8694716fd348SMartin Matuska 	mutex_destroy(&ztest_checkpoint_lock);
8695716fd348SMartin Matuska }
8696716fd348SMartin Matuska 
8697716fd348SMartin Matuska static void
8698716fd348SMartin Matuska setup_data_fd(void)
8699716fd348SMartin Matuska {
8700716fd348SMartin Matuska 	static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
8701716fd348SMartin Matuska 
8702716fd348SMartin Matuska 	ztest_fd_data = mkstemp(ztest_name_data);
8703716fd348SMartin Matuska 	ASSERT3S(ztest_fd_data, >=, 0);
8704716fd348SMartin Matuska 	(void) unlink(ztest_name_data);
8705716fd348SMartin Matuska }
8706716fd348SMartin Matuska 
8707716fd348SMartin Matuska static int
8708716fd348SMartin Matuska shared_data_size(ztest_shared_hdr_t *hdr)
8709716fd348SMartin Matuska {
8710716fd348SMartin Matuska 	int size;
8711716fd348SMartin Matuska 
8712716fd348SMartin Matuska 	size = hdr->zh_hdr_size;
8713716fd348SMartin Matuska 	size += hdr->zh_opts_size;
8714716fd348SMartin Matuska 	size += hdr->zh_size;
8715716fd348SMartin Matuska 	size += hdr->zh_stats_size * hdr->zh_stats_count;
8716716fd348SMartin Matuska 	size += hdr->zh_ds_size * hdr->zh_ds_count;
8717e716630dSMartin Matuska 	size += hdr->zh_scratch_state_size;
8718716fd348SMartin Matuska 
8719716fd348SMartin Matuska 	return (size);
8720716fd348SMartin Matuska }
8721716fd348SMartin Matuska 
8722716fd348SMartin Matuska static void
8723716fd348SMartin Matuska setup_hdr(void)
8724716fd348SMartin Matuska {
8725716fd348SMartin Matuska 	int size;
8726716fd348SMartin Matuska 	ztest_shared_hdr_t *hdr;
8727716fd348SMartin Matuska 
8728716fd348SMartin Matuska 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
8729716fd348SMartin Matuska 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
8730716fd348SMartin Matuska 	ASSERT3P(hdr, !=, MAP_FAILED);
8731716fd348SMartin Matuska 
8732716fd348SMartin Matuska 	VERIFY0(ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t)));
8733716fd348SMartin Matuska 
8734716fd348SMartin Matuska 	hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
8735716fd348SMartin Matuska 	hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
8736716fd348SMartin Matuska 	hdr->zh_size = sizeof (ztest_shared_t);
8737716fd348SMartin Matuska 	hdr->zh_stats_size = sizeof (ztest_shared_callstate_t);
8738716fd348SMartin Matuska 	hdr->zh_stats_count = ZTEST_FUNCS;
8739716fd348SMartin Matuska 	hdr->zh_ds_size = sizeof (ztest_shared_ds_t);
8740716fd348SMartin Matuska 	hdr->zh_ds_count = ztest_opts.zo_datasets;
8741e716630dSMartin Matuska 	hdr->zh_scratch_state_size = sizeof (ztest_shared_scratch_state_t);
8742716fd348SMartin Matuska 
8743716fd348SMartin Matuska 	size = shared_data_size(hdr);
8744716fd348SMartin Matuska 	VERIFY0(ftruncate(ztest_fd_data, size));
8745716fd348SMartin Matuska 
8746716fd348SMartin Matuska 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
8747716fd348SMartin Matuska }
8748716fd348SMartin Matuska 
8749716fd348SMartin Matuska static void
8750716fd348SMartin Matuska setup_data(void)
8751716fd348SMartin Matuska {
8752716fd348SMartin Matuska 	int size, offset;
8753716fd348SMartin Matuska 	ztest_shared_hdr_t *hdr;
8754716fd348SMartin Matuska 	uint8_t *buf;
8755716fd348SMartin Matuska 
8756716fd348SMartin Matuska 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
8757716fd348SMartin Matuska 	    PROT_READ, MAP_SHARED, ztest_fd_data, 0);
8758716fd348SMartin Matuska 	ASSERT3P(hdr, !=, MAP_FAILED);
8759716fd348SMartin Matuska 
8760716fd348SMartin Matuska 	size = shared_data_size(hdr);
8761716fd348SMartin Matuska 
8762716fd348SMartin Matuska 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
8763716fd348SMartin Matuska 	hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),
8764716fd348SMartin Matuska 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
8765716fd348SMartin Matuska 	ASSERT3P(hdr, !=, MAP_FAILED);
8766716fd348SMartin Matuska 	buf = (uint8_t *)hdr;
8767716fd348SMartin Matuska 
8768716fd348SMartin Matuska 	offset = hdr->zh_hdr_size;
8769716fd348SMartin Matuska 	ztest_shared_opts = (void *)&buf[offset];
8770716fd348SMartin Matuska 	offset += hdr->zh_opts_size;
8771716fd348SMartin Matuska 	ztest_shared = (void *)&buf[offset];
8772716fd348SMartin Matuska 	offset += hdr->zh_size;
8773716fd348SMartin Matuska 	ztest_shared_callstate = (void *)&buf[offset];
8774716fd348SMartin Matuska 	offset += hdr->zh_stats_size * hdr->zh_stats_count;
8775716fd348SMartin Matuska 	ztest_shared_ds = (void *)&buf[offset];
8776e716630dSMartin Matuska 	offset += hdr->zh_ds_size * hdr->zh_ds_count;
8777e716630dSMartin Matuska 	ztest_scratch_state = (void *)&buf[offset];
8778716fd348SMartin Matuska }
8779716fd348SMartin Matuska 
8780716fd348SMartin Matuska static boolean_t
8781716fd348SMartin Matuska exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp)
8782716fd348SMartin Matuska {
8783716fd348SMartin Matuska 	pid_t pid;
8784716fd348SMartin Matuska 	int status;
8785716fd348SMartin Matuska 	char *cmdbuf = NULL;
8786716fd348SMartin Matuska 
8787716fd348SMartin Matuska 	pid = fork();
8788716fd348SMartin Matuska 
8789716fd348SMartin Matuska 	if (cmd == NULL) {
8790716fd348SMartin Matuska 		cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
8791716fd348SMartin Matuska 		(void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN);
8792716fd348SMartin Matuska 		cmd = cmdbuf;
8793716fd348SMartin Matuska 	}
8794716fd348SMartin Matuska 
8795716fd348SMartin Matuska 	if (pid == -1)
8796716fd348SMartin Matuska 		fatal(B_TRUE, "fork failed");
8797716fd348SMartin Matuska 
8798716fd348SMartin Matuska 	if (pid == 0) {	/* child */
8799716fd348SMartin Matuska 		char fd_data_str[12];
8800716fd348SMartin Matuska 
8801716fd348SMartin Matuska 		VERIFY3S(11, >=,
8802716fd348SMartin Matuska 		    snprintf(fd_data_str, 12, "%d", ztest_fd_data));
8803716fd348SMartin Matuska 		VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1));
8804716fd348SMartin Matuska 
8805716fd348SMartin Matuska 		if (libpath != NULL) {
8806716fd348SMartin Matuska 			const char *curlp = getenv("LD_LIBRARY_PATH");
8807716fd348SMartin Matuska 			if (curlp == NULL)
8808716fd348SMartin Matuska 				VERIFY0(setenv("LD_LIBRARY_PATH", libpath, 1));
8809716fd348SMartin Matuska 			else {
8810716fd348SMartin Matuska 				char *newlp = NULL;
8811716fd348SMartin Matuska 				VERIFY3S(-1, !=,
8812716fd348SMartin Matuska 				    asprintf(&newlp, "%s:%s", libpath, curlp));
8813716fd348SMartin Matuska 				VERIFY0(setenv("LD_LIBRARY_PATH", newlp, 1));
8814c7046f76SMartin Matuska 				free(newlp);
8815716fd348SMartin Matuska 			}
8816716fd348SMartin Matuska 		}
8817716fd348SMartin Matuska 		(void) execl(cmd, cmd, (char *)NULL);
8818716fd348SMartin Matuska 		ztest_dump_core = B_FALSE;
8819716fd348SMartin Matuska 		fatal(B_TRUE, "exec failed: %s", cmd);
8820716fd348SMartin Matuska 	}
8821716fd348SMartin Matuska 
8822716fd348SMartin Matuska 	if (cmdbuf != NULL) {
8823716fd348SMartin Matuska 		umem_free(cmdbuf, MAXPATHLEN);
8824716fd348SMartin Matuska 		cmd = NULL;
8825716fd348SMartin Matuska 	}
8826716fd348SMartin Matuska 
8827716fd348SMartin Matuska 	while (waitpid(pid, &status, 0) != pid)
8828716fd348SMartin Matuska 		continue;
8829716fd348SMartin Matuska 	if (statusp != NULL)
8830716fd348SMartin Matuska 		*statusp = status;
8831716fd348SMartin Matuska 
8832716fd348SMartin Matuska 	if (WIFEXITED(status)) {
8833716fd348SMartin Matuska 		if (WEXITSTATUS(status) != 0) {
8834716fd348SMartin Matuska 			(void) fprintf(stderr, "child exited with code %d\n",
8835716fd348SMartin Matuska 			    WEXITSTATUS(status));
8836716fd348SMartin Matuska 			exit(2);
8837716fd348SMartin Matuska 		}
8838716fd348SMartin Matuska 		return (B_FALSE);
8839716fd348SMartin Matuska 	} else if (WIFSIGNALED(status)) {
8840716fd348SMartin Matuska 		if (!ignorekill || WTERMSIG(status) != SIGKILL) {
8841716fd348SMartin Matuska 			(void) fprintf(stderr, "child died with signal %d\n",
8842716fd348SMartin Matuska 			    WTERMSIG(status));
8843716fd348SMartin Matuska 			exit(3);
8844716fd348SMartin Matuska 		}
8845716fd348SMartin Matuska 		return (B_TRUE);
8846716fd348SMartin Matuska 	} else {
8847716fd348SMartin Matuska 		(void) fprintf(stderr, "something strange happened to child\n");
8848716fd348SMartin Matuska 		exit(4);
8849716fd348SMartin Matuska 	}
8850716fd348SMartin Matuska }
8851716fd348SMartin Matuska 
8852716fd348SMartin Matuska static void
8853716fd348SMartin Matuska ztest_run_init(void)
8854716fd348SMartin Matuska {
8855716fd348SMartin Matuska 	int i;
8856716fd348SMartin Matuska 
8857716fd348SMartin Matuska 	ztest_shared_t *zs = ztest_shared;
8858716fd348SMartin Matuska 
8859716fd348SMartin Matuska 	/*
8860716fd348SMartin Matuska 	 * Blow away any existing copy of zpool.cache
8861716fd348SMartin Matuska 	 */
8862716fd348SMartin Matuska 	(void) remove(spa_config_path);
8863716fd348SMartin Matuska 
8864716fd348SMartin Matuska 	if (ztest_opts.zo_init == 0) {
8865716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 1)
8866716fd348SMartin Matuska 			(void) printf("Importing pool %s\n",
8867716fd348SMartin Matuska 			    ztest_opts.zo_pool);
8868716fd348SMartin Matuska 		ztest_import(zs);
8869716fd348SMartin Matuska 		return;
8870716fd348SMartin Matuska 	}
8871716fd348SMartin Matuska 
8872716fd348SMartin Matuska 	/*
8873716fd348SMartin Matuska 	 * Create and initialize our storage pool.
8874716fd348SMartin Matuska 	 */
8875716fd348SMartin Matuska 	for (i = 1; i <= ztest_opts.zo_init; i++) {
8876716fd348SMartin Matuska 		memset(zs, 0, sizeof (*zs));
8877716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 3 &&
8878716fd348SMartin Matuska 		    ztest_opts.zo_init != 1) {
8879716fd348SMartin Matuska 			(void) printf("ztest_init(), pass %d\n", i);
8880716fd348SMartin Matuska 		}
8881716fd348SMartin Matuska 		ztest_init(zs);
8882716fd348SMartin Matuska 	}
8883716fd348SMartin Matuska }
8884716fd348SMartin Matuska 
8885716fd348SMartin Matuska int
8886716fd348SMartin Matuska main(int argc, char **argv)
8887716fd348SMartin Matuska {
8888716fd348SMartin Matuska 	int kills = 0;
8889716fd348SMartin Matuska 	int iters = 0;
8890716fd348SMartin Matuska 	int older = 0;
8891716fd348SMartin Matuska 	int newer = 0;
8892716fd348SMartin Matuska 	ztest_shared_t *zs;
8893716fd348SMartin Matuska 	ztest_info_t *zi;
8894716fd348SMartin Matuska 	ztest_shared_callstate_t *zc;
8895716fd348SMartin Matuska 	char timebuf[100];
8896716fd348SMartin Matuska 	char numbuf[NN_NUMBUF_SZ];
8897716fd348SMartin Matuska 	char *cmd;
8898716fd348SMartin Matuska 	boolean_t hasalt;
8899716fd348SMartin Matuska 	int f, err;
8900716fd348SMartin Matuska 	char *fd_data_str = getenv("ZTEST_FD_DATA");
8901716fd348SMartin Matuska 	struct sigaction action;
8902716fd348SMartin Matuska 
8903716fd348SMartin Matuska 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
8904716fd348SMartin Matuska 
8905716fd348SMartin Matuska 	dprintf_setup(&argc, argv);
8906716fd348SMartin Matuska 	zfs_deadman_synctime_ms = 300000;
8907716fd348SMartin Matuska 	zfs_deadman_checktime_ms = 30000;
8908716fd348SMartin Matuska 	/*
8909716fd348SMartin Matuska 	 * As two-word space map entries may not come up often (especially
8910716fd348SMartin Matuska 	 * if pool and vdev sizes are small) we want to force at least some
8911716fd348SMartin Matuska 	 * of them so the feature get tested.
8912716fd348SMartin Matuska 	 */
8913716fd348SMartin Matuska 	zfs_force_some_double_word_sm_entries = B_TRUE;
8914716fd348SMartin Matuska 
8915716fd348SMartin Matuska 	/*
8916716fd348SMartin Matuska 	 * Verify that even extensively damaged split blocks with many
8917716fd348SMartin Matuska 	 * segments can be reconstructed in a reasonable amount of time
8918716fd348SMartin Matuska 	 * when reconstruction is known to be possible.
8919716fd348SMartin Matuska 	 *
8920716fd348SMartin Matuska 	 * Note: the lower this value is, the more damage we inflict, and
8921716fd348SMartin Matuska 	 * the more time ztest spends in recovering that damage. We chose
8922716fd348SMartin Matuska 	 * to induce damage 1/100th of the time so recovery is tested but
8923716fd348SMartin Matuska 	 * not so frequently that ztest doesn't get to test other code paths.
8924716fd348SMartin Matuska 	 */
8925716fd348SMartin Matuska 	zfs_reconstruct_indirect_damage_fraction = 100;
8926716fd348SMartin Matuska 
8927716fd348SMartin Matuska 	action.sa_handler = sig_handler;
8928716fd348SMartin Matuska 	sigemptyset(&action.sa_mask);
8929716fd348SMartin Matuska 	action.sa_flags = 0;
8930716fd348SMartin Matuska 
8931716fd348SMartin Matuska 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
8932716fd348SMartin Matuska 		(void) fprintf(stderr, "ztest: cannot catch SIGSEGV: %s.\n",
8933716fd348SMartin Matuska 		    strerror(errno));
8934716fd348SMartin Matuska 		exit(EXIT_FAILURE);
8935716fd348SMartin Matuska 	}
8936716fd348SMartin Matuska 
8937716fd348SMartin Matuska 	if (sigaction(SIGABRT, &action, NULL) < 0) {
8938716fd348SMartin Matuska 		(void) fprintf(stderr, "ztest: cannot catch SIGABRT: %s.\n",
8939716fd348SMartin Matuska 		    strerror(errno));
8940716fd348SMartin Matuska 		exit(EXIT_FAILURE);
8941716fd348SMartin Matuska 	}
8942716fd348SMartin Matuska 
8943716fd348SMartin Matuska 	/*
8944716fd348SMartin Matuska 	 * Force random_get_bytes() to use /dev/urandom in order to prevent
8945716fd348SMartin Matuska 	 * ztest from needlessly depleting the system entropy pool.
8946716fd348SMartin Matuska 	 */
8947716fd348SMartin Matuska 	random_path = "/dev/urandom";
8948716fd348SMartin Matuska 	ztest_fd_rand = open(random_path, O_RDONLY | O_CLOEXEC);
8949716fd348SMartin Matuska 	ASSERT3S(ztest_fd_rand, >=, 0);
8950716fd348SMartin Matuska 
8951716fd348SMartin Matuska 	if (!fd_data_str) {
8952716fd348SMartin Matuska 		process_options(argc, argv);
8953716fd348SMartin Matuska 
8954716fd348SMartin Matuska 		setup_data_fd();
8955716fd348SMartin Matuska 		setup_hdr();
8956716fd348SMartin Matuska 		setup_data();
8957716fd348SMartin Matuska 		memcpy(ztest_shared_opts, &ztest_opts,
8958716fd348SMartin Matuska 		    sizeof (*ztest_shared_opts));
8959716fd348SMartin Matuska 	} else {
8960716fd348SMartin Matuska 		ztest_fd_data = atoi(fd_data_str);
8961716fd348SMartin Matuska 		setup_data();
8962716fd348SMartin Matuska 		memcpy(&ztest_opts, ztest_shared_opts, sizeof (ztest_opts));
8963716fd348SMartin Matuska 	}
8964716fd348SMartin Matuska 	ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count);
8965716fd348SMartin Matuska 
8966716fd348SMartin Matuska 	err = ztest_set_global_vars();
8967716fd348SMartin Matuska 	if (err != 0 && !fd_data_str) {
8968716fd348SMartin Matuska 		/* error message done by ztest_set_global_vars */
8969716fd348SMartin Matuska 		exit(EXIT_FAILURE);
8970716fd348SMartin Matuska 	} else {
8971716fd348SMartin Matuska 		/* children should not be spawned if setting gvars fails */
8972716fd348SMartin Matuska 		VERIFY3S(err, ==, 0);
8973716fd348SMartin Matuska 	}
8974716fd348SMartin Matuska 
8975716fd348SMartin Matuska 	/* Override location of zpool.cache */
8976716fd348SMartin Matuska 	VERIFY3S(asprintf((char **)&spa_config_path, "%s/zpool.cache",
8977716fd348SMartin Matuska 	    ztest_opts.zo_dir), !=, -1);
8978716fd348SMartin Matuska 
8979716fd348SMartin Matuska 	ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
8980716fd348SMartin Matuska 	    UMEM_NOFAIL);
8981716fd348SMartin Matuska 	zs = ztest_shared;
8982716fd348SMartin Matuska 
8983716fd348SMartin Matuska 	if (fd_data_str) {
8984716fd348SMartin Matuska 		metaslab_force_ganging = ztest_opts.zo_metaslab_force_ganging;
8985716fd348SMartin Matuska 		metaslab_df_alloc_threshold =
8986716fd348SMartin Matuska 		    zs->zs_metaslab_df_alloc_threshold;
8987716fd348SMartin Matuska 
8988716fd348SMartin Matuska 		if (zs->zs_do_init)
8989716fd348SMartin Matuska 			ztest_run_init();
8990716fd348SMartin Matuska 		else
8991716fd348SMartin Matuska 			ztest_run(zs);
8992716fd348SMartin Matuska 		exit(0);
8993716fd348SMartin Matuska 	}
8994716fd348SMartin Matuska 
8995716fd348SMartin Matuska 	hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0);
8996716fd348SMartin Matuska 
8997716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
8998716fd348SMartin Matuska 		(void) printf("%"PRIu64" vdevs, %d datasets, %d threads, "
8999e716630dSMartin Matuska 		    "%d %s disks, parity %d, %"PRIu64" seconds...\n\n",
9000716fd348SMartin Matuska 		    ztest_opts.zo_vdevs,
9001716fd348SMartin Matuska 		    ztest_opts.zo_datasets,
9002716fd348SMartin Matuska 		    ztest_opts.zo_threads,
9003716fd348SMartin Matuska 		    ztest_opts.zo_raid_children,
9004716fd348SMartin Matuska 		    ztest_opts.zo_raid_type,
9005e716630dSMartin Matuska 		    ztest_opts.zo_raid_parity,
9006716fd348SMartin Matuska 		    ztest_opts.zo_time);
9007716fd348SMartin Matuska 	}
9008716fd348SMartin Matuska 
9009716fd348SMartin Matuska 	cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
9010716fd348SMartin Matuska 	(void) strlcpy(cmd, getexecname(), MAXNAMELEN);
9011716fd348SMartin Matuska 
9012716fd348SMartin Matuska 	zs->zs_do_init = B_TRUE;
9013716fd348SMartin Matuska 	if (strlen(ztest_opts.zo_alt_ztest) != 0) {
9014716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 1) {
9015716fd348SMartin Matuska 			(void) printf("Executing older ztest for "
9016716fd348SMartin Matuska 			    "initialization: %s\n", ztest_opts.zo_alt_ztest);
9017716fd348SMartin Matuska 		}
9018716fd348SMartin Matuska 		VERIFY(!exec_child(ztest_opts.zo_alt_ztest,
9019716fd348SMartin Matuska 		    ztest_opts.zo_alt_libpath, B_FALSE, NULL));
9020716fd348SMartin Matuska 	} else {
9021716fd348SMartin Matuska 		VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL));
9022716fd348SMartin Matuska 	}
9023716fd348SMartin Matuska 	zs->zs_do_init = B_FALSE;
9024716fd348SMartin Matuska 
9025716fd348SMartin Matuska 	zs->zs_proc_start = gethrtime();
9026716fd348SMartin Matuska 	zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC;
9027716fd348SMartin Matuska 
9028716fd348SMartin Matuska 	for (f = 0; f < ZTEST_FUNCS; f++) {
9029716fd348SMartin Matuska 		zi = &ztest_info[f];
9030716fd348SMartin Matuska 		zc = ZTEST_GET_SHARED_CALLSTATE(f);
9031716fd348SMartin Matuska 		if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop)
9032716fd348SMartin Matuska 			zc->zc_next = UINT64_MAX;
9033716fd348SMartin Matuska 		else
9034716fd348SMartin Matuska 			zc->zc_next = zs->zs_proc_start +
9035716fd348SMartin Matuska 			    ztest_random(2 * zi->zi_interval[0] + 1);
9036716fd348SMartin Matuska 	}
9037716fd348SMartin Matuska 
9038716fd348SMartin Matuska 	/*
9039716fd348SMartin Matuska 	 * Run the tests in a loop.  These tests include fault injection
9040716fd348SMartin Matuska 	 * to verify that self-healing data works, and forced crashes
9041716fd348SMartin Matuska 	 * to verify that we never lose on-disk consistency.
9042716fd348SMartin Matuska 	 */
9043716fd348SMartin Matuska 	while (gethrtime() < zs->zs_proc_stop) {
9044716fd348SMartin Matuska 		int status;
9045716fd348SMartin Matuska 		boolean_t killed;
9046716fd348SMartin Matuska 
9047716fd348SMartin Matuska 		/*
9048716fd348SMartin Matuska 		 * Initialize the workload counters for each function.
9049716fd348SMartin Matuska 		 */
9050716fd348SMartin Matuska 		for (f = 0; f < ZTEST_FUNCS; f++) {
9051716fd348SMartin Matuska 			zc = ZTEST_GET_SHARED_CALLSTATE(f);
9052716fd348SMartin Matuska 			zc->zc_count = 0;
9053716fd348SMartin Matuska 			zc->zc_time = 0;
9054716fd348SMartin Matuska 		}
9055716fd348SMartin Matuska 
9056716fd348SMartin Matuska 		/* Set the allocation switch size */
9057716fd348SMartin Matuska 		zs->zs_metaslab_df_alloc_threshold =
9058716fd348SMartin Matuska 		    ztest_random(zs->zs_metaslab_sz / 4) + 1;
9059716fd348SMartin Matuska 
9060716fd348SMartin Matuska 		if (!hasalt || ztest_random(2) == 0) {
9061716fd348SMartin Matuska 			if (hasalt && ztest_opts.zo_verbose >= 1) {
9062716fd348SMartin Matuska 				(void) printf("Executing newer ztest: %s\n",
9063716fd348SMartin Matuska 				    cmd);
9064716fd348SMartin Matuska 			}
9065716fd348SMartin Matuska 			newer++;
9066716fd348SMartin Matuska 			killed = exec_child(cmd, NULL, B_TRUE, &status);
9067716fd348SMartin Matuska 		} else {
9068716fd348SMartin Matuska 			if (hasalt && ztest_opts.zo_verbose >= 1) {
9069716fd348SMartin Matuska 				(void) printf("Executing older ztest: %s\n",
9070716fd348SMartin Matuska 				    ztest_opts.zo_alt_ztest);
9071716fd348SMartin Matuska 			}
9072716fd348SMartin Matuska 			older++;
9073716fd348SMartin Matuska 			killed = exec_child(ztest_opts.zo_alt_ztest,
9074716fd348SMartin Matuska 			    ztest_opts.zo_alt_libpath, B_TRUE, &status);
9075716fd348SMartin Matuska 		}
9076716fd348SMartin Matuska 
9077716fd348SMartin Matuska 		if (killed)
9078716fd348SMartin Matuska 			kills++;
9079716fd348SMartin Matuska 		iters++;
9080716fd348SMartin Matuska 
9081716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 1) {
9082716fd348SMartin Matuska 			hrtime_t now = gethrtime();
9083716fd348SMartin Matuska 
9084716fd348SMartin Matuska 			now = MIN(now, zs->zs_proc_stop);
9085716fd348SMartin Matuska 			print_time(zs->zs_proc_stop - now, timebuf);
9086716fd348SMartin Matuska 			nicenum(zs->zs_space, numbuf, sizeof (numbuf));
9087716fd348SMartin Matuska 
9088716fd348SMartin Matuska 			(void) printf("Pass %3d, %8s, %3"PRIu64" ENOSPC, "
9089716fd348SMartin Matuska 			    "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
9090716fd348SMartin Matuska 			    iters,
9091716fd348SMartin Matuska 			    WIFEXITED(status) ? "Complete" : "SIGKILL",
9092716fd348SMartin Matuska 			    zs->zs_enospc_count,
9093716fd348SMartin Matuska 			    100.0 * zs->zs_alloc / zs->zs_space,
9094716fd348SMartin Matuska 			    numbuf,
9095716fd348SMartin Matuska 			    100.0 * (now - zs->zs_proc_start) /
9096716fd348SMartin Matuska 			    (ztest_opts.zo_time * NANOSEC), timebuf);
9097716fd348SMartin Matuska 		}
9098716fd348SMartin Matuska 
9099716fd348SMartin Matuska 		if (ztest_opts.zo_verbose >= 2) {
9100716fd348SMartin Matuska 			(void) printf("\nWorkload summary:\n\n");
9101716fd348SMartin Matuska 			(void) printf("%7s %9s   %s\n",
9102716fd348SMartin Matuska 			    "Calls", "Time", "Function");
9103716fd348SMartin Matuska 			(void) printf("%7s %9s   %s\n",
9104716fd348SMartin Matuska 			    "-----", "----", "--------");
9105716fd348SMartin Matuska 			for (f = 0; f < ZTEST_FUNCS; f++) {
9106716fd348SMartin Matuska 				zi = &ztest_info[f];
9107716fd348SMartin Matuska 				zc = ZTEST_GET_SHARED_CALLSTATE(f);
9108716fd348SMartin Matuska 				print_time(zc->zc_time, timebuf);
9109716fd348SMartin Matuska 				(void) printf("%7"PRIu64" %9s   %s\n",
9110716fd348SMartin Matuska 				    zc->zc_count, timebuf,
9111716fd348SMartin Matuska 				    zi->zi_funcname);
9112716fd348SMartin Matuska 			}
9113716fd348SMartin Matuska 			(void) printf("\n");
9114716fd348SMartin Matuska 		}
9115716fd348SMartin Matuska 
9116716fd348SMartin Matuska 		if (!ztest_opts.zo_mmp_test)
9117abcdc1b9SMartin Matuska 			ztest_run_zdb(zs->zs_guid);
9118e716630dSMartin Matuska 		if (ztest_shared_opts->zo_raidz_expand_test ==
9119e716630dSMartin Matuska 		    RAIDZ_EXPAND_CHECKED)
9120e716630dSMartin Matuska 			break; /* raidz expand test complete */
9121716fd348SMartin Matuska 	}
9122716fd348SMartin Matuska 
9123716fd348SMartin Matuska 	if (ztest_opts.zo_verbose >= 1) {
9124716fd348SMartin Matuska 		if (hasalt) {
9125716fd348SMartin Matuska 			(void) printf("%d runs of older ztest: %s\n", older,
9126716fd348SMartin Matuska 			    ztest_opts.zo_alt_ztest);
9127716fd348SMartin Matuska 			(void) printf("%d runs of newer ztest: %s\n", newer,
9128716fd348SMartin Matuska 			    cmd);
9129716fd348SMartin Matuska 		}
9130716fd348SMartin Matuska 		(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
9131716fd348SMartin Matuska 		    kills, iters - kills, (100.0 * kills) / MAX(1, iters));
9132716fd348SMartin Matuska 	}
9133716fd348SMartin Matuska 
9134716fd348SMartin Matuska 	umem_free(cmd, MAXNAMELEN);
9135716fd348SMartin Matuska 
9136716fd348SMartin Matuska 	return (0);
9137716fd348SMartin Matuska }
9138