xref: /spdk/app/fio/bdev/fio_plugin.c (revision cec5ba284b55d19c90359936d77b707e398829f7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "spdk/bdev.h"
10 #include "spdk/bdev_zone.h"
11 #include "spdk/accel.h"
12 #include "spdk/env.h"
13 #include "spdk/file.h"
14 #include "spdk/init.h"
15 #include "spdk/thread.h"
16 #include "spdk/log.h"
17 #include "spdk/string.h"
18 #include "spdk/queue.h"
19 #include "spdk/util.h"
20 #include "spdk/rpc.h"
21 
22 #include "spdk_internal/event.h"
23 
24 #include "config-host.h"
25 #include "fio.h"
26 #include "optgroup.h"
27 
/* FIO_HAS_ZBD gates the zoned-block-device callbacks further down in this file.
 * It is non-zero only when the fio headers define for_each_rw_ddir and
 * FIO_IOOPS_VERSION is at least 26; otherwise it is 0.
 */
#ifdef for_each_rw_ddir
#define FIO_HAS_ZBD (FIO_IOOPS_VERSION >= 26)
#else
#define FIO_HAS_ZBD (0)
#endif
33 
/* FreeBSD does not provide CLOCK_MONOTONIC_RAW,
 * so fall back to CLOCK_MONOTONIC there. */
36 #ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
37 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
38 #endif
39 
/* Engine-specific options; an instance is reached through td->eo. */
struct spdk_fio_options {
	/* NOTE(review): not referenced in the visible code; presumably a placeholder
	 * required by fio's option handling -- confirm before removing. */
	void *pad;
	/* Configuration file path; mutually exclusive with json_conf. */
	char *conf;
	/* JSON configuration file path; mutually exclusive with conf. */
	char *json_conf;
	/* Copied to spdk_env_opts.env_context when set. */
	char *env_context;
	/* Comma-separated SPDK log flags; tokenized in place with strtok_r(). */
	char *log_flags;
	/* Copied to spdk_env_opts.mem_size when non-zero. */
	unsigned mem_mb;
	/* Copied to spdk_env_opts.hugepage_single_segments. */
	int mem_single_seg;
	/* NOTE(review): consumed outside the visible part of the file. */
	int initial_zone_reset;
	/* NOTE(review): consumed outside the visible part of the file. */
	int zone_append;
	/* Address handed to spdk_rpc_initialize(); the RPC server is not started when NULL. */
	char *rpc_listen_addr;
};
52 
/* Per-io_u engine data: ties an fio io_u to the fio thread that owns it.
 * Allocated in spdk_fio_io_u_init() and released in spdk_fio_io_u_free().
 */
struct spdk_fio_request {
	struct io_u		*io;	/* the fio I/O unit this request wraps */
	struct thread_data	*td;	/* fio thread that created the io_u */
};
57 
/* One opened bdev as seen by a single fio thread; stored in fio_file->engine_data. */
struct spdk_fio_target {
	struct spdk_bdev	*bdev;	/* bdev obtained from desc */
	struct spdk_bdev_desc	*desc;	/* descriptor returned by spdk_bdev_open_ext() */
	struct spdk_io_channel	*ch;	/* I/O channel obtained from desc */
	/* When true, DDIR_WRITE is submitted with spdk_bdev_zone_append(). */
	bool zone_append_enabled;

	/* Linkage in spdk_fio_thread.targets. */
	TAILQ_ENTRY(spdk_fio_target) link;
};
66 
/* Per-thread plugin state. It lives in the SPDK thread's context area
 * (spdk_thread_get_ctx()) and is also published through td->io_ops_data.
 */
struct spdk_fio_thread {
	struct thread_data		*td; /* fio thread context */
	struct spdk_thread		*thread; /* spdk thread context */

	TAILQ_HEAD(, spdk_fio_target)	targets; /* bdevs opened by this thread */
	bool				failed; /* true if the thread failed to initialize */

	struct io_u		**iocq;		/* io completion queue */
	unsigned int		iocq_count;	/* number of iocq entries filled by last getevents */
	unsigned int		iocq_size;	/* number of iocq entries allocated */

	/* Linkage in the global g_threads list polled by the init thread. */
	TAILQ_ENTRY(spdk_fio_thread)	link;
};
80 
/* Argument block shared by spdk_fio_bdev_get_zone_info() and its completion callback. */
struct spdk_fio_zone_cb_arg {
	struct spdk_fio_target *target;		/* target whose zones are queried */
	struct spdk_bdev_zone_info *spdk_zones;	/* buffer filled by spdk_bdev_get_zone_info() */
	/* Result written by the callbacks: number of zones converted, or a negative errno. */
	int completed;
	uint64_t offset_blocks;			/* first block of the zone report */
	struct zbd_zone *fio_zones;		/* output array in fio's zone format */
	unsigned int nr_zones;			/* number of zones requested */
};
89 
/* On App Thread (oat) context used for making sync calls from async calls.
 *
 * The caller fills in the member of 'u' matching the message function, then
 * hands the context to spdk_fio_sync_run_oat(). The message function stores its
 * result in 'ret' and releases the caller with spdk_fio_wake_oat_waiter().
 */
struct spdk_fio_oat_ctx {
	union {
		struct spdk_fio_setup_args {
			struct thread_data *td;
		} sa;
		struct spdk_fio_bdev_get_zoned_model_args {
			struct fio_file *f;
			enum zbd_zoned_model *model;
		} zma;
		struct spdk_fio_bdev_get_max_open_zones_args {
			struct fio_file *f;
			unsigned int *max_open_zones;
		} moza;
	} u;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	int ret;
	/* Predicate for 'cond': set by spdk_fio_wake_oat_waiter() under 'mutex'. */
	bool done;
};

static bool g_spdk_env_initialized = false;
static const char *g_json_config_file = NULL;
static void *g_json_data;
static size_t g_json_data_size;
static const char *g_rpc_listen_addr = NULL;

static int spdk_fio_init(struct thread_data *td);
static void spdk_fio_cleanup(struct thread_data *td);
static size_t spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread);
static int spdk_fio_handle_options(struct thread_data *td, struct fio_file *f,
				   struct spdk_bdev *bdev);
static int spdk_fio_handle_options_per_target(struct thread_data *td, struct fio_file *f);
static void spdk_fio_setup_oat(void *ctx);

static pthread_t g_init_thread_id = 0;
static pthread_mutex_t g_init_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t g_init_cond;
static bool g_poll_loop = true;
static TAILQ_HEAD(, spdk_fio_thread) g_threads = TAILQ_HEAD_INITIALIZER(g_threads);

/* Default polling timeout (ns) */
#define SPDK_FIO_POLLING_TIMEOUT 1000000000ULL

static __thread bool g_internal_thread = false;

/* Run msg_fn on app thread ("oat") and wait for it to call spdk_fio_wake_oat_waiter().
 *
 * ctx->mutex is taken before the message is sent, so msg_fn cannot signal
 * before this thread is waiting. The wait loops on ctx->done so that a
 * spurious wakeup cannot return while msg_fn is still using ctx.
 *
 * If the message cannot be sent, the error is stored in ctx->ret and the
 * function returns without waiting.
 */
static void
spdk_fio_sync_run_oat(void (*msg_fn)(void *), struct spdk_fio_oat_ctx *ctx)
{
	int rc;

	assert(!spdk_thread_is_app_thread(NULL));

	pthread_mutex_init(&ctx->mutex, NULL);
	pthread_cond_init(&ctx->cond, NULL);
	ctx->done = false;
	pthread_mutex_lock(&ctx->mutex);

	rc = spdk_thread_send_msg(spdk_thread_get_app_thread(), msg_fn, ctx);
	if (rc != 0) {
		/* msg_fn will never run, so nobody would wake us up. */
		SPDK_ERRLOG("failed to send message to app thread\n");
		ctx->ret = rc;
		goto out;
	}

	/* Wake up the poll loop in spdk_init_thread_poll() */
	pthread_mutex_lock(&g_init_mtx);
	pthread_cond_signal(&g_init_cond);
	pthread_mutex_unlock(&g_init_mtx);

	/* Wait for msg_fn() to call spdk_fio_wake_oat_waiter() */
	while (!ctx->done) {
		pthread_cond_wait(&ctx->cond, &ctx->mutex);
	}

out:
	pthread_mutex_unlock(&ctx->mutex);

	pthread_mutex_destroy(&ctx->mutex);
	pthread_cond_destroy(&ctx->cond);
}

/* Release the thread blocked in spdk_fio_sync_run_oat() on this context. */
static void
spdk_fio_wake_oat_waiter(struct spdk_fio_oat_ctx *ctx)
{
	pthread_mutex_lock(&ctx->mutex);
	ctx->done = true;
	pthread_cond_signal(&ctx->cond);
	pthread_mutex_unlock(&ctx->mutex);
}
167 
168 static int
169 spdk_fio_schedule_thread(struct spdk_thread *thread)
170 {
171 	struct spdk_fio_thread *fio_thread;
172 
173 	if (g_internal_thread) {
174 		/* Do nothing. */
175 		return 0;
176 	}
177 
178 	fio_thread = spdk_thread_get_ctx(thread);
179 
180 	pthread_mutex_lock(&g_init_mtx);
181 	TAILQ_INSERT_TAIL(&g_threads, fio_thread, link);
182 	pthread_mutex_unlock(&g_init_mtx);
183 
184 	return 0;
185 }
186 
187 static int
188 spdk_fio_init_thread(struct thread_data *td)
189 {
190 	struct spdk_fio_thread *fio_thread;
191 	struct spdk_thread *thread;
192 
193 	g_internal_thread = true;
194 	thread = spdk_thread_create("fio_thread", NULL);
195 	g_internal_thread = false;
196 	if (!thread) {
197 		SPDK_ERRLOG("failed to allocate thread\n");
198 		return -1;
199 	}
200 
201 	fio_thread = spdk_thread_get_ctx(thread);
202 	fio_thread->td = td;
203 	fio_thread->thread = thread;
204 	td->io_ops_data = fio_thread;
205 
206 	spdk_set_thread(thread);
207 
208 	fio_thread->iocq_size = td->o.iodepth;
209 	fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *));
210 	assert(fio_thread->iocq != NULL);
211 
212 	TAILQ_INIT(&fio_thread->targets);
213 
214 	return 0;
215 }
216 
217 static void
218 spdk_fio_bdev_close_targets(void *arg)
219 {
220 	struct spdk_fio_thread *fio_thread = arg;
221 	struct spdk_fio_target *target, *tmp;
222 
223 	TAILQ_FOREACH_SAFE(target, &fio_thread->targets, link, tmp) {
224 		TAILQ_REMOVE(&fio_thread->targets, target, link);
225 		spdk_put_io_channel(target->ch);
226 		spdk_bdev_close(target->desc);
227 		free(target);
228 	}
229 }
230 
231 static void
232 spdk_fio_cleanup_thread(struct spdk_fio_thread *fio_thread)
233 {
234 	spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_close_targets, fio_thread);
235 
236 	pthread_mutex_lock(&g_init_mtx);
237 	TAILQ_INSERT_TAIL(&g_threads, fio_thread, link);
238 	pthread_mutex_unlock(&g_init_mtx);
239 }
240 
241 static void
242 spdk_fio_calc_timeout(struct spdk_fio_thread *fio_thread, struct timespec *ts)
243 {
244 	uint64_t timeout, now;
245 
246 	if (spdk_thread_has_active_pollers(fio_thread->thread)) {
247 		return;
248 	}
249 
250 	timeout = spdk_thread_next_poller_expiration(fio_thread->thread);
251 	now = spdk_get_ticks();
252 
253 	if (timeout == 0) {
254 		timeout = now + (SPDK_FIO_POLLING_TIMEOUT * spdk_get_ticks_hz()) / SPDK_SEC_TO_NSEC;
255 	}
256 
257 	if (timeout > now) {
258 		timeout = ((timeout - now) * SPDK_SEC_TO_NSEC) / spdk_get_ticks_hz() +
259 			  ts->tv_sec * SPDK_SEC_TO_NSEC + ts->tv_nsec;
260 
261 		ts->tv_sec  = timeout / SPDK_SEC_TO_NSEC;
262 		ts->tv_nsec = timeout % SPDK_SEC_TO_NSEC;
263 	}
264 }
265 
266 static void
267 spdk_fio_bdev_init_done(int rc, void *cb_arg)
268 {
269 	*(bool *)cb_arg = true;
270 
271 	free(g_json_data);
272 	if (rc) {
273 		SPDK_ERRLOG("RUNTIME RPCs failed\n");
274 		exit(1);
275 	}
276 }
277 
278 static void
279 spdk_fio_bdev_subsystem_init_done(int rc, void *cb_arg)
280 {
281 	if (rc) {
282 		SPDK_ERRLOG("subsystem init failed\n");
283 		exit(1);
284 	}
285 
286 	spdk_rpc_set_state(SPDK_RPC_RUNTIME);
287 	spdk_subsystem_load_config(g_json_data, g_json_data_size,
288 				   spdk_fio_bdev_init_done, cb_arg, true);
289 }
290 
291 static void
292 spdk_fio_bdev_startup_done(int rc, void *cb_arg)
293 {
294 	if (rc) {
295 		SPDK_ERRLOG("STARTUP RPCs failed\n");
296 		exit(1);
297 	}
298 
299 	if (g_rpc_listen_addr != NULL) {
300 		if (spdk_rpc_initialize(g_rpc_listen_addr, NULL) != 0) {
301 			SPDK_ERRLOG("could not initialize RPC address %s\n", g_rpc_listen_addr);
302 			exit(1);
303 		}
304 	}
305 
306 	spdk_subsystem_init(spdk_fio_bdev_subsystem_init_done, cb_arg);
307 }
308 
309 static void
310 spdk_fio_bdev_init_start(void *arg)
311 {
312 	bool *done = arg;
313 
314 	g_json_data = spdk_posix_file_load_from_name(g_json_config_file, &g_json_data_size);
315 
316 	if (g_json_data == NULL) {
317 		SPDK_ERRLOG("could not allocate buffer for json config file\n");
318 		exit(1);
319 	}
320 
321 	/* Load SPDK_RPC_STARTUP RPCs from config file */
322 	assert(spdk_rpc_get_state() == SPDK_RPC_STARTUP);
323 	spdk_subsystem_load_config(g_json_data, g_json_data_size,
324 				   spdk_fio_bdev_startup_done, done, true);
325 }
326 
/* Completion of spdk_subsystem_fini(): flag the waiter through cb_arg (a bool *)
 * and then shut the RPC layer down.
 */
static void
spdk_fio_bdev_fini_done(void *cb_arg)
{
	bool *finished = cb_arg;

	*finished = true;

	spdk_rpc_finish();
}
334 
335 static void
336 spdk_fio_bdev_fini_start(void *arg)
337 {
338 	bool *done = arg;
339 
340 	spdk_subsystem_fini(spdk_fio_bdev_fini_done, done);
341 }
342 
343 static void *
344 spdk_init_thread_poll(void *arg)
345 {
346 	struct spdk_fio_options		*eo = arg;
347 	struct spdk_fio_thread		*fio_thread;
348 	struct spdk_fio_thread		*thread, *tmp;
349 	struct spdk_env_opts		opts;
350 	bool				done;
351 	int				rc;
352 	struct timespec			ts;
353 	struct thread_data		td = {};
354 
355 	/* Create a dummy thread data for use on the initialization thread. */
356 	td.o.iodepth = 32;
357 	td.eo = eo;
358 
359 	/* Parse the SPDK configuration file */
360 	eo = arg;
361 
362 	if (eo->conf && eo->json_conf) {
363 		SPDK_ERRLOG("Cannot provide two types of configuration files\n");
364 		rc = EINVAL;
365 		goto err_exit;
366 	} else if (eo->conf && strlen(eo->conf)) {
367 		g_json_config_file = eo->conf;
368 	} else if (eo->json_conf && strlen(eo->json_conf)) {
369 		g_json_config_file = eo->json_conf;
370 	} else {
371 		SPDK_ERRLOG("No configuration file provided\n");
372 		rc = EINVAL;
373 		goto err_exit;
374 	}
375 
376 	/* Initialize the RPC listen address */
377 	if (eo->rpc_listen_addr) {
378 		g_rpc_listen_addr = eo->rpc_listen_addr;
379 	}
380 
381 	/* Initialize the environment library */
382 	opts.opts_size = sizeof(opts);
383 	spdk_env_opts_init(&opts);
384 	opts.name = "fio";
385 
386 	if (eo->mem_mb) {
387 		opts.mem_size = eo->mem_mb;
388 	}
389 	opts.hugepage_single_segments = eo->mem_single_seg;
390 	if (eo->env_context) {
391 		opts.env_context = eo->env_context;
392 	}
393 
394 	if (spdk_env_init(&opts) < 0) {
395 		SPDK_ERRLOG("Unable to initialize SPDK env\n");
396 		rc = EINVAL;
397 		goto err_exit;
398 	}
399 	spdk_unaffinitize_thread();
400 
401 	if (eo->log_flags) {
402 		char *sp = NULL;
403 		char *tok = strtok_r(eo->log_flags, ",", &sp);
404 		do {
405 			rc = spdk_log_set_flag(tok);
406 			if (rc < 0) {
407 				SPDK_ERRLOG("unknown spdk log flag %s\n", tok);
408 				rc = EINVAL;
409 				goto err_exit;
410 			}
411 		} while ((tok = strtok_r(NULL, ",", &sp)) != NULL);
412 #ifdef DEBUG
413 		spdk_log_set_print_level(SPDK_LOG_DEBUG);
414 #endif
415 	}
416 
417 	spdk_thread_lib_init(spdk_fio_schedule_thread, sizeof(struct spdk_fio_thread));
418 
419 	/* Create an SPDK thread temporarily */
420 	rc = spdk_fio_init_thread(&td);
421 	if (rc < 0) {
422 		SPDK_ERRLOG("Failed to create initialization thread\n");
423 		goto err_exit;
424 	}
425 
426 	fio_thread = td.io_ops_data;
427 
428 	/* Initialize the bdev layer */
429 	done = false;
430 	spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_init_start, &done);
431 
432 	do {
433 		spdk_fio_poll_thread(fio_thread);
434 	} while (!done);
435 
436 	/*
437 	 * Continue polling until there are no more events.
438 	 * This handles any final events posted by pollers.
439 	 */
440 	while (spdk_fio_poll_thread(fio_thread) > 0) {};
441 
442 	/* Set condition variable */
443 	pthread_mutex_lock(&g_init_mtx);
444 	pthread_cond_signal(&g_init_cond);
445 
446 	pthread_mutex_unlock(&g_init_mtx);
447 
448 	while (g_poll_loop) {
449 		spdk_fio_poll_thread(fio_thread);
450 
451 		pthread_mutex_lock(&g_init_mtx);
452 		if (!TAILQ_EMPTY(&g_threads)) {
453 			TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) {
454 				if (spdk_thread_is_exited(thread->thread)) {
455 					TAILQ_REMOVE(&g_threads, thread, link);
456 					free(thread->iocq);
457 					spdk_thread_destroy(thread->thread);
458 				} else {
459 					spdk_fio_poll_thread(thread);
460 				}
461 			}
462 
463 			/* If there are exiting threads to poll, don't sleep. */
464 			pthread_mutex_unlock(&g_init_mtx);
465 			continue;
466 		}
467 
468 		/* Figure out how long to sleep. */
469 		clock_gettime(CLOCK_MONOTONIC, &ts);
470 		spdk_fio_calc_timeout(fio_thread, &ts);
471 
472 		rc = pthread_cond_timedwait(&g_init_cond, &g_init_mtx, &ts);
473 		pthread_mutex_unlock(&g_init_mtx);
474 
475 		if (rc != 0 && rc != ETIMEDOUT) {
476 			break;
477 		}
478 	}
479 
480 	spdk_fio_cleanup_thread(fio_thread);
481 
482 	/* Finalize the bdev layer */
483 	done = false;
484 	spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_fini_start, &done);
485 
486 	do {
487 		spdk_fio_poll_thread(fio_thread);
488 
489 		TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) {
490 			spdk_fio_poll_thread(thread);
491 		}
492 	} while (!done);
493 
494 	/* Now exit all the threads */
495 	TAILQ_FOREACH(thread, &g_threads, link) {
496 		spdk_set_thread(thread->thread);
497 		spdk_thread_exit(thread->thread);
498 		spdk_set_thread(NULL);
499 	}
500 
501 	/* And wait for them to gracefully exit */
502 	while (!TAILQ_EMPTY(&g_threads)) {
503 		TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) {
504 			if (spdk_thread_is_exited(thread->thread)) {
505 				TAILQ_REMOVE(&g_threads, thread, link);
506 				free(thread->iocq);
507 				spdk_thread_destroy(thread->thread);
508 			} else {
509 				spdk_thread_poll(thread->thread, 0, 0);
510 			}
511 		}
512 	}
513 
514 	pthread_exit(NULL);
515 
516 err_exit:
517 	exit(rc);
518 	return NULL;
519 }
520 
521 static int
522 spdk_fio_init_env(struct thread_data *td)
523 {
524 	pthread_condattr_t attr;
525 	int rc = -1;
526 
527 	if (pthread_condattr_init(&attr)) {
528 		SPDK_ERRLOG("Unable to initialize condition variable\n");
529 		return -1;
530 	}
531 
532 	if (pthread_condattr_setclock(&attr, CLOCK_MONOTONIC)) {
533 		SPDK_ERRLOG("Unable to initialize condition variable\n");
534 		goto out;
535 	}
536 
537 	if (pthread_cond_init(&g_init_cond, &attr)) {
538 		SPDK_ERRLOG("Unable to initialize condition variable\n");
539 		goto out;
540 	}
541 
542 	/*
543 	 * Spawn a thread to handle initialization operations and to poll things
544 	 * like the admin queues periodically.
545 	 */
546 	rc = pthread_create(&g_init_thread_id, NULL, &spdk_init_thread_poll, td->eo);
547 	if (rc != 0) {
548 		SPDK_ERRLOG("Unable to spawn thread to poll admin queue. It won't be polled.\n");
549 	}
550 
551 	/* Wait for background thread to advance past the initialization */
552 	pthread_mutex_lock(&g_init_mtx);
553 	pthread_cond_wait(&g_init_cond, &g_init_mtx);
554 	pthread_mutex_unlock(&g_init_mtx);
555 out:
556 	pthread_condattr_destroy(&attr);
557 	return rc;
558 }
559 
/* Report whether both stdout (fd 1) and stderr (fd 2) point at /dev/null.
 *
 * The targets are read from /proc/self/fd. readlink() does not NUL-terminate
 * its result, so the buffer is terminated explicitly at the returned length;
 * otherwise bytes left over from a previous call could make a shorter target
 * such as "/dev" compare equal to "/dev/null".
 *
 * Returns false if either link cannot be read or names anything else.
 */
static bool
fio_redirected_to_dev_null(void)
{
	static const char *const fd_links[] = { "/proc/self/fd/1", "/proc/self/fd/2" };
	char path[PATH_MAX];
	ssize_t len;
	size_t i;

	for (i = 0; i < sizeof(fd_links) / sizeof(fd_links[0]); i++) {
		/* Reserve one byte for the terminator. */
		len = readlink(fd_links[i], path, sizeof(path) - 1);
		if (len == -1) {
			return false;
		}
		path[len] = '\0';

		if (strcmp(path, "/dev/null") != 0) {
			return false;
		}
	}

	return true;
}
580 
581 static int
582 spdk_fio_init_spdk_env(struct thread_data *td)
583 {
584 	static pthread_mutex_t setup_lock = PTHREAD_MUTEX_INITIALIZER;
585 
586 	pthread_mutex_lock(&setup_lock);
587 	if (!g_spdk_env_initialized) {
588 		if (spdk_fio_init_env(td)) {
589 			pthread_mutex_unlock(&setup_lock);
590 			SPDK_ERRLOG("failed to initialize\n");
591 			return -1;
592 		}
593 
594 		g_spdk_env_initialized = true;
595 	}
596 	pthread_mutex_unlock(&setup_lock);
597 
598 	return 0;
599 }
600 
601 /* Called for each thread to fill in the 'real_file_size' member for
602  * each file associated with this thread. This is called prior to
603  * the init operation (spdk_fio_init()) below. This call will occur
604  * on the initial start up thread if 'create_serialize' is true, or
605  * on the thread actually associated with 'thread_data' if 'create_serialize'
606  * is false.
607  */
608 static int
609 spdk_fio_setup(struct thread_data *td)
610 {
611 	struct spdk_fio_oat_ctx ctx = { 0 };
612 
613 	/*
614 	 * If we're running in a daemonized FIO instance, it's possible
615 	 * fd 1/2 were re-used for something important by FIO. Newer fio
616 	 * versions are careful to redirect those to /dev/null, but if we're
617 	 * not, we'll abort early, so we don't accidentally write messages to
618 	 * an important file, etc.
619 	 */
620 	if (is_backend && !fio_redirected_to_dev_null()) {
621 		char buf[1024];
622 		snprintf(buf, sizeof(buf),
623 			 "SPDK FIO plugin is in daemon mode, but stdout/stderr "
624 			 "aren't redirected to /dev/null. Aborting.");
625 		fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf));
626 		return -1;
627 	}
628 
629 	if (!td->o.use_thread) {
630 		SPDK_ERRLOG("must set thread=1 when using spdk plugin\n");
631 		return -1;
632 	}
633 
634 	if (spdk_fio_init_spdk_env(td) != 0) {
635 		return -1;
636 	}
637 
638 	ctx.u.sa.td = td;
639 	spdk_fio_sync_run_oat(spdk_fio_setup_oat, &ctx);
640 	return ctx.ret;
641 }
642 
/* Callback for spdk_for_each_bdev_leaf(): register the bdev's name as an fio
 * file on the thread_data passed in ctx. Always returns 0.
 */
static int
_spdk_fio_add_file(void *ctx, struct spdk_bdev *bdev)
{
	const char *bdev_name = spdk_bdev_get_name(bdev);

	add_file((struct thread_data *)ctx, bdev_name, 0, 1);

	return 0;
}
651 
652 static void
653 spdk_fio_setup_oat(void *_ctx)
654 {
655 	struct spdk_fio_oat_ctx *ctx = _ctx;
656 	struct thread_data *td = ctx->u.sa.td;
657 	unsigned int i;
658 	struct fio_file *f;
659 
660 	if (td->o.nr_files == 1 && strcmp(td->files[0]->file_name, "*") == 0) {
661 		/* add all available bdevs as fio targets */
662 		spdk_for_each_bdev_leaf(td, _spdk_fio_add_file);
663 	}
664 
665 	for_each_file(td, f, i) {
666 		struct spdk_bdev *bdev;
667 
668 		if (strcmp(f->file_name, "*") == 0) {
669 			/* Explicitly set file size to 0 here to make sure fio doesn't try to
670 			 * actually send I/O to this "*" file.
671 			 */
672 			f->real_file_size = 0;
673 			continue;
674 		}
675 
676 		bdev = spdk_bdev_get_by_name(f->file_name);
677 		if (!bdev) {
678 			SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name);
679 			ctx->ret = -1;
680 			goto out;
681 		}
682 
683 		f->real_file_size = spdk_bdev_get_num_blocks(bdev) *
684 				    spdk_bdev_get_block_size(bdev);
685 		f->filetype = FIO_TYPE_BLOCK;
686 		fio_file_set_size_known(f);
687 
688 		ctx->ret = spdk_fio_handle_options(td, f, bdev);
689 		if (ctx->ret) {
690 			goto out;
691 		}
692 	}
693 
694 	ctx->ret = 0;
695 out:
696 	spdk_fio_wake_oat_waiter(ctx);
697 }
698 
699 static void
700 fio_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
701 		  void *event_ctx)
702 {
703 	SPDK_WARNLOG("Unsupported bdev event: type %d\n", type);
704 }
705 
706 static void
707 spdk_fio_bdev_open(void *arg)
708 {
709 	struct thread_data *td = arg;
710 	struct spdk_fio_thread *fio_thread;
711 	unsigned int i;
712 	struct fio_file *f;
713 	int rc;
714 
715 	fio_thread = td->io_ops_data;
716 
717 	for_each_file(td, f, i) {
718 		struct spdk_fio_target *target;
719 
720 		if (strcmp(f->file_name, "*") == 0) {
721 			continue;
722 		}
723 
724 		target = calloc(1, sizeof(*target));
725 		if (!target) {
726 			SPDK_ERRLOG("Unable to allocate memory for I/O target.\n");
727 			fio_thread->failed = true;
728 			return;
729 		}
730 
731 		rc = spdk_bdev_open_ext(f->file_name, true, fio_bdev_event_cb, NULL,
732 					&target->desc);
733 		if (rc) {
734 			SPDK_ERRLOG("Unable to open bdev %s\n", f->file_name);
735 			free(target);
736 			fio_thread->failed = true;
737 			return;
738 		}
739 
740 		target->bdev = spdk_bdev_desc_get_bdev(target->desc);
741 
742 		target->ch = spdk_bdev_get_io_channel(target->desc);
743 		if (!target->ch) {
744 			SPDK_ERRLOG("Unable to get I/O channel for bdev.\n");
745 			spdk_bdev_close(target->desc);
746 			free(target);
747 			fio_thread->failed = true;
748 			return;
749 		}
750 
751 		f->engine_data = target;
752 
753 		rc = spdk_fio_handle_options_per_target(td, f);
754 		if (rc) {
755 			SPDK_ERRLOG("Failed to handle options for: %s\n", f->file_name);
756 			f->engine_data = NULL;
757 			spdk_put_io_channel(target->ch);
758 			spdk_bdev_close(target->desc);
759 			free(target);
760 			fio_thread->failed = true;
761 			return;
762 		}
763 
764 		TAILQ_INSERT_TAIL(&fio_thread->targets, target, link);
765 	}
766 }
767 
768 /* Called for each thread, on that thread, shortly after the thread
769  * starts.
770  *
771  * Also called by spdk_fio_report_zones(), since we need an I/O channel
772  * in order to get the zone report. (fio calls the .report_zones callback
773  * before it calls the .init callback.)
774  * Therefore, if fio was run with --zonemode=zbd, the thread will already
775  * be initialized by the time that fio calls the .init callback.
776  */
777 static int
778 spdk_fio_init(struct thread_data *td)
779 {
780 	struct spdk_fio_thread *fio_thread;
781 	int rc;
782 
783 	if (spdk_fio_init_spdk_env(td) != 0) {
784 		return -1;
785 	}
786 
787 	/* If thread has already been initialized, do nothing. */
788 	if (td->io_ops_data) {
789 		return 0;
790 	}
791 
792 	rc = spdk_fio_init_thread(td);
793 	if (rc) {
794 		return rc;
795 	}
796 
797 	fio_thread = td->io_ops_data;
798 	assert(fio_thread);
799 	fio_thread->failed = false;
800 
801 	spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_open, td);
802 
803 	while (spdk_fio_poll_thread(fio_thread) > 0) {}
804 
805 	if (fio_thread->failed) {
806 		return -1;
807 	}
808 
809 	return 0;
810 }
811 
812 static void
813 spdk_fio_cleanup(struct thread_data *td)
814 {
815 	struct spdk_fio_thread *fio_thread = td->io_ops_data;
816 
817 	spdk_fio_cleanup_thread(fio_thread);
818 	td->io_ops_data = NULL;
819 }
820 
/* Per-file open hook: nothing to do here, always reports success. */
static int
spdk_fio_open(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	return 0;
}
827 
/* Per-file close hook: nothing to do here, always reports success. */
static int
spdk_fio_close(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	return 0;
}
833 
834 static int
835 spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem)
836 {
837 	struct spdk_fio_thread	*fio_thread = td->io_ops_data;
838 	struct spdk_fio_target	*fio_target;
839 	int32_t numa_id = SPDK_ENV_NUMA_ID_ANY, tmp_numa_id;
840 
841 	/* If all bdevs used by this fio_thread have the same numa socket
842 	 * id, allocate from that socket. If they come from different numa
843 	 * sockets, then don't try to optimize and just use NUMA_ID_ANY.
844 	 */
845 	TAILQ_FOREACH(fio_target, &fio_thread->targets, link) {
846 		tmp_numa_id = spdk_bdev_get_numa_id(fio_target->bdev);
847 		if (numa_id == SPDK_ENV_NUMA_ID_ANY) {
848 			numa_id = tmp_numa_id;
849 		} else if (tmp_numa_id != numa_id &&
850 			   tmp_numa_id != SPDK_ENV_NUMA_ID_ANY) {
851 			numa_id = SPDK_ENV_NUMA_ID_ANY;
852 			break;
853 		}
854 	}
855 
856 	td->orig_buffer = spdk_dma_zmalloc_socket(total_mem, 0x1000, NULL, numa_id);
857 	return td->orig_buffer == NULL;
858 }
859 
860 static void
861 spdk_fio_iomem_free(struct thread_data *td)
862 {
863 	spdk_dma_free(td->orig_buffer);
864 }
865 
866 static int
867 spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
868 {
869 	struct spdk_fio_request	*fio_req;
870 
871 	io_u->engine_data = NULL;
872 
873 	fio_req = calloc(1, sizeof(*fio_req));
874 	if (fio_req == NULL) {
875 		return 1;
876 	}
877 	fio_req->io = io_u;
878 	fio_req->td = td;
879 
880 	io_u->engine_data = fio_req;
881 
882 	return 0;
883 }
884 
885 static void
886 spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
887 {
888 	struct spdk_fio_request *fio_req = io_u->engine_data;
889 
890 	if (fio_req) {
891 		assert(fio_req->io == io_u);
892 		free(fio_req);
893 		io_u->engine_data = NULL;
894 	}
895 }
896 
897 static void
898 spdk_fio_completion_cb(struct spdk_bdev_io *bdev_io,
899 		       bool success,
900 		       void *cb_arg)
901 {
902 	struct spdk_fio_request		*fio_req = cb_arg;
903 	struct thread_data		*td = fio_req->td;
904 	struct spdk_fio_thread		*fio_thread = td->io_ops_data;
905 
906 	assert(fio_thread->iocq_count < fio_thread->iocq_size);
907 	fio_req->io->error = success ? 0 : EIO;
908 	fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io;
909 
910 	spdk_bdev_free_io(bdev_io);
911 }
912 
/* Return type of spdk_fio_queue(): enum fio_q_status when FIO_IOOPS_VERSION is
 * at least 24, plain int otherwise.
 */
#if FIO_IOOPS_VERSION >= 24
typedef enum fio_q_status fio_q_status_t;
#else
typedef int fio_q_status_t;
#endif
918 
/* Convert a byte offset/length pair into block units for a zone append.
 *
 * Stores the zone id containing the offset in *zone_start and the length in
 * blocks in *num_blocks. Returns 0 when both offset_bytes and num_bytes are
 * multiples of the bdev block size, non-zero otherwise.
 */
static uint64_t
spdk_fio_zone_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *zone_start,
			      uint64_t num_bytes, uint64_t *num_blocks)
{
	const uint32_t blk_sz = spdk_bdev_get_block_size(bdev);
	const uint64_t first_block = offset_bytes / blk_sz;

	*zone_start = spdk_bdev_get_zone_id(bdev, first_block);
	*num_blocks = num_bytes / blk_sz;

	return (offset_bytes % blk_sz) | (num_bytes % blk_sz);
}
928 
929 static fio_q_status_t
930 spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
931 {
932 	int rc = 1;
933 	struct spdk_fio_request	*fio_req = io_u->engine_data;
934 	struct spdk_fio_target *target = io_u->file->engine_data;
935 
936 	assert(fio_req->td == td);
937 
938 	if (!target) {
939 		SPDK_ERRLOG("Unable to look up correct I/O target.\n");
940 		fio_req->io->error = ENODEV;
941 		return FIO_Q_COMPLETED;
942 	}
943 
944 	switch (io_u->ddir) {
945 	case DDIR_READ:
946 		rc = spdk_bdev_read(target->desc, target->ch,
947 				    io_u->buf, io_u->offset, io_u->xfer_buflen,
948 				    spdk_fio_completion_cb, fio_req);
949 		break;
950 	case DDIR_WRITE:
951 		if (!target->zone_append_enabled) {
952 			rc = spdk_bdev_write(target->desc, target->ch,
953 					     io_u->buf, io_u->offset, io_u->xfer_buflen,
954 					     spdk_fio_completion_cb, fio_req);
955 		} else {
956 			uint64_t zone_start, num_blocks;
957 			if (spdk_fio_zone_bytes_to_blocks(target->bdev, io_u->offset, &zone_start,
958 							  io_u->xfer_buflen, &num_blocks) != 0) {
959 				rc = -EINVAL;
960 				break;
961 			}
962 			rc = spdk_bdev_zone_append(target->desc, target->ch, io_u->buf,
963 						   zone_start, num_blocks, spdk_fio_completion_cb,
964 						   fio_req);
965 		}
966 		break;
967 	case DDIR_TRIM:
968 		rc = spdk_bdev_unmap(target->desc, target->ch,
969 				     io_u->offset, io_u->xfer_buflen,
970 				     spdk_fio_completion_cb, fio_req);
971 		break;
972 	case DDIR_SYNC:
973 		rc = spdk_bdev_flush(target->desc, target->ch,
974 				     io_u->offset, io_u->xfer_buflen,
975 				     spdk_fio_completion_cb, fio_req);
976 		break;
977 	default:
978 		assert(false);
979 		break;
980 	}
981 
982 	if (rc == -ENOMEM) {
983 		return FIO_Q_BUSY;
984 	}
985 
986 	if (rc != 0) {
987 		fio_req->io->error = abs(rc);
988 		return FIO_Q_COMPLETED;
989 	}
990 
991 	return FIO_Q_QUEUED;
992 }
993 
994 static struct io_u *
995 spdk_fio_event(struct thread_data *td, int event)
996 {
997 	struct spdk_fio_thread *fio_thread = td->io_ops_data;
998 
999 	assert(event >= 0);
1000 	assert((unsigned)event < fio_thread->iocq_count);
1001 	return fio_thread->iocq[event];
1002 }
1003 
1004 static size_t
1005 spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread)
1006 {
1007 	return spdk_thread_poll(fio_thread->thread, 0, 0);
1008 }
1009 
1010 static int
1011 spdk_fio_getevents(struct thread_data *td, unsigned int min,
1012 		   unsigned int max, const struct timespec *t)
1013 {
1014 	struct spdk_fio_thread *fio_thread = td->io_ops_data;
1015 	struct timespec t0, t1;
1016 	uint64_t timeout = 0;
1017 
1018 	if (t) {
1019 		timeout = t->tv_sec * SPDK_SEC_TO_NSEC + t->tv_nsec;
1020 		clock_gettime(CLOCK_MONOTONIC_RAW, &t0);
1021 	}
1022 
1023 	fio_thread->iocq_count = 0;
1024 
1025 	for (;;) {
1026 		spdk_fio_poll_thread(fio_thread);
1027 
1028 		if (fio_thread->iocq_count >= min) {
1029 			return fio_thread->iocq_count;
1030 		}
1031 
1032 		if (t) {
1033 			clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
1034 			uint64_t elapse = ((t1.tv_sec - t0.tv_sec) * SPDK_SEC_TO_NSEC)
1035 					  + t1.tv_nsec - t0.tv_nsec;
1036 			if (elapse > timeout) {
1037 				break;
1038 			}
1039 		}
1040 	}
1041 
1042 	return fio_thread->iocq_count;
1043 }
1044 
/* Cache-invalidate hook; currently a no-op that reports success. */
static int
spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	/* TODO: This should probably send a flush to the device, but for now just return successful. */
	return 0;
}
1051 
1052 #if FIO_HAS_ZBD
1053 /* Runs on app thread (oat) */
1054 static void
1055 spdk_fio_get_zoned_model_oat(void *arg)
1056 {
1057 	struct spdk_fio_oat_ctx *ctx = arg;
1058 	struct fio_file *f = ctx->u.zma.f;
1059 	enum zbd_zoned_model *model = ctx->u.zma.model;
1060 	struct spdk_bdev *bdev;
1061 
1062 	if (f->filetype != FIO_TYPE_BLOCK) {
1063 		SPDK_ERRLOG("Unsupported filetype: %d\n", f->filetype);
1064 		ctx->ret = -EINVAL;
1065 		goto out;
1066 	}
1067 
1068 	bdev = spdk_bdev_get_by_name(f->file_name);
1069 	if (!bdev) {
1070 		SPDK_ERRLOG("Cannot get zoned model, no bdev with name: %s\n", f->file_name);
1071 		ctx->ret = -ENODEV;
1072 		goto out;
1073 	}
1074 
1075 	if (spdk_bdev_is_zoned(bdev)) {
1076 		*model = ZBD_HOST_MANAGED;
1077 	} else {
1078 		*model = ZBD_NONE;
1079 	}
1080 
1081 	ctx->ret = 0;
1082 out:
1083 	spdk_fio_wake_oat_waiter(ctx);
1084 }
1085 
1086 static int
1087 spdk_fio_get_zoned_model(struct thread_data *td, struct fio_file *f, enum zbd_zoned_model *model)
1088 {
1089 	struct spdk_fio_oat_ctx ctx = { 0 };
1090 
1091 	ctx.u.zma.f = f;
1092 	ctx.u.zma.model = model;
1093 
1094 	spdk_fio_sync_run_oat(spdk_fio_get_zoned_model_oat, &ctx);
1095 
1096 	return ctx.ret;
1097 }
1098 
1099 
1100 static void
1101 spdk_fio_bdev_get_zone_info_done(struct spdk_bdev_io *bdev_io, bool success, void *arg)
1102 {
1103 	struct spdk_fio_zone_cb_arg *cb_arg = arg;
1104 	unsigned int i;
1105 	int handled_zones = 0;
1106 
1107 	if (!success) {
1108 		spdk_bdev_free_io(bdev_io);
1109 		cb_arg->completed = -EIO;
1110 		return;
1111 	}
1112 
1113 	for (i = 0; i < cb_arg->nr_zones; i++) {
1114 		struct spdk_bdev_zone_info *zone_src = &cb_arg->spdk_zones[handled_zones];
1115 		struct zbd_zone *zone_dest = &cb_arg->fio_zones[handled_zones];
1116 		uint32_t block_size = spdk_bdev_get_block_size(cb_arg->target->bdev);
1117 
1118 		switch (zone_src->type) {
1119 		case SPDK_BDEV_ZONE_TYPE_SEQWR:
1120 			zone_dest->type = ZBD_ZONE_TYPE_SWR;
1121 			break;
1122 		case SPDK_BDEV_ZONE_TYPE_SEQWP:
1123 			zone_dest->type = ZBD_ZONE_TYPE_SWP;
1124 			break;
1125 		case SPDK_BDEV_ZONE_TYPE_CNV:
1126 			zone_dest->type = ZBD_ZONE_TYPE_CNV;
1127 			break;
1128 		default:
1129 			spdk_bdev_free_io(bdev_io);
1130 			cb_arg->completed = -EIO;
1131 			return;
1132 		}
1133 
1134 		zone_dest->len = spdk_bdev_get_zone_size(cb_arg->target->bdev) * block_size;
1135 		zone_dest->capacity = zone_src->capacity * block_size;
1136 		zone_dest->start = zone_src->zone_id * block_size;
1137 		zone_dest->wp = zone_src->write_pointer * block_size;
1138 
1139 		switch (zone_src->state) {
1140 		case SPDK_BDEV_ZONE_STATE_EMPTY:
1141 			zone_dest->cond = ZBD_ZONE_COND_EMPTY;
1142 			break;
1143 		case SPDK_BDEV_ZONE_STATE_IMP_OPEN:
1144 			zone_dest->cond = ZBD_ZONE_COND_IMP_OPEN;
1145 			break;
1146 		case SPDK_BDEV_ZONE_STATE_EXP_OPEN:
1147 			zone_dest->cond = ZBD_ZONE_COND_EXP_OPEN;
1148 			break;
1149 		case SPDK_BDEV_ZONE_STATE_FULL:
1150 			zone_dest->cond = ZBD_ZONE_COND_FULL;
1151 			break;
1152 		case SPDK_BDEV_ZONE_STATE_CLOSED:
1153 			zone_dest->cond = ZBD_ZONE_COND_CLOSED;
1154 			break;
1155 		case SPDK_BDEV_ZONE_STATE_READ_ONLY:
1156 			zone_dest->cond = ZBD_ZONE_COND_READONLY;
1157 			break;
1158 		case SPDK_BDEV_ZONE_STATE_OFFLINE:
1159 			zone_dest->cond = ZBD_ZONE_COND_OFFLINE;
1160 			break;
1161 		case SPDK_BDEV_ZONE_STATE_NOT_WP:
1162 			zone_dest->cond = ZBD_ZONE_COND_NOT_WP;
1163 			/* Set WP to end of zone for zone types w/o WP (e.g. Conv. zones in SMR) */
1164 			zone_dest->wp = zone_dest->start + zone_dest->capacity;
1165 			break;
1166 		default:
1167 			spdk_bdev_free_io(bdev_io);
1168 			cb_arg->completed = -EIO;
1169 			return;
1170 		}
1171 		handled_zones++;
1172 	}
1173 
1174 	spdk_bdev_free_io(bdev_io);
1175 	cb_arg->completed = handled_zones;
1176 }
1177 
1178 static void
1179 spdk_fio_bdev_get_zone_info(void *arg)
1180 {
1181 	struct spdk_fio_zone_cb_arg *cb_arg = arg;
1182 	struct spdk_fio_target *target = cb_arg->target;
1183 	int rc;
1184 
1185 	rc = spdk_bdev_get_zone_info(target->desc, target->ch, cb_arg->offset_blocks,
1186 				     cb_arg->nr_zones, cb_arg->spdk_zones,
1187 				     spdk_fio_bdev_get_zone_info_done, cb_arg);
1188 	if (rc < 0) {
1189 		cb_arg->completed = rc;
1190 	}
1191 }
1192 
1193 static int
1194 spdk_fio_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
1195 		      struct zbd_zone *zones, unsigned int nr_zones)
1196 {
1197 	struct spdk_fio_target *target;
1198 	struct spdk_fio_thread *fio_thread;
1199 	struct spdk_fio_zone_cb_arg cb_arg;
1200 	uint32_t block_size;
1201 	int rc;
1202 
1203 	if (nr_zones == 0) {
1204 		return 0;
1205 	}
1206 
1207 	/* spdk_fio_report_zones() is only called before the bdev I/O channels have been created.
1208 	 * Since we need an I/O channel for report_zones(), call spdk_fio_init() to initialize
1209 	 * the thread early.
1210 	 * spdk_fio_report_zones() might be called several times by fio, if e.g. the zone report
1211 	 * for all zones does not fit in the buffer that fio has allocated for the zone report.
1212 	 * It is safe to call spdk_fio_init(), even if the thread has already been initialized.
1213 	 */
1214 	rc = spdk_fio_init(td);
1215 	if (rc) {
1216 		return rc;
1217 	}
1218 	fio_thread = td->io_ops_data;
1219 	target = f->engine_data;
1220 
1221 	assert(fio_thread);
1222 	assert(target);
1223 
1224 	block_size = spdk_bdev_get_block_size(target->bdev);
1225 
1226 	cb_arg.target = target;
1227 	cb_arg.completed = 0;
1228 	cb_arg.offset_blocks = offset / block_size;
1229 	cb_arg.fio_zones = zones;
1230 	cb_arg.nr_zones = spdk_min(nr_zones, spdk_bdev_get_num_zones(target->bdev));
1231 
1232 	cb_arg.spdk_zones = calloc(1, sizeof(*cb_arg.spdk_zones) * cb_arg.nr_zones);
1233 	if (!cb_arg.spdk_zones) {
1234 		SPDK_ERRLOG("Could not allocate memory for zone report!\n");
1235 		rc = -ENOMEM;
1236 		goto cleanup_thread;
1237 	}
1238 
1239 	spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_get_zone_info, &cb_arg);
1240 	do {
1241 		spdk_fio_poll_thread(fio_thread);
1242 	} while (!cb_arg.completed);
1243 
1244 	/* Free cb_arg.spdk_zones. The report in fio format is stored in cb_arg.fio_zones/zones. */
1245 	free(cb_arg.spdk_zones);
1246 
1247 	rc = cb_arg.completed;
1248 	if (rc < 0) {
1249 		SPDK_ERRLOG("Failed to get zone info: %d\n", rc);
1250 		goto cleanup_thread;
1251 	}
1252 
1253 	/* Return the amount of zones successfully copied. */
1254 	return rc;
1255 
1256 cleanup_thread:
1257 	spdk_fio_cleanup(td);
1258 
1259 	return rc;
1260 }
1261 
1262 static void
1263 spdk_fio_bdev_zone_reset_done(struct spdk_bdev_io *bdev_io, bool success, void *arg)
1264 {
1265 	struct spdk_fio_zone_cb_arg *cb_arg = arg;
1266 
1267 	spdk_bdev_free_io(bdev_io);
1268 
1269 	if (!success) {
1270 		cb_arg->completed = -EIO;
1271 	} else {
1272 		cb_arg->completed = 1;
1273 	}
1274 }
1275 
1276 static void
1277 spdk_fio_bdev_zone_reset(void *arg)
1278 {
1279 	struct spdk_fio_zone_cb_arg *cb_arg = arg;
1280 	struct spdk_fio_target *target = cb_arg->target;
1281 	int rc;
1282 
1283 	rc = spdk_bdev_zone_management(target->desc, target->ch, cb_arg->offset_blocks,
1284 				       SPDK_BDEV_ZONE_RESET,
1285 				       spdk_fio_bdev_zone_reset_done, cb_arg);
1286 	if (rc < 0) {
1287 		cb_arg->completed = rc;
1288 	}
1289 }
1290 
1291 static int
1292 spdk_fio_reset_zones(struct spdk_fio_thread *fio_thread, struct spdk_fio_target *target,
1293 		     uint64_t offset, uint64_t length)
1294 {
1295 	uint64_t zone_size_bytes;
1296 	uint32_t block_size;
1297 	int rc;
1298 
1299 	assert(fio_thread);
1300 	assert(target);
1301 
1302 	block_size = spdk_bdev_get_block_size(target->bdev);
1303 	zone_size_bytes = spdk_bdev_get_zone_size(target->bdev) * block_size;
1304 
1305 	for (uint64_t cur = offset; cur < offset + length; cur += zone_size_bytes) {
1306 		struct spdk_fio_zone_cb_arg cb_arg = {
1307 			.target = target,
1308 			.completed = 0,
1309 			.offset_blocks = cur / block_size,
1310 		};
1311 
1312 		spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_zone_reset, &cb_arg);
1313 		do {
1314 			spdk_fio_poll_thread(fio_thread);
1315 		} while (!cb_arg.completed);
1316 
1317 		rc = cb_arg.completed;
1318 		if (rc < 0) {
1319 			SPDK_ERRLOG("Failed to reset zone: %d\n", rc);
1320 			return rc;
1321 		}
1322 	}
1323 
1324 	return 0;
1325 }
1326 
1327 static int
1328 spdk_fio_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset, uint64_t length)
1329 {
1330 	return spdk_fio_reset_zones(td->io_ops_data, f->engine_data, offset, length);
1331 }
1332 #endif
1333 
1334 #if FIO_IOOPS_VERSION >= 30
1335 static void
1336 spdk_fio_get_max_open_zones_oat(void *_ctx)
1337 {
1338 	struct spdk_fio_oat_ctx *ctx = _ctx;
1339 	struct fio_file *f = ctx->u.moza.f;
1340 	struct spdk_bdev *bdev;
1341 
1342 	bdev = spdk_bdev_get_by_name(f->file_name);
1343 	if (!bdev) {
1344 		SPDK_ERRLOG("Cannot get max open zones, no bdev with name: %s\n", f->file_name);
1345 		ctx->ret = -ENODEV;
1346 	} else {
1347 		*ctx->u.moza.max_open_zones = spdk_bdev_get_max_open_zones(bdev);
1348 		ctx->ret = 0;
1349 	}
1350 
1351 	spdk_fio_wake_oat_waiter(ctx);
1352 }
1353 
1354 static int
1355 spdk_fio_get_max_open_zones(struct thread_data *td, struct fio_file *f,
1356 			    unsigned int *max_open_zones)
1357 {
1358 	struct spdk_fio_oat_ctx ctx = { 0 };
1359 
1360 	ctx.u.moza.f = f;
1361 	ctx.u.moza.max_open_zones = max_open_zones;
1362 
1363 	spdk_fio_sync_run_oat(spdk_fio_get_max_open_zones_oat, &ctx);
1364 
1365 	return ctx.ret;
1366 }
1367 #endif
1368 
1369 static int
1370 spdk_fio_handle_options(struct thread_data *td, struct fio_file *f, struct spdk_bdev *bdev)
1371 {
1372 	struct spdk_fio_options *fio_options = td->eo;
1373 
1374 	if (fio_options->initial_zone_reset && spdk_bdev_is_zoned(bdev)) {
1375 #if FIO_HAS_ZBD
1376 		int rc = spdk_fio_init(td);
1377 		if (rc) {
1378 			return rc;
1379 		}
1380 		/* offset used to indicate conventional zones that need to be skipped (reset not allowed) */
1381 		rc = spdk_fio_reset_zones(td->io_ops_data, f->engine_data, td->o.start_offset,
1382 					  f->real_file_size - td->o.start_offset);
1383 		if (rc) {
1384 			spdk_fio_cleanup(td);
1385 			return rc;
1386 		}
1387 #else
1388 		SPDK_ERRLOG("fio version is too old to support zoned block devices\n");
1389 #endif
1390 	}
1391 
1392 	return 0;
1393 }
1394 
1395 static int
1396 spdk_fio_handle_options_per_target(struct thread_data *td, struct fio_file *f)
1397 {
1398 	struct spdk_fio_target *target = f->engine_data;
1399 	struct spdk_fio_options *fio_options = td->eo;
1400 
1401 	if (fio_options->zone_append && spdk_bdev_is_zoned(target->bdev)) {
1402 		if (spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
1403 			SPDK_DEBUGLOG(fio_bdev, "Using zone appends instead of writes on: '%s'\n",
1404 				      f->file_name);
1405 			target->zone_append_enabled = true;
1406 		} else {
1407 			SPDK_WARNLOG("Falling back to writes on: '%s' - bdev lacks zone append cmd\n",
1408 				     f->file_name);
1409 		}
1410 	}
1411 
1412 	return 0;
1413 }
1414 
1415 static struct fio_option options[] = {
1416 	{
1417 		.name		= "spdk_conf",
1418 		.lname		= "SPDK configuration file",
1419 		.type		= FIO_OPT_STR_STORE,
1420 		.off1		= offsetof(struct spdk_fio_options, conf),
1421 		.help		= "A SPDK JSON configuration file",
1422 		.category	= FIO_OPT_C_ENGINE,
1423 		.group		= FIO_OPT_G_INVALID,
1424 	},
1425 	{
1426 		.name           = "spdk_json_conf",
1427 		.lname          = "SPDK JSON configuration file",
1428 		.type           = FIO_OPT_STR_STORE,
1429 		.off1           = offsetof(struct spdk_fio_options, json_conf),
1430 		.help           = "A SPDK JSON configuration file",
1431 		.category       = FIO_OPT_C_ENGINE,
1432 		.group          = FIO_OPT_G_INVALID,
1433 	},
1434 	{
1435 		.name		= "spdk_mem",
1436 		.lname		= "SPDK memory in MB",
1437 		.type		= FIO_OPT_INT,
1438 		.off1		= offsetof(struct spdk_fio_options, mem_mb),
1439 		.help		= "Amount of memory in MB to allocate for SPDK",
1440 		.category	= FIO_OPT_C_ENGINE,
1441 		.group		= FIO_OPT_G_INVALID,
1442 	},
1443 	{
1444 		.name		= "spdk_single_seg",
1445 		.lname		= "SPDK switch to create just a single hugetlbfs file",
1446 		.type		= FIO_OPT_BOOL,
1447 		.off1		= offsetof(struct spdk_fio_options, mem_single_seg),
1448 		.help		= "If set to 1, SPDK will use just a single hugetlbfs file",
1449 		.def            = "0",
1450 		.category	= FIO_OPT_C_ENGINE,
1451 		.group		= FIO_OPT_G_INVALID,
1452 	},
1453 	{
1454 		.name           = "log_flags",
1455 		.lname          = "log flags",
1456 		.type           = FIO_OPT_STR_STORE,
1457 		.off1           = offsetof(struct spdk_fio_options, log_flags),
1458 		.help           = "SPDK log flags to enable",
1459 		.category       = FIO_OPT_C_ENGINE,
1460 		.group          = FIO_OPT_G_INVALID,
1461 	},
1462 	{
1463 		.name		= "initial_zone_reset",
1464 		.lname		= "Reset Zones on initialization",
1465 		.type		= FIO_OPT_INT,
1466 		.off1		= offsetof(struct spdk_fio_options, initial_zone_reset),
1467 		.def		= "0",
1468 		.help		= "Reset Zones on initialization (0=disable, 1=Reset All Zones)",
1469 		.category	= FIO_OPT_C_ENGINE,
1470 		.group		= FIO_OPT_G_INVALID,
1471 	},
1472 	{
1473 		.name		= "zone_append",
1474 		.lname		= "Use zone append instead of write",
1475 		.type		= FIO_OPT_INT,
1476 		.off1		= offsetof(struct spdk_fio_options, zone_append),
1477 		.def		= "0",
1478 		.help		= "Use zone append instead of write (1=zone append, 0=write)",
1479 		.category	= FIO_OPT_C_ENGINE,
1480 		.group		= FIO_OPT_G_INVALID,
1481 	},
1482 	{
1483 		.name           = "env_context",
1484 		.lname          = "Environment context options",
1485 		.type           = FIO_OPT_STR_STORE,
1486 		.off1           = offsetof(struct spdk_fio_options, env_context),
1487 		.help           = "Opaque context for use of the env implementation",
1488 		.category       = FIO_OPT_C_ENGINE,
1489 		.group          = FIO_OPT_G_INVALID,
1490 	},
1491 	{
1492 		.name		= "spdk_rpc_listen_addr",
1493 		.lname		= "SPDK RPC listen address",
1494 		.type		= FIO_OPT_STR_STORE,
1495 		.off1		= offsetof(struct spdk_fio_options, rpc_listen_addr),
1496 		.help		= "The address to listen the RPC operations",
1497 		.category	= FIO_OPT_C_ENGINE,
1498 		.group		= FIO_OPT_G_INVALID,
1499 	},
1500 	{
1501 		.name		= NULL,
1502 	},
1503 };
1504 
1505 /* FIO imports this structure using dlsym */
1506 struct ioengine_ops ioengine = {
1507 	.name			= "spdk_bdev",
1508 	.version		= FIO_IOOPS_VERSION,
1509 	.flags			= FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO,
1510 	.setup			= spdk_fio_setup,
1511 	.init			= spdk_fio_init,
1512 	/* .prep		= unused, */
1513 	.queue			= spdk_fio_queue,
1514 	/* .commit		= unused, */
1515 	.getevents		= spdk_fio_getevents,
1516 	.event			= spdk_fio_event,
1517 	/* .errdetails		= unused, */
1518 	/* .cancel		= unused, */
1519 	.cleanup		= spdk_fio_cleanup,
1520 	.open_file		= spdk_fio_open,
1521 	.close_file		= spdk_fio_close,
1522 	.invalidate		= spdk_fio_invalidate,
1523 	/* .unlink_file		= unused, */
1524 	/* .get_file_size	= unused, */
1525 	/* .terminate		= unused, */
1526 	.iomem_alloc		= spdk_fio_iomem_alloc,
1527 	.iomem_free		= spdk_fio_iomem_free,
1528 	.io_u_init		= spdk_fio_io_u_init,
1529 	.io_u_free		= spdk_fio_io_u_free,
1530 #if FIO_HAS_ZBD
1531 	.get_zoned_model	= spdk_fio_get_zoned_model,
1532 	.report_zones		= spdk_fio_report_zones,
1533 	.reset_wp		= spdk_fio_reset_wp,
1534 #endif
1535 #if FIO_IOOPS_VERSION >= 30
1536 	.get_max_open_zones	= spdk_fio_get_max_open_zones,
1537 #endif
1538 	.option_struct_size	= sizeof(struct spdk_fio_options),
1539 	.options		= options,
1540 };
1541 
/*
 * Registers the "spdk_bdev" ioengine descriptor with fio.
 * NOTE(review): fio_init is defined in fio's headers, not in this file;
 * presumably it marks the function to run automatically at load time - confirm.
 */
static void fio_init
spdk_fio_register(void)
{
	register_ioengine(&ioengine);
}
1547 
/*
 * Stops the init thread and shuts down the SPDK thread library and env.
 *
 * The statement order matters: the flag is cleared and the condition is
 * signalled while holding g_init_mtx, the mutex is dropped, and only then is
 * the init thread joined. The library teardown runs after the join.
 */
static void
spdk_fio_finish_env(void)
{
	/* Clear g_poll_loop under the lock and signal g_init_cond.
	 * NOTE(review): presumably the init thread waits on this flag/condition - the
	 * loop itself is outside this chunk, confirm there. */
	pthread_mutex_lock(&g_init_mtx);
	g_poll_loop = false;
	pthread_cond_signal(&g_init_cond);
	pthread_mutex_unlock(&g_init_mtx);
	/* Wait for the init thread to exit before tearing anything down. */
	pthread_join(g_init_thread_id, NULL);

	spdk_thread_lib_fini();
	spdk_env_fini();
}
1560 
/*
 * Tears down the SPDK environment if it was initialized, then unregisters
 * the ioengine descriptor from fio.
 * NOTE(review): fio_exit is defined in fio's headers, not in this file;
 * presumably it marks the function to run automatically at unload/exit - confirm.
 */
static void fio_exit
spdk_fio_unregister(void)
{
	if (g_spdk_env_initialized) {
		spdk_fio_finish_env();
		/* Clear the flag so the environment is not finished a second time. */
		g_spdk_env_initialized = false;
	}
	unregister_ioengine(&ioengine);
}
1570 
/* Declares the 'fio_bdev' log component named by the SPDK_DEBUGLOG() call in this file. */
SPDK_LOG_REGISTER_COMPONENT(fio_bdev)
1572