197b01d09SBen Walker /* SPDX-License-Identifier: BSD-3-Clause 297b01d09SBen Walker * Copyright (C) 2017 Intel Corporation. 397b01d09SBen Walker * All rights reserved. 497b01d09SBen Walker * Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 597b01d09SBen Walker */ 697b01d09SBen Walker 797b01d09SBen Walker #include "spdk/stdinc.h" 897b01d09SBen Walker 997b01d09SBen Walker #include "spdk/bdev.h" 1097b01d09SBen Walker #include "spdk/bdev_zone.h" 1197b01d09SBen Walker #include "spdk/accel.h" 1297b01d09SBen Walker #include "spdk/env.h" 134586880fSJim Harris #include "spdk/file.h" 1497b01d09SBen Walker #include "spdk/init.h" 1597b01d09SBen Walker #include "spdk/thread.h" 1697b01d09SBen Walker #include "spdk/log.h" 1797b01d09SBen Walker #include "spdk/string.h" 1897b01d09SBen Walker #include "spdk/queue.h" 1997b01d09SBen Walker #include "spdk/util.h" 2097b01d09SBen Walker #include "spdk/rpc.h" 2197b01d09SBen Walker 2297b01d09SBen Walker #include "spdk_internal/event.h" 2397b01d09SBen Walker 2497b01d09SBen Walker #include "config-host.h" 2597b01d09SBen Walker #include "fio.h" 2697b01d09SBen Walker #include "optgroup.h" 2797b01d09SBen Walker 2897b01d09SBen Walker #ifdef for_each_rw_ddir 2997b01d09SBen Walker #define FIO_HAS_ZBD (FIO_IOOPS_VERSION >= 26) 3097b01d09SBen Walker #else 3197b01d09SBen Walker #define FIO_HAS_ZBD (0) 3297b01d09SBen Walker #endif 3397b01d09SBen Walker 3497b01d09SBen Walker /* FreeBSD is missing CLOCK_MONOTONIC_RAW, 3597b01d09SBen Walker * so alternative is provided. */ 3697b01d09SBen Walker #ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ 3797b01d09SBen Walker #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC 3897b01d09SBen Walker #endif 3997b01d09SBen Walker 4097b01d09SBen Walker struct spdk_fio_options { 4197b01d09SBen Walker void *pad; 4297b01d09SBen Walker char *conf; 4397b01d09SBen Walker char *json_conf; 4497b01d09SBen Walker char *env_context; 4597b01d09SBen Walker char *log_flags; 4697b01d09SBen Walker unsigned mem_mb; 4797b01d09SBen Walker int mem_single_seg; 4897b01d09SBen Walker int initial_zone_reset; 4997b01d09SBen Walker int zone_append; 5097b01d09SBen Walker char *rpc_listen_addr; 5197b01d09SBen Walker }; 5297b01d09SBen Walker 5397b01d09SBen Walker struct spdk_fio_request { 5497b01d09SBen Walker struct io_u *io; 5597b01d09SBen Walker struct thread_data *td; 5697b01d09SBen Walker }; 5797b01d09SBen Walker 5897b01d09SBen Walker struct spdk_fio_target { 5997b01d09SBen Walker struct spdk_bdev *bdev; 6097b01d09SBen Walker struct spdk_bdev_desc *desc; 6197b01d09SBen Walker struct spdk_io_channel *ch; 6297b01d09SBen Walker bool zone_append_enabled; 6397b01d09SBen Walker 6497b01d09SBen Walker TAILQ_ENTRY(spdk_fio_target) link; 6597b01d09SBen Walker }; 6697b01d09SBen Walker 6797b01d09SBen Walker struct spdk_fio_thread { 6897b01d09SBen Walker struct thread_data *td; /* fio thread context */ 6997b01d09SBen Walker struct spdk_thread *thread; /* spdk thread context */ 7097b01d09SBen Walker 7197b01d09SBen Walker TAILQ_HEAD(, spdk_fio_target) targets; 7297b01d09SBen Walker bool failed; /* true if the thread failed to initialize */ 7397b01d09SBen Walker 7497b01d09SBen Walker struct io_u **iocq; /* io completion queue */ 7597b01d09SBen Walker unsigned int iocq_count; /* number of iocq entries filled by last getevents */ 7697b01d09SBen Walker unsigned int iocq_size; /* number of iocq entries allocated */ 7797b01d09SBen Walker 7897b01d09SBen Walker TAILQ_ENTRY(spdk_fio_thread) link; 7997b01d09SBen Walker }; 8097b01d09SBen Walker 8197b01d09SBen Walker struct spdk_fio_zone_cb_arg { 8297b01d09SBen Walker struct spdk_fio_target *target; 8397b01d09SBen Walker struct spdk_bdev_zone_info *spdk_zones; 8497b01d09SBen Walker int completed; 8597b01d09SBen Walker uint64_t offset_blocks; 8697b01d09SBen Walker struct zbd_zone *fio_zones; 8797b01d09SBen Walker unsigned int nr_zones; 8897b01d09SBen Walker }; 8997b01d09SBen Walker 9097b01d09SBen Walker /* On App Thread (oat) context used for making sync calls from async calls. */ 9197b01d09SBen Walker struct spdk_fio_oat_ctx { 9297b01d09SBen Walker union { 9397b01d09SBen Walker struct spdk_fio_setup_args { 9497b01d09SBen Walker struct thread_data *td; 9597b01d09SBen Walker } sa; 9697b01d09SBen Walker struct spdk_fio_bdev_get_zoned_model_args { 9797b01d09SBen Walker struct fio_file *f; 9897b01d09SBen Walker enum zbd_zoned_model *model; 9997b01d09SBen Walker } zma; 10097b01d09SBen Walker struct spdk_fio_bdev_get_max_open_zones_args { 10197b01d09SBen Walker struct fio_file *f; 10297b01d09SBen Walker unsigned int *max_open_zones; 10397b01d09SBen Walker } moza; 10497b01d09SBen Walker } u; 10597b01d09SBen Walker pthread_mutex_t mutex; 10697b01d09SBen Walker pthread_cond_t cond; 10797b01d09SBen Walker int ret; 10897b01d09SBen Walker }; 10997b01d09SBen Walker 11097b01d09SBen Walker static bool g_spdk_env_initialized = false; 11197b01d09SBen Walker static const char *g_json_config_file = NULL; 1124586880fSJim Harris static void *g_json_data; 1134586880fSJim Harris static size_t g_json_data_size; 11413481a59SKrzysztof Karas static const char *g_rpc_listen_addr = NULL; 11597b01d09SBen Walker 11697b01d09SBen Walker static int spdk_fio_init(struct thread_data *td); 11797b01d09SBen Walker static void spdk_fio_cleanup(struct thread_data *td); 11897b01d09SBen Walker static size_t spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread); 11997b01d09SBen Walker static int spdk_fio_handle_options(struct thread_data *td, struct fio_file *f, 12097b01d09SBen Walker struct spdk_bdev *bdev); 12197b01d09SBen Walker static int spdk_fio_handle_options_per_target(struct thread_data *td, struct fio_file *f); 12297b01d09SBen Walker static void spdk_fio_setup_oat(void *ctx); 12397b01d09SBen Walker 12497b01d09SBen Walker static pthread_t g_init_thread_id = 0; 12597b01d09SBen Walker static pthread_mutex_t g_init_mtx = PTHREAD_MUTEX_INITIALIZER; 12697b01d09SBen Walker static pthread_cond_t g_init_cond; 12797b01d09SBen Walker static bool g_poll_loop = true; 12897b01d09SBen Walker static TAILQ_HEAD(, spdk_fio_thread) g_threads = TAILQ_HEAD_INITIALIZER(g_threads); 12997b01d09SBen Walker 13097b01d09SBen Walker /* Default polling timeout (ns) */ 13197b01d09SBen Walker #define SPDK_FIO_POLLING_TIMEOUT 1000000000ULL 13297b01d09SBen Walker 13397b01d09SBen Walker static __thread bool g_internal_thread = false; 13497b01d09SBen Walker 13597b01d09SBen Walker /* Run msg_fn on app thread ("oat") and wait for it to call spdk_fio_wake_oat_waiter() */ 13697b01d09SBen Walker static void 13797b01d09SBen Walker spdk_fio_sync_run_oat(void (*msg_fn)(void *), struct spdk_fio_oat_ctx *ctx) 13897b01d09SBen Walker { 13997b01d09SBen Walker assert(!spdk_thread_is_app_thread(NULL)); 14097b01d09SBen Walker 14197b01d09SBen Walker pthread_mutex_init(&ctx->mutex, NULL); 14297b01d09SBen Walker pthread_cond_init(&ctx->cond, NULL); 14397b01d09SBen Walker pthread_mutex_lock(&ctx->mutex); 14497b01d09SBen Walker 14597b01d09SBen Walker spdk_thread_send_msg(spdk_thread_get_app_thread(), msg_fn, ctx); 14697b01d09SBen Walker 14797b01d09SBen Walker /* Wake up the poll loop in spdk_init_thread_poll() */ 14897b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 14997b01d09SBen Walker pthread_cond_signal(&g_init_cond); 15097b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 15197b01d09SBen Walker 15297b01d09SBen Walker /* Wait for msg_fn() to call spdk_fio_wake_oat_waiter() */ 15397b01d09SBen Walker pthread_cond_wait(&ctx->cond, &ctx->mutex); 15497b01d09SBen Walker pthread_mutex_unlock(&ctx->mutex); 15597b01d09SBen Walker 15697b01d09SBen Walker pthread_mutex_destroy(&ctx->mutex); 15797b01d09SBen Walker pthread_cond_destroy(&ctx->cond); 15897b01d09SBen Walker } 15997b01d09SBen Walker 16097b01d09SBen Walker static void 16197b01d09SBen Walker spdk_fio_wake_oat_waiter(struct spdk_fio_oat_ctx *ctx) 16297b01d09SBen Walker { 16397b01d09SBen Walker pthread_mutex_lock(&ctx->mutex); 16497b01d09SBen Walker pthread_cond_signal(&ctx->cond); 16597b01d09SBen Walker pthread_mutex_unlock(&ctx->mutex); 16697b01d09SBen Walker } 16797b01d09SBen Walker 16897b01d09SBen Walker static int 16997b01d09SBen Walker spdk_fio_schedule_thread(struct spdk_thread *thread) 17097b01d09SBen Walker { 17197b01d09SBen Walker struct spdk_fio_thread *fio_thread; 17297b01d09SBen Walker 17397b01d09SBen Walker if (g_internal_thread) { 17497b01d09SBen Walker /* Do nothing. */ 17597b01d09SBen Walker return 0; 17697b01d09SBen Walker } 17797b01d09SBen Walker 17897b01d09SBen Walker fio_thread = spdk_thread_get_ctx(thread); 17997b01d09SBen Walker 18097b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 18197b01d09SBen Walker TAILQ_INSERT_TAIL(&g_threads, fio_thread, link); 18297b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 18397b01d09SBen Walker 18497b01d09SBen Walker return 0; 18597b01d09SBen Walker } 18697b01d09SBen Walker 18797b01d09SBen Walker static int 18897b01d09SBen Walker spdk_fio_init_thread(struct thread_data *td) 18997b01d09SBen Walker { 19097b01d09SBen Walker struct spdk_fio_thread *fio_thread; 19197b01d09SBen Walker struct spdk_thread *thread; 19297b01d09SBen Walker 19397b01d09SBen Walker g_internal_thread = true; 19497b01d09SBen Walker thread = spdk_thread_create("fio_thread", NULL); 19597b01d09SBen Walker g_internal_thread = false; 19697b01d09SBen Walker if (!thread) { 19797b01d09SBen Walker SPDK_ERRLOG("failed to allocate thread\n"); 19897b01d09SBen Walker return -1; 19997b01d09SBen Walker } 20097b01d09SBen Walker 20197b01d09SBen Walker fio_thread = spdk_thread_get_ctx(thread); 20297b01d09SBen Walker fio_thread->td = td; 20397b01d09SBen Walker fio_thread->thread = thread; 20497b01d09SBen Walker td->io_ops_data = fio_thread; 20597b01d09SBen Walker 20697b01d09SBen Walker spdk_set_thread(thread); 20797b01d09SBen Walker 20897b01d09SBen Walker fio_thread->iocq_size = td->o.iodepth; 20997b01d09SBen Walker fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); 21097b01d09SBen Walker assert(fio_thread->iocq != NULL); 21197b01d09SBen Walker 21297b01d09SBen Walker TAILQ_INIT(&fio_thread->targets); 21397b01d09SBen Walker 21497b01d09SBen Walker return 0; 21597b01d09SBen Walker } 21697b01d09SBen Walker 21797b01d09SBen Walker static void 21897b01d09SBen Walker spdk_fio_bdev_close_targets(void *arg) 21997b01d09SBen Walker { 22097b01d09SBen Walker struct spdk_fio_thread *fio_thread = arg; 22197b01d09SBen Walker struct spdk_fio_target *target, *tmp; 22297b01d09SBen Walker 22397b01d09SBen Walker TAILQ_FOREACH_SAFE(target, &fio_thread->targets, link, tmp) { 22497b01d09SBen Walker TAILQ_REMOVE(&fio_thread->targets, target, link); 22597b01d09SBen Walker spdk_put_io_channel(target->ch); 22697b01d09SBen Walker spdk_bdev_close(target->desc); 22797b01d09SBen Walker free(target); 22897b01d09SBen Walker } 22997b01d09SBen Walker } 23097b01d09SBen Walker 23197b01d09SBen Walker static void 23297b01d09SBen Walker spdk_fio_cleanup_thread(struct spdk_fio_thread *fio_thread) 23397b01d09SBen Walker { 23497b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_close_targets, fio_thread); 23597b01d09SBen Walker 23697b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 23797b01d09SBen Walker TAILQ_INSERT_TAIL(&g_threads, fio_thread, link); 23897b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 23997b01d09SBen Walker } 24097b01d09SBen Walker 24197b01d09SBen Walker static void 24297b01d09SBen Walker spdk_fio_calc_timeout(struct spdk_fio_thread *fio_thread, struct timespec *ts) 24397b01d09SBen Walker { 24497b01d09SBen Walker uint64_t timeout, now; 24597b01d09SBen Walker 24697b01d09SBen Walker if (spdk_thread_has_active_pollers(fio_thread->thread)) { 24797b01d09SBen Walker return; 24897b01d09SBen Walker } 24997b01d09SBen Walker 25097b01d09SBen Walker timeout = spdk_thread_next_poller_expiration(fio_thread->thread); 25197b01d09SBen Walker now = spdk_get_ticks(); 25297b01d09SBen Walker 25397b01d09SBen Walker if (timeout == 0) { 25497b01d09SBen Walker timeout = now + (SPDK_FIO_POLLING_TIMEOUT * spdk_get_ticks_hz()) / SPDK_SEC_TO_NSEC; 25597b01d09SBen Walker } 25697b01d09SBen Walker 25797b01d09SBen Walker if (timeout > now) { 25897b01d09SBen Walker timeout = ((timeout - now) * SPDK_SEC_TO_NSEC) / spdk_get_ticks_hz() + 25997b01d09SBen Walker ts->tv_sec * SPDK_SEC_TO_NSEC + ts->tv_nsec; 26097b01d09SBen Walker 26197b01d09SBen Walker ts->tv_sec = timeout / SPDK_SEC_TO_NSEC; 26297b01d09SBen Walker ts->tv_nsec = timeout % SPDK_SEC_TO_NSEC; 26397b01d09SBen Walker } 26497b01d09SBen Walker } 26597b01d09SBen Walker 26697b01d09SBen Walker static void 26797b01d09SBen Walker spdk_fio_bdev_init_done(int rc, void *cb_arg) 26897b01d09SBen Walker { 26997b01d09SBen Walker *(bool *)cb_arg = true; 27097b01d09SBen Walker 2714586880fSJim Harris free(g_json_data); 2724586880fSJim Harris if (rc) { 2734586880fSJim Harris SPDK_ERRLOG("RUNTIME RPCs failed\n"); 2744586880fSJim Harris exit(1); 2754586880fSJim Harris } 2764586880fSJim Harris } 2774586880fSJim Harris 2784586880fSJim Harris static void 2794586880fSJim Harris spdk_fio_bdev_subsystem_init_done(int rc, void *cb_arg) 2804586880fSJim Harris { 2814586880fSJim Harris if (rc) { 2824586880fSJim Harris SPDK_ERRLOG("subsystem init failed\n"); 2834586880fSJim Harris exit(1); 2844586880fSJim Harris } 2854586880fSJim Harris 28697b01d09SBen Walker spdk_rpc_set_state(SPDK_RPC_RUNTIME); 2874586880fSJim Harris spdk_subsystem_load_config(g_json_data, g_json_data_size, 2884586880fSJim Harris spdk_fio_bdev_init_done, cb_arg, true); 2894586880fSJim Harris } 2904586880fSJim Harris 2914586880fSJim Harris static void 2924586880fSJim Harris spdk_fio_bdev_startup_done(int rc, void *cb_arg) 2934586880fSJim Harris { 2944586880fSJim Harris if (rc) { 2954586880fSJim Harris SPDK_ERRLOG("STARTUP RPCs failed\n"); 2964586880fSJim Harris exit(1); 2974586880fSJim Harris } 2984586880fSJim Harris 2994586880fSJim Harris if (g_rpc_listen_addr != NULL) { 3004586880fSJim Harris if (spdk_rpc_initialize(g_rpc_listen_addr, NULL) != 0) { 3014586880fSJim Harris SPDK_ERRLOG("could not initialize RPC address %s\n", g_rpc_listen_addr); 3024586880fSJim Harris exit(1); 30397b01d09SBen Walker } 30497b01d09SBen Walker } 3054586880fSJim Harris 3064586880fSJim Harris spdk_subsystem_init(spdk_fio_bdev_subsystem_init_done, cb_arg); 30713481a59SKrzysztof Karas } 30897b01d09SBen Walker 30997b01d09SBen Walker static void 31097b01d09SBen Walker spdk_fio_bdev_init_start(void *arg) 31197b01d09SBen Walker { 31297b01d09SBen Walker bool *done = arg; 31397b01d09SBen Walker 3144586880fSJim Harris g_json_data = spdk_posix_file_load_from_name(g_json_config_file, &g_json_data_size); 3154586880fSJim Harris 3164586880fSJim Harris if (g_json_data == NULL) { 3174586880fSJim Harris SPDK_ERRLOG("could not allocate buffer for json config file\n"); 3184586880fSJim Harris exit(1); 3194586880fSJim Harris } 3204586880fSJim Harris 3214586880fSJim Harris /* Load SPDK_RPC_STARTUP RPCs from config file */ 3224586880fSJim Harris assert(spdk_rpc_get_state() == SPDK_RPC_STARTUP); 3234586880fSJim Harris spdk_subsystem_load_config(g_json_data, g_json_data_size, 3244586880fSJim Harris spdk_fio_bdev_startup_done, done, true); 32597b01d09SBen Walker } 32697b01d09SBen Walker 32797b01d09SBen Walker static void 32897b01d09SBen Walker spdk_fio_bdev_fini_done(void *cb_arg) 32997b01d09SBen Walker { 33097b01d09SBen Walker *(bool *)cb_arg = true; 33197b01d09SBen Walker 33297b01d09SBen Walker spdk_rpc_finish(); 33397b01d09SBen Walker } 33497b01d09SBen Walker 33597b01d09SBen Walker static void 33697b01d09SBen Walker spdk_fio_bdev_fini_start(void *arg) 33797b01d09SBen Walker { 33897b01d09SBen Walker bool *done = arg; 33997b01d09SBen Walker 34097b01d09SBen Walker spdk_subsystem_fini(spdk_fio_bdev_fini_done, done); 34197b01d09SBen Walker } 34297b01d09SBen Walker 34397b01d09SBen Walker static void * 34497b01d09SBen Walker spdk_init_thread_poll(void *arg) 34597b01d09SBen Walker { 34697b01d09SBen Walker struct spdk_fio_options *eo = arg; 34797b01d09SBen Walker struct spdk_fio_thread *fio_thread; 34897b01d09SBen Walker struct spdk_fio_thread *thread, *tmp; 34997b01d09SBen Walker struct spdk_env_opts opts; 35097b01d09SBen Walker bool done; 35197b01d09SBen Walker int rc; 35297b01d09SBen Walker struct timespec ts; 35397b01d09SBen Walker struct thread_data td = {}; 35497b01d09SBen Walker 35597b01d09SBen Walker /* Create a dummy thread data for use on the initialization thread. */ 35697b01d09SBen Walker td.o.iodepth = 32; 35797b01d09SBen Walker td.eo = eo; 35897b01d09SBen Walker 35997b01d09SBen Walker /* Parse the SPDK configuration file */ 36097b01d09SBen Walker eo = arg; 36197b01d09SBen Walker 36297b01d09SBen Walker if (eo->conf && eo->json_conf) { 36397b01d09SBen Walker SPDK_ERRLOG("Cannot provide two types of configuration files\n"); 36497b01d09SBen Walker rc = EINVAL; 36597b01d09SBen Walker goto err_exit; 36697b01d09SBen Walker } else if (eo->conf && strlen(eo->conf)) { 36797b01d09SBen Walker g_json_config_file = eo->conf; 36897b01d09SBen Walker } else if (eo->json_conf && strlen(eo->json_conf)) { 36997b01d09SBen Walker g_json_config_file = eo->json_conf; 37097b01d09SBen Walker } else { 37197b01d09SBen Walker SPDK_ERRLOG("No configuration file provided\n"); 37297b01d09SBen Walker rc = EINVAL; 37397b01d09SBen Walker goto err_exit; 37497b01d09SBen Walker } 37597b01d09SBen Walker 37697b01d09SBen Walker /* Initialize the RPC listen address */ 37797b01d09SBen Walker if (eo->rpc_listen_addr) { 37897b01d09SBen Walker g_rpc_listen_addr = eo->rpc_listen_addr; 37997b01d09SBen Walker } 38097b01d09SBen Walker 38197b01d09SBen Walker /* Initialize the environment library */ 38257fd99b9SJim Harris opts.opts_size = sizeof(opts); 38397b01d09SBen Walker spdk_env_opts_init(&opts); 38497b01d09SBen Walker opts.name = "fio"; 38597b01d09SBen Walker 38697b01d09SBen Walker if (eo->mem_mb) { 38797b01d09SBen Walker opts.mem_size = eo->mem_mb; 38897b01d09SBen Walker } 38997b01d09SBen Walker opts.hugepage_single_segments = eo->mem_single_seg; 39097b01d09SBen Walker if (eo->env_context) { 39197b01d09SBen Walker opts.env_context = eo->env_context; 39297b01d09SBen Walker } 39397b01d09SBen Walker 39497b01d09SBen Walker if (spdk_env_init(&opts) < 0) { 39597b01d09SBen Walker SPDK_ERRLOG("Unable to initialize SPDK env\n"); 39697b01d09SBen Walker rc = EINVAL; 39797b01d09SBen Walker goto err_exit; 39897b01d09SBen Walker } 39997b01d09SBen Walker spdk_unaffinitize_thread(); 40097b01d09SBen Walker 40197b01d09SBen Walker if (eo->log_flags) { 402*b37db069SXuQi char *sp = NULL; 403*b37db069SXuQi char *tok = strtok_r(eo->log_flags, ",", &sp); 40497b01d09SBen Walker do { 40597b01d09SBen Walker rc = spdk_log_set_flag(tok); 40697b01d09SBen Walker if (rc < 0) { 40797b01d09SBen Walker SPDK_ERRLOG("unknown spdk log flag %s\n", tok); 40897b01d09SBen Walker rc = EINVAL; 40997b01d09SBen Walker goto err_exit; 41097b01d09SBen Walker } 411*b37db069SXuQi } while ((tok = strtok_r(NULL, ",", &sp)) != NULL); 41297b01d09SBen Walker #ifdef DEBUG 41397b01d09SBen Walker spdk_log_set_print_level(SPDK_LOG_DEBUG); 41497b01d09SBen Walker #endif 41597b01d09SBen Walker } 41697b01d09SBen Walker 41797b01d09SBen Walker spdk_thread_lib_init(spdk_fio_schedule_thread, sizeof(struct spdk_fio_thread)); 41897b01d09SBen Walker 41997b01d09SBen Walker /* Create an SPDK thread temporarily */ 42097b01d09SBen Walker rc = spdk_fio_init_thread(&td); 42197b01d09SBen Walker if (rc < 0) { 42297b01d09SBen Walker SPDK_ERRLOG("Failed to create initialization thread\n"); 42397b01d09SBen Walker goto err_exit; 42497b01d09SBen Walker } 42597b01d09SBen Walker 42697b01d09SBen Walker fio_thread = td.io_ops_data; 42797b01d09SBen Walker 42897b01d09SBen Walker /* Initialize the bdev layer */ 42997b01d09SBen Walker done = false; 43097b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_init_start, &done); 43197b01d09SBen Walker 43297b01d09SBen Walker do { 43397b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 43497b01d09SBen Walker } while (!done); 43597b01d09SBen Walker 43697b01d09SBen Walker /* 43797b01d09SBen Walker * Continue polling until there are no more events. 43897b01d09SBen Walker * This handles any final events posted by pollers. 43997b01d09SBen Walker */ 44097b01d09SBen Walker while (spdk_fio_poll_thread(fio_thread) > 0) {}; 44197b01d09SBen Walker 44297b01d09SBen Walker /* Set condition variable */ 44397b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 44497b01d09SBen Walker pthread_cond_signal(&g_init_cond); 44597b01d09SBen Walker 44697b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 44797b01d09SBen Walker 44897b01d09SBen Walker while (g_poll_loop) { 44997b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 45097b01d09SBen Walker 45197b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 45297b01d09SBen Walker if (!TAILQ_EMPTY(&g_threads)) { 45397b01d09SBen Walker TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) { 45497b01d09SBen Walker if (spdk_thread_is_exited(thread->thread)) { 45597b01d09SBen Walker TAILQ_REMOVE(&g_threads, thread, link); 45697b01d09SBen Walker free(thread->iocq); 45797b01d09SBen Walker spdk_thread_destroy(thread->thread); 45897b01d09SBen Walker } else { 45997b01d09SBen Walker spdk_fio_poll_thread(thread); 46097b01d09SBen Walker } 46197b01d09SBen Walker } 46297b01d09SBen Walker 46397b01d09SBen Walker /* If there are exiting threads to poll, don't sleep. */ 46497b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 46597b01d09SBen Walker continue; 46697b01d09SBen Walker } 46797b01d09SBen Walker 46897b01d09SBen Walker /* Figure out how long to sleep. */ 46997b01d09SBen Walker clock_gettime(CLOCK_MONOTONIC, &ts); 47097b01d09SBen Walker spdk_fio_calc_timeout(fio_thread, &ts); 47197b01d09SBen Walker 47297b01d09SBen Walker rc = pthread_cond_timedwait(&g_init_cond, &g_init_mtx, &ts); 47397b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 47497b01d09SBen Walker 47597b01d09SBen Walker if (rc != 0 && rc != ETIMEDOUT) { 47697b01d09SBen Walker break; 47797b01d09SBen Walker } 47897b01d09SBen Walker } 47997b01d09SBen Walker 48097b01d09SBen Walker spdk_fio_cleanup_thread(fio_thread); 48197b01d09SBen Walker 48297b01d09SBen Walker /* Finalize the bdev layer */ 48397b01d09SBen Walker done = false; 48497b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_fini_start, &done); 48597b01d09SBen Walker 48697b01d09SBen Walker do { 48797b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 48897b01d09SBen Walker 48997b01d09SBen Walker TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) { 49097b01d09SBen Walker spdk_fio_poll_thread(thread); 49197b01d09SBen Walker } 49297b01d09SBen Walker } while (!done); 49397b01d09SBen Walker 49497b01d09SBen Walker /* Now exit all the threads */ 49597b01d09SBen Walker TAILQ_FOREACH(thread, &g_threads, link) { 49697b01d09SBen Walker spdk_set_thread(thread->thread); 49797b01d09SBen Walker spdk_thread_exit(thread->thread); 49897b01d09SBen Walker spdk_set_thread(NULL); 49997b01d09SBen Walker } 50097b01d09SBen Walker 50197b01d09SBen Walker /* And wait for them to gracefully exit */ 50297b01d09SBen Walker while (!TAILQ_EMPTY(&g_threads)) { 50397b01d09SBen Walker TAILQ_FOREACH_SAFE(thread, &g_threads, link, tmp) { 50497b01d09SBen Walker if (spdk_thread_is_exited(thread->thread)) { 50597b01d09SBen Walker TAILQ_REMOVE(&g_threads, thread, link); 50697b01d09SBen Walker free(thread->iocq); 50797b01d09SBen Walker spdk_thread_destroy(thread->thread); 50897b01d09SBen Walker } else { 50997b01d09SBen Walker spdk_thread_poll(thread->thread, 0, 0); 51097b01d09SBen Walker } 51197b01d09SBen Walker } 51297b01d09SBen Walker } 51397b01d09SBen Walker 51497b01d09SBen Walker pthread_exit(NULL); 51597b01d09SBen Walker 51697b01d09SBen Walker err_exit: 51797b01d09SBen Walker exit(rc); 51897b01d09SBen Walker return NULL; 51997b01d09SBen Walker } 52097b01d09SBen Walker 52197b01d09SBen Walker static int 52297b01d09SBen Walker spdk_fio_init_env(struct thread_data *td) 52397b01d09SBen Walker { 52497b01d09SBen Walker pthread_condattr_t attr; 52597b01d09SBen Walker int rc = -1; 52697b01d09SBen Walker 52797b01d09SBen Walker if (pthread_condattr_init(&attr)) { 52897b01d09SBen Walker SPDK_ERRLOG("Unable to initialize condition variable\n"); 52997b01d09SBen Walker return -1; 53097b01d09SBen Walker } 53197b01d09SBen Walker 53297b01d09SBen Walker if (pthread_condattr_setclock(&attr, CLOCK_MONOTONIC)) { 53397b01d09SBen Walker SPDK_ERRLOG("Unable to initialize condition variable\n"); 53497b01d09SBen Walker goto out; 53597b01d09SBen Walker } 53697b01d09SBen Walker 53797b01d09SBen Walker if (pthread_cond_init(&g_init_cond, &attr)) { 53897b01d09SBen Walker SPDK_ERRLOG("Unable to initialize condition variable\n"); 53997b01d09SBen Walker goto out; 54097b01d09SBen Walker } 54197b01d09SBen Walker 54297b01d09SBen Walker /* 54397b01d09SBen Walker * Spawn a thread to handle initialization operations and to poll things 54497b01d09SBen Walker * like the admin queues periodically. 54597b01d09SBen Walker */ 54697b01d09SBen Walker rc = pthread_create(&g_init_thread_id, NULL, &spdk_init_thread_poll, td->eo); 54797b01d09SBen Walker if (rc != 0) { 54897b01d09SBen Walker SPDK_ERRLOG("Unable to spawn thread to poll admin queue. It won't be polled.\n"); 54997b01d09SBen Walker } 55097b01d09SBen Walker 55197b01d09SBen Walker /* Wait for background thread to advance past the initialization */ 55297b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 55397b01d09SBen Walker pthread_cond_wait(&g_init_cond, &g_init_mtx); 55497b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 55597b01d09SBen Walker out: 55697b01d09SBen Walker pthread_condattr_destroy(&attr); 55797b01d09SBen Walker return rc; 55897b01d09SBen Walker } 55997b01d09SBen Walker 56097b01d09SBen Walker static bool 56197b01d09SBen Walker fio_redirected_to_dev_null(void) 56297b01d09SBen Walker { 56397b01d09SBen Walker char path[PATH_MAX] = ""; 56497b01d09SBen Walker ssize_t ret; 56597b01d09SBen Walker 56697b01d09SBen Walker ret = readlink("/proc/self/fd/1", path, sizeof(path)); 56797b01d09SBen Walker 56897b01d09SBen Walker if (ret == -1 || strcmp(path, "/dev/null") != 0) { 56997b01d09SBen Walker return false; 57097b01d09SBen Walker } 57197b01d09SBen Walker 57297b01d09SBen Walker ret = readlink("/proc/self/fd/2", path, sizeof(path)); 57397b01d09SBen Walker 57497b01d09SBen Walker if (ret == -1 || strcmp(path, "/dev/null") != 0) { 57597b01d09SBen Walker return false; 57697b01d09SBen Walker } 57797b01d09SBen Walker 57897b01d09SBen Walker return true; 57997b01d09SBen Walker } 58097b01d09SBen Walker 58197b01d09SBen Walker static int 58297b01d09SBen Walker spdk_fio_init_spdk_env(struct thread_data *td) 58397b01d09SBen Walker { 58497b01d09SBen Walker static pthread_mutex_t setup_lock = PTHREAD_MUTEX_INITIALIZER; 58597b01d09SBen Walker 58697b01d09SBen Walker pthread_mutex_lock(&setup_lock); 58797b01d09SBen Walker if (!g_spdk_env_initialized) { 58897b01d09SBen Walker if (spdk_fio_init_env(td)) { 58997b01d09SBen Walker pthread_mutex_unlock(&setup_lock); 59097b01d09SBen Walker SPDK_ERRLOG("failed to initialize\n"); 59197b01d09SBen Walker return -1; 59297b01d09SBen Walker } 59397b01d09SBen Walker 59497b01d09SBen Walker g_spdk_env_initialized = true; 59597b01d09SBen Walker } 59697b01d09SBen Walker pthread_mutex_unlock(&setup_lock); 59797b01d09SBen Walker 59897b01d09SBen Walker return 0; 59997b01d09SBen Walker } 60097b01d09SBen Walker 60197b01d09SBen Walker /* Called for each thread to fill in the 'real_file_size' member for 60297b01d09SBen Walker * each file associated with this thread. This is called prior to 60397b01d09SBen Walker * the init operation (spdk_fio_init()) below. This call will occur 60497b01d09SBen Walker * on the initial start up thread if 'create_serialize' is true, or 60597b01d09SBen Walker * on the thread actually associated with 'thread_data' if 'create_serialize' 60697b01d09SBen Walker * is false. 60797b01d09SBen Walker */ 60897b01d09SBen Walker static int 60997b01d09SBen Walker spdk_fio_setup(struct thread_data *td) 61097b01d09SBen Walker { 61197b01d09SBen Walker struct spdk_fio_oat_ctx ctx = { 0 }; 61297b01d09SBen Walker 61397b01d09SBen Walker /* 61497b01d09SBen Walker * If we're running in a daemonized FIO instance, it's possible 61597b01d09SBen Walker * fd 1/2 were re-used for something important by FIO. Newer fio 61697b01d09SBen Walker * versions are careful to redirect those to /dev/null, but if we're 61797b01d09SBen Walker * not, we'll abort early, so we don't accidentally write messages to 61897b01d09SBen Walker * an important file, etc. 61997b01d09SBen Walker */ 62097b01d09SBen Walker if (is_backend && !fio_redirected_to_dev_null()) { 62197b01d09SBen Walker char buf[1024]; 62297b01d09SBen Walker snprintf(buf, sizeof(buf), 62397b01d09SBen Walker "SPDK FIO plugin is in daemon mode, but stdout/stderr " 62497b01d09SBen Walker "aren't redirected to /dev/null. Aborting."); 62597b01d09SBen Walker fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf)); 62697b01d09SBen Walker return -1; 62797b01d09SBen Walker } 62897b01d09SBen Walker 62997b01d09SBen Walker if (!td->o.use_thread) { 63097b01d09SBen Walker SPDK_ERRLOG("must set thread=1 when using spdk plugin\n"); 63197b01d09SBen Walker return -1; 63297b01d09SBen Walker } 63397b01d09SBen Walker 63497b01d09SBen Walker if (spdk_fio_init_spdk_env(td) != 0) { 63597b01d09SBen Walker return -1; 63697b01d09SBen Walker } 63797b01d09SBen Walker 63897b01d09SBen Walker ctx.u.sa.td = td; 63997b01d09SBen Walker spdk_fio_sync_run_oat(spdk_fio_setup_oat, &ctx); 64097b01d09SBen Walker return ctx.ret; 64197b01d09SBen Walker } 64297b01d09SBen Walker 64397b01d09SBen Walker static int 64497b01d09SBen Walker _spdk_fio_add_file(void *ctx, struct spdk_bdev *bdev) 64597b01d09SBen Walker { 64697b01d09SBen Walker struct thread_data *td = ctx; 64797b01d09SBen Walker 64897b01d09SBen Walker add_file(td, spdk_bdev_get_name(bdev), 0, 1); 64997b01d09SBen Walker return 0; 65097b01d09SBen Walker } 65197b01d09SBen Walker 65297b01d09SBen Walker static void 65397b01d09SBen Walker spdk_fio_setup_oat(void *_ctx) 65497b01d09SBen Walker { 65597b01d09SBen Walker struct spdk_fio_oat_ctx *ctx = _ctx; 65697b01d09SBen Walker struct thread_data *td = ctx->u.sa.td; 65797b01d09SBen Walker unsigned int i; 65897b01d09SBen Walker struct fio_file *f; 65997b01d09SBen Walker 66097b01d09SBen Walker if (td->o.nr_files == 1 && strcmp(td->files[0]->file_name, "*") == 0) { 66197b01d09SBen Walker /* add all available bdevs as fio targets */ 66297b01d09SBen Walker spdk_for_each_bdev_leaf(td, _spdk_fio_add_file); 66397b01d09SBen Walker } 66497b01d09SBen Walker 66597b01d09SBen Walker for_each_file(td, f, i) { 66697b01d09SBen Walker struct spdk_bdev *bdev; 66797b01d09SBen Walker 66897b01d09SBen Walker if (strcmp(f->file_name, "*") == 0) { 66997b01d09SBen Walker /* Explicitly set file size to 0 here to make sure fio doesn't try to 67097b01d09SBen Walker * actually send I/O to this "*" file. 67197b01d09SBen Walker */ 67297b01d09SBen Walker f->real_file_size = 0; 67397b01d09SBen Walker continue; 67497b01d09SBen Walker } 67597b01d09SBen Walker 67697b01d09SBen Walker bdev = spdk_bdev_get_by_name(f->file_name); 67797b01d09SBen Walker if (!bdev) { 67897b01d09SBen Walker SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name); 67997b01d09SBen Walker ctx->ret = -1; 68097b01d09SBen Walker goto out; 68197b01d09SBen Walker } 68297b01d09SBen Walker 68397b01d09SBen Walker f->real_file_size = spdk_bdev_get_num_blocks(bdev) * 68497b01d09SBen Walker spdk_bdev_get_block_size(bdev); 68597b01d09SBen Walker f->filetype = FIO_TYPE_BLOCK; 68697b01d09SBen Walker fio_file_set_size_known(f); 68797b01d09SBen Walker 68897b01d09SBen Walker ctx->ret = spdk_fio_handle_options(td, f, bdev); 68997b01d09SBen Walker if (ctx->ret) { 69097b01d09SBen Walker goto out; 69197b01d09SBen Walker } 69297b01d09SBen Walker } 69397b01d09SBen Walker 69497b01d09SBen Walker ctx->ret = 0; 69597b01d09SBen Walker out: 69697b01d09SBen Walker spdk_fio_wake_oat_waiter(ctx); 69797b01d09SBen Walker } 69897b01d09SBen Walker 69997b01d09SBen Walker static void 70097b01d09SBen Walker fio_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 70197b01d09SBen Walker void *event_ctx) 70297b01d09SBen Walker { 70397b01d09SBen Walker SPDK_WARNLOG("Unsupported bdev event: type %d\n", type); 70497b01d09SBen Walker } 70597b01d09SBen Walker 70697b01d09SBen Walker static void 70797b01d09SBen Walker spdk_fio_bdev_open(void *arg) 70897b01d09SBen Walker { 70997b01d09SBen Walker struct thread_data *td = arg; 71097b01d09SBen Walker struct spdk_fio_thread *fio_thread; 71197b01d09SBen Walker unsigned int i; 71297b01d09SBen Walker struct fio_file *f; 71397b01d09SBen Walker int rc; 71497b01d09SBen Walker 71597b01d09SBen Walker fio_thread = td->io_ops_data; 71697b01d09SBen Walker 71797b01d09SBen Walker for_each_file(td, f, i) { 71897b01d09SBen Walker struct spdk_fio_target *target; 71997b01d09SBen Walker 72097b01d09SBen Walker if (strcmp(f->file_name, "*") == 0) { 72197b01d09SBen Walker continue; 72297b01d09SBen Walker } 72397b01d09SBen Walker 72497b01d09SBen Walker target = calloc(1, sizeof(*target)); 72597b01d09SBen Walker if (!target) { 72697b01d09SBen Walker SPDK_ERRLOG("Unable to allocate memory for I/O target.\n"); 72797b01d09SBen Walker fio_thread->failed = true; 72897b01d09SBen Walker return; 72997b01d09SBen Walker } 73097b01d09SBen Walker 73197b01d09SBen Walker rc = spdk_bdev_open_ext(f->file_name, true, fio_bdev_event_cb, NULL, 73297b01d09SBen Walker &target->desc); 73397b01d09SBen Walker if (rc) { 73497b01d09SBen Walker SPDK_ERRLOG("Unable to open bdev %s\n", f->file_name); 73597b01d09SBen Walker free(target); 73697b01d09SBen Walker fio_thread->failed = true; 73797b01d09SBen Walker return; 73897b01d09SBen Walker } 73997b01d09SBen Walker 74097b01d09SBen Walker target->bdev = spdk_bdev_desc_get_bdev(target->desc); 74197b01d09SBen Walker 74297b01d09SBen Walker target->ch = spdk_bdev_get_io_channel(target->desc); 74397b01d09SBen Walker if (!target->ch) { 74497b01d09SBen Walker SPDK_ERRLOG("Unable to get I/O channel for bdev.\n"); 74597b01d09SBen Walker spdk_bdev_close(target->desc); 74697b01d09SBen Walker free(target); 74797b01d09SBen Walker fio_thread->failed = true; 74897b01d09SBen Walker return; 74997b01d09SBen Walker } 75097b01d09SBen Walker 75197b01d09SBen Walker f->engine_data = target; 75297b01d09SBen Walker 75397b01d09SBen Walker rc = spdk_fio_handle_options_per_target(td, f); 75497b01d09SBen Walker if (rc) { 75597b01d09SBen Walker SPDK_ERRLOG("Failed to handle options for: %s\n", f->file_name); 75697b01d09SBen Walker f->engine_data = NULL; 75797b01d09SBen Walker spdk_put_io_channel(target->ch); 75897b01d09SBen Walker spdk_bdev_close(target->desc); 75997b01d09SBen Walker free(target); 76097b01d09SBen Walker fio_thread->failed = true; 76197b01d09SBen Walker return; 76297b01d09SBen Walker } 76397b01d09SBen Walker 76497b01d09SBen Walker TAILQ_INSERT_TAIL(&fio_thread->targets, target, link); 76597b01d09SBen Walker } 76697b01d09SBen Walker } 76797b01d09SBen Walker 76897b01d09SBen Walker /* Called for each thread, on that thread, shortly after the thread 76997b01d09SBen Walker * starts. 77097b01d09SBen Walker * 77197b01d09SBen Walker * Also called by spdk_fio_report_zones(), since we need an I/O channel 77297b01d09SBen Walker * in order to get the zone report. (fio calls the .report_zones callback 77397b01d09SBen Walker * before it calls the .init callback.) 77497b01d09SBen Walker * Therefore, if fio was run with --zonemode=zbd, the thread will already 77597b01d09SBen Walker * be initialized by the time that fio calls the .init callback. 77697b01d09SBen Walker */ 77797b01d09SBen Walker static int 77897b01d09SBen Walker spdk_fio_init(struct thread_data *td) 77997b01d09SBen Walker { 78097b01d09SBen Walker struct spdk_fio_thread *fio_thread; 78197b01d09SBen Walker int rc; 78297b01d09SBen Walker 78397b01d09SBen Walker if (spdk_fio_init_spdk_env(td) != 0) { 78497b01d09SBen Walker return -1; 78597b01d09SBen Walker } 78697b01d09SBen Walker 78797b01d09SBen Walker /* If thread has already been initialized, do nothing. */ 78897b01d09SBen Walker if (td->io_ops_data) { 78997b01d09SBen Walker return 0; 79097b01d09SBen Walker } 79197b01d09SBen Walker 79297b01d09SBen Walker rc = spdk_fio_init_thread(td); 79397b01d09SBen Walker if (rc) { 79497b01d09SBen Walker return rc; 79597b01d09SBen Walker } 79697b01d09SBen Walker 79797b01d09SBen Walker fio_thread = td->io_ops_data; 79897b01d09SBen Walker assert(fio_thread); 79997b01d09SBen Walker fio_thread->failed = false; 80097b01d09SBen Walker 80197b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_open, td); 80297b01d09SBen Walker 80397b01d09SBen Walker while (spdk_fio_poll_thread(fio_thread) > 0) {} 80497b01d09SBen Walker 80597b01d09SBen Walker if (fio_thread->failed) { 80697b01d09SBen Walker return -1; 80797b01d09SBen Walker } 80897b01d09SBen Walker 80997b01d09SBen Walker return 0; 81097b01d09SBen Walker } 81197b01d09SBen Walker 81297b01d09SBen Walker static void 81397b01d09SBen Walker spdk_fio_cleanup(struct thread_data *td) 81497b01d09SBen Walker { 81597b01d09SBen Walker struct spdk_fio_thread *fio_thread = td->io_ops_data; 81697b01d09SBen Walker 81797b01d09SBen Walker spdk_fio_cleanup_thread(fio_thread); 81897b01d09SBen Walker td->io_ops_data = NULL; 81997b01d09SBen Walker } 82097b01d09SBen Walker 82197b01d09SBen Walker static int 82297b01d09SBen Walker spdk_fio_open(struct thread_data *td, struct fio_file *f) 82397b01d09SBen Walker { 82497b01d09SBen Walker 82597b01d09SBen Walker return 0; 82697b01d09SBen Walker } 82797b01d09SBen Walker 82897b01d09SBen Walker static int 82997b01d09SBen Walker spdk_fio_close(struct thread_data *td, struct fio_file *f) 83097b01d09SBen Walker { 83197b01d09SBen Walker return 0; 83297b01d09SBen Walker } 83397b01d09SBen Walker 83497b01d09SBen Walker static int 83597b01d09SBen Walker spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) 83697b01d09SBen Walker { 837c2c1a767SJim Harris struct spdk_fio_thread *fio_thread = td->io_ops_data; 838c2c1a767SJim Harris struct spdk_fio_target *fio_target; 839c2c1a767SJim Harris int32_t numa_id = SPDK_ENV_NUMA_ID_ANY, tmp_numa_id; 840c2c1a767SJim Harris 841c2c1a767SJim Harris /* If all bdevs used by this fio_thread have the same numa socket 842c2c1a767SJim Harris * id, allocate from that socket. If they come from different numa 843c2c1a767SJim Harris * sockets, then don't try to optimize and just use NUMA_ID_ANY. 844c2c1a767SJim Harris */ 845c2c1a767SJim Harris TAILQ_FOREACH(fio_target, &fio_thread->targets, link) { 846c2c1a767SJim Harris tmp_numa_id = spdk_bdev_get_numa_id(fio_target->bdev); 847c2c1a767SJim Harris if (numa_id == SPDK_ENV_NUMA_ID_ANY) { 848c2c1a767SJim Harris numa_id = tmp_numa_id; 84944970291SJim Harris } else if (tmp_numa_id != numa_id && 85044970291SJim Harris tmp_numa_id != SPDK_ENV_NUMA_ID_ANY) { 851c2c1a767SJim Harris numa_id = SPDK_ENV_NUMA_ID_ANY; 852c2c1a767SJim Harris break; 853c2c1a767SJim Harris } 854c2c1a767SJim Harris } 855c2c1a767SJim Harris 856c2c1a767SJim Harris td->orig_buffer = spdk_dma_zmalloc_socket(total_mem, 0x1000, NULL, numa_id); 85797b01d09SBen Walker return td->orig_buffer == NULL; 85897b01d09SBen Walker } 85997b01d09SBen Walker 86097b01d09SBen Walker static void 86197b01d09SBen Walker spdk_fio_iomem_free(struct thread_data *td) 86297b01d09SBen Walker { 86397b01d09SBen Walker spdk_dma_free(td->orig_buffer); 86497b01d09SBen Walker } 86597b01d09SBen Walker 86697b01d09SBen Walker static int 86797b01d09SBen Walker spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) 86897b01d09SBen Walker { 86997b01d09SBen Walker struct spdk_fio_request *fio_req; 87097b01d09SBen Walker 87197b01d09SBen Walker io_u->engine_data = NULL; 87297b01d09SBen Walker 87397b01d09SBen Walker fio_req = calloc(1, sizeof(*fio_req)); 87497b01d09SBen Walker if (fio_req == NULL) { 87597b01d09SBen Walker return 1; 87697b01d09SBen Walker } 87797b01d09SBen Walker fio_req->io = io_u; 87897b01d09SBen Walker fio_req->td = td; 87997b01d09SBen Walker 88097b01d09SBen Walker io_u->engine_data = fio_req; 88197b01d09SBen Walker 88297b01d09SBen Walker return 0; 88397b01d09SBen Walker } 88497b01d09SBen Walker 88597b01d09SBen Walker static void 88697b01d09SBen Walker spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) 88797b01d09SBen Walker { 88897b01d09SBen Walker struct spdk_fio_request *fio_req = io_u->engine_data; 88997b01d09SBen Walker 89097b01d09SBen Walker if (fio_req) { 89197b01d09SBen Walker assert(fio_req->io == io_u); 89297b01d09SBen Walker free(fio_req); 89397b01d09SBen Walker io_u->engine_data = NULL; 89497b01d09SBen Walker } 89597b01d09SBen Walker } 89697b01d09SBen Walker 89797b01d09SBen Walker static void 89897b01d09SBen Walker spdk_fio_completion_cb(struct spdk_bdev_io *bdev_io, 89997b01d09SBen Walker bool success, 90097b01d09SBen Walker void *cb_arg) 90197b01d09SBen Walker { 90297b01d09SBen Walker struct spdk_fio_request *fio_req = cb_arg; 90397b01d09SBen Walker struct thread_data *td = fio_req->td; 90497b01d09SBen Walker struct spdk_fio_thread *fio_thread = td->io_ops_data; 90597b01d09SBen Walker 90697b01d09SBen Walker assert(fio_thread->iocq_count < fio_thread->iocq_size); 90797b01d09SBen Walker fio_req->io->error = success ? 0 : EIO; 90897b01d09SBen Walker fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; 90997b01d09SBen Walker 91097b01d09SBen Walker spdk_bdev_free_io(bdev_io); 91197b01d09SBen Walker } 91297b01d09SBen Walker 91397b01d09SBen Walker #if FIO_IOOPS_VERSION >= 24 91497b01d09SBen Walker typedef enum fio_q_status fio_q_status_t; 91597b01d09SBen Walker #else 91697b01d09SBen Walker typedef int fio_q_status_t; 91797b01d09SBen Walker #endif 91897b01d09SBen Walker 91997b01d09SBen Walker static uint64_t 92097b01d09SBen Walker spdk_fio_zone_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *zone_start, 92197b01d09SBen Walker uint64_t num_bytes, uint64_t *num_blocks) 92297b01d09SBen Walker { 92397b01d09SBen Walker uint32_t block_size = spdk_bdev_get_block_size(bdev); 92497b01d09SBen Walker *zone_start = spdk_bdev_get_zone_id(bdev, offset_bytes / block_size); 92597b01d09SBen Walker *num_blocks = num_bytes / block_size; 92697b01d09SBen Walker return (offset_bytes % block_size) | (num_bytes % block_size); 92797b01d09SBen Walker } 92897b01d09SBen Walker 92997b01d09SBen Walker static fio_q_status_t 93097b01d09SBen Walker spdk_fio_queue(struct thread_data *td, struct io_u *io_u) 93197b01d09SBen Walker { 93297b01d09SBen Walker int rc = 1; 93397b01d09SBen Walker struct spdk_fio_request *fio_req = io_u->engine_data; 93497b01d09SBen Walker struct spdk_fio_target *target = io_u->file->engine_data; 93597b01d09SBen Walker 93697b01d09SBen Walker assert(fio_req->td == td); 93797b01d09SBen Walker 93897b01d09SBen Walker if (!target) { 93997b01d09SBen Walker SPDK_ERRLOG("Unable to look up correct I/O target.\n"); 94097b01d09SBen Walker fio_req->io->error = ENODEV; 94197b01d09SBen Walker return FIO_Q_COMPLETED; 94297b01d09SBen Walker } 94397b01d09SBen Walker 94497b01d09SBen Walker switch (io_u->ddir) { 94597b01d09SBen Walker case DDIR_READ: 94697b01d09SBen Walker rc = spdk_bdev_read(target->desc, target->ch, 94797b01d09SBen Walker io_u->buf, io_u->offset, io_u->xfer_buflen, 94897b01d09SBen Walker spdk_fio_completion_cb, fio_req); 94997b01d09SBen Walker break; 95097b01d09SBen Walker case DDIR_WRITE: 95197b01d09SBen Walker if (!target->zone_append_enabled) { 95297b01d09SBen Walker rc = spdk_bdev_write(target->desc, target->ch, 95397b01d09SBen Walker io_u->buf, io_u->offset, io_u->xfer_buflen, 95497b01d09SBen Walker spdk_fio_completion_cb, fio_req); 95597b01d09SBen Walker } else { 95697b01d09SBen Walker uint64_t zone_start, num_blocks; 95797b01d09SBen Walker if (spdk_fio_zone_bytes_to_blocks(target->bdev, io_u->offset, &zone_start, 95897b01d09SBen Walker io_u->xfer_buflen, &num_blocks) != 0) { 95997b01d09SBen Walker rc = -EINVAL; 96097b01d09SBen Walker break; 96197b01d09SBen Walker } 96297b01d09SBen Walker rc = spdk_bdev_zone_append(target->desc, target->ch, io_u->buf, 96397b01d09SBen Walker zone_start, num_blocks, spdk_fio_completion_cb, 96497b01d09SBen Walker fio_req); 96597b01d09SBen Walker } 96697b01d09SBen Walker break; 96797b01d09SBen Walker case DDIR_TRIM: 96897b01d09SBen Walker rc = spdk_bdev_unmap(target->desc, target->ch, 96997b01d09SBen Walker io_u->offset, io_u->xfer_buflen, 97097b01d09SBen Walker spdk_fio_completion_cb, fio_req); 97197b01d09SBen Walker break; 97297b01d09SBen Walker case DDIR_SYNC: 97397b01d09SBen Walker rc = spdk_bdev_flush(target->desc, target->ch, 97497b01d09SBen Walker io_u->offset, io_u->xfer_buflen, 97597b01d09SBen Walker spdk_fio_completion_cb, fio_req); 97697b01d09SBen Walker break; 97797b01d09SBen Walker default: 97897b01d09SBen Walker assert(false); 97997b01d09SBen Walker break; 98097b01d09SBen Walker } 98197b01d09SBen Walker 98297b01d09SBen Walker if (rc == -ENOMEM) { 98397b01d09SBen Walker return FIO_Q_BUSY; 98497b01d09SBen Walker } 98597b01d09SBen Walker 98697b01d09SBen Walker if (rc != 0) { 98797b01d09SBen Walker fio_req->io->error = abs(rc); 98897b01d09SBen Walker return FIO_Q_COMPLETED; 98997b01d09SBen Walker } 99097b01d09SBen Walker 99197b01d09SBen Walker return FIO_Q_QUEUED; 99297b01d09SBen Walker } 99397b01d09SBen Walker 99497b01d09SBen Walker static struct io_u * 99597b01d09SBen Walker spdk_fio_event(struct thread_data *td, int event) 99697b01d09SBen Walker { 99797b01d09SBen Walker struct spdk_fio_thread *fio_thread = td->io_ops_data; 99897b01d09SBen Walker 99997b01d09SBen Walker assert(event >= 0); 100097b01d09SBen Walker assert((unsigned)event < fio_thread->iocq_count); 100197b01d09SBen Walker return fio_thread->iocq[event]; 100297b01d09SBen Walker } 100397b01d09SBen Walker 100497b01d09SBen Walker static size_t 100597b01d09SBen Walker spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread) 100697b01d09SBen Walker { 100797b01d09SBen Walker return spdk_thread_poll(fio_thread->thread, 0, 0); 100897b01d09SBen Walker } 100997b01d09SBen Walker 101097b01d09SBen Walker static int 101197b01d09SBen Walker spdk_fio_getevents(struct thread_data *td, unsigned int min, 101297b01d09SBen Walker unsigned int max, const struct timespec *t) 101397b01d09SBen Walker { 101497b01d09SBen Walker struct spdk_fio_thread *fio_thread = td->io_ops_data; 101597b01d09SBen Walker struct timespec t0, t1; 101697b01d09SBen Walker uint64_t timeout = 0; 101797b01d09SBen Walker 101897b01d09SBen Walker if (t) { 101997b01d09SBen Walker timeout = t->tv_sec * SPDK_SEC_TO_NSEC + t->tv_nsec; 102097b01d09SBen Walker clock_gettime(CLOCK_MONOTONIC_RAW, &t0); 102197b01d09SBen Walker } 102297b01d09SBen Walker 102397b01d09SBen Walker fio_thread->iocq_count = 0; 102497b01d09SBen Walker 102597b01d09SBen Walker for (;;) { 102697b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 102797b01d09SBen Walker 102897b01d09SBen Walker if (fio_thread->iocq_count >= min) { 102997b01d09SBen Walker return fio_thread->iocq_count; 103097b01d09SBen Walker } 103197b01d09SBen Walker 103297b01d09SBen Walker if (t) { 103397b01d09SBen Walker clock_gettime(CLOCK_MONOTONIC_RAW, &t1); 103497b01d09SBen Walker uint64_t elapse = ((t1.tv_sec - t0.tv_sec) * SPDK_SEC_TO_NSEC) 103597b01d09SBen Walker + t1.tv_nsec - t0.tv_nsec; 103697b01d09SBen Walker if (elapse > timeout) { 103797b01d09SBen Walker break; 103897b01d09SBen Walker } 103997b01d09SBen Walker } 104097b01d09SBen Walker } 104197b01d09SBen Walker 104297b01d09SBen Walker return fio_thread->iocq_count; 104397b01d09SBen Walker } 104497b01d09SBen Walker 104597b01d09SBen Walker static int 104697b01d09SBen Walker spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) 104797b01d09SBen Walker { 104897b01d09SBen Walker /* TODO: This should probably send a flush to the device, but for now just return successful. */ 104997b01d09SBen Walker return 0; 105097b01d09SBen Walker } 105197b01d09SBen Walker 105297b01d09SBen Walker #if FIO_HAS_ZBD 105397b01d09SBen Walker /* Runs on app thread (oat) */ 105497b01d09SBen Walker static void 105597b01d09SBen Walker spdk_fio_get_zoned_model_oat(void *arg) 105697b01d09SBen Walker { 105797b01d09SBen Walker struct spdk_fio_oat_ctx *ctx = arg; 105897b01d09SBen Walker struct fio_file *f = ctx->u.zma.f; 105997b01d09SBen Walker enum zbd_zoned_model *model = ctx->u.zma.model; 106097b01d09SBen Walker struct spdk_bdev *bdev; 106197b01d09SBen Walker 106297b01d09SBen Walker if (f->filetype != FIO_TYPE_BLOCK) { 106397b01d09SBen Walker SPDK_ERRLOG("Unsupported filetype: %d\n", f->filetype); 106497b01d09SBen Walker ctx->ret = -EINVAL; 106597b01d09SBen Walker goto out; 106697b01d09SBen Walker } 106797b01d09SBen Walker 106897b01d09SBen Walker bdev = spdk_bdev_get_by_name(f->file_name); 106997b01d09SBen Walker if (!bdev) { 107097b01d09SBen Walker SPDK_ERRLOG("Cannot get zoned model, no bdev with name: %s\n", f->file_name); 107197b01d09SBen Walker ctx->ret = -ENODEV; 107297b01d09SBen Walker goto out; 107397b01d09SBen Walker } 107497b01d09SBen Walker 107597b01d09SBen Walker if (spdk_bdev_is_zoned(bdev)) { 107697b01d09SBen Walker *model = ZBD_HOST_MANAGED; 107797b01d09SBen Walker } else { 107897b01d09SBen Walker *model = ZBD_NONE; 107997b01d09SBen Walker } 108097b01d09SBen Walker 108197b01d09SBen Walker ctx->ret = 0; 108297b01d09SBen Walker out: 108397b01d09SBen Walker spdk_fio_wake_oat_waiter(ctx); 108497b01d09SBen Walker } 108597b01d09SBen Walker 108697b01d09SBen Walker static int 108797b01d09SBen Walker spdk_fio_get_zoned_model(struct thread_data *td, struct fio_file *f, enum zbd_zoned_model *model) 108897b01d09SBen Walker { 108997b01d09SBen Walker struct spdk_fio_oat_ctx ctx = { 0 }; 109097b01d09SBen Walker 109197b01d09SBen Walker ctx.u.zma.f = f; 109297b01d09SBen Walker ctx.u.zma.model = model; 109397b01d09SBen Walker 109497b01d09SBen Walker spdk_fio_sync_run_oat(spdk_fio_get_zoned_model_oat, &ctx); 109597b01d09SBen Walker 109697b01d09SBen Walker return ctx.ret; 109797b01d09SBen Walker } 109897b01d09SBen Walker 109997b01d09SBen Walker 110097b01d09SBen Walker static void 110197b01d09SBen Walker spdk_fio_bdev_get_zone_info_done(struct spdk_bdev_io *bdev_io, bool success, void *arg) 110297b01d09SBen Walker { 110397b01d09SBen Walker struct spdk_fio_zone_cb_arg *cb_arg = arg; 110497b01d09SBen Walker unsigned int i; 110597b01d09SBen Walker int handled_zones = 0; 110697b01d09SBen Walker 110797b01d09SBen Walker if (!success) { 110897b01d09SBen Walker spdk_bdev_free_io(bdev_io); 110997b01d09SBen Walker cb_arg->completed = -EIO; 111097b01d09SBen Walker return; 111197b01d09SBen Walker } 111297b01d09SBen Walker 111397b01d09SBen Walker for (i = 0; i < cb_arg->nr_zones; i++) { 111497b01d09SBen Walker struct spdk_bdev_zone_info *zone_src = &cb_arg->spdk_zones[handled_zones]; 111597b01d09SBen Walker struct zbd_zone *zone_dest = &cb_arg->fio_zones[handled_zones]; 111697b01d09SBen Walker uint32_t block_size = spdk_bdev_get_block_size(cb_arg->target->bdev); 111797b01d09SBen Walker 111897b01d09SBen Walker switch (zone_src->type) { 111997b01d09SBen Walker case SPDK_BDEV_ZONE_TYPE_SEQWR: 112097b01d09SBen Walker zone_dest->type = ZBD_ZONE_TYPE_SWR; 112197b01d09SBen Walker break; 112297b01d09SBen Walker case SPDK_BDEV_ZONE_TYPE_SEQWP: 112397b01d09SBen Walker zone_dest->type = ZBD_ZONE_TYPE_SWP; 112497b01d09SBen Walker break; 112597b01d09SBen Walker case SPDK_BDEV_ZONE_TYPE_CNV: 112697b01d09SBen Walker zone_dest->type = ZBD_ZONE_TYPE_CNV; 112797b01d09SBen Walker break; 112897b01d09SBen Walker default: 112997b01d09SBen Walker spdk_bdev_free_io(bdev_io); 113097b01d09SBen Walker cb_arg->completed = -EIO; 113197b01d09SBen Walker return; 113297b01d09SBen Walker } 113397b01d09SBen Walker 113497b01d09SBen Walker zone_dest->len = spdk_bdev_get_zone_size(cb_arg->target->bdev) * block_size; 113597b01d09SBen Walker zone_dest->capacity = zone_src->capacity * block_size; 113697b01d09SBen Walker zone_dest->start = zone_src->zone_id * block_size; 113797b01d09SBen Walker zone_dest->wp = zone_src->write_pointer * block_size; 113897b01d09SBen Walker 113997b01d09SBen Walker switch (zone_src->state) { 114097b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_EMPTY: 114197b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_EMPTY; 114297b01d09SBen Walker break; 114397b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_IMP_OPEN: 114497b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_IMP_OPEN; 114597b01d09SBen Walker break; 114697b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_EXP_OPEN: 114797b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_EXP_OPEN; 114897b01d09SBen Walker break; 114997b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_FULL: 115097b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_FULL; 115197b01d09SBen Walker break; 115297b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_CLOSED: 115397b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_CLOSED; 115497b01d09SBen Walker break; 115597b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_READ_ONLY: 115697b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_READONLY; 115797b01d09SBen Walker break; 115897b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_OFFLINE: 115997b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_OFFLINE; 116097b01d09SBen Walker break; 116197b01d09SBen Walker case SPDK_BDEV_ZONE_STATE_NOT_WP: 116297b01d09SBen Walker zone_dest->cond = ZBD_ZONE_COND_NOT_WP; 116397b01d09SBen Walker /* Set WP to end of zone for zone types w/o WP (e.g. Conv. zones in SMR) */ 116497b01d09SBen Walker zone_dest->wp = zone_dest->start + zone_dest->capacity; 116597b01d09SBen Walker break; 116697b01d09SBen Walker default: 116797b01d09SBen Walker spdk_bdev_free_io(bdev_io); 116897b01d09SBen Walker cb_arg->completed = -EIO; 116997b01d09SBen Walker return; 117097b01d09SBen Walker } 117197b01d09SBen Walker handled_zones++; 117297b01d09SBen Walker } 117397b01d09SBen Walker 117497b01d09SBen Walker spdk_bdev_free_io(bdev_io); 117597b01d09SBen Walker cb_arg->completed = handled_zones; 117697b01d09SBen Walker } 117797b01d09SBen Walker 117897b01d09SBen Walker static void 117997b01d09SBen Walker spdk_fio_bdev_get_zone_info(void *arg) 118097b01d09SBen Walker { 118197b01d09SBen Walker struct spdk_fio_zone_cb_arg *cb_arg = arg; 118297b01d09SBen Walker struct spdk_fio_target *target = cb_arg->target; 118397b01d09SBen Walker int rc; 118497b01d09SBen Walker 118597b01d09SBen Walker rc = spdk_bdev_get_zone_info(target->desc, target->ch, cb_arg->offset_blocks, 118697b01d09SBen Walker cb_arg->nr_zones, cb_arg->spdk_zones, 118797b01d09SBen Walker spdk_fio_bdev_get_zone_info_done, cb_arg); 118897b01d09SBen Walker if (rc < 0) { 118997b01d09SBen Walker cb_arg->completed = rc; 119097b01d09SBen Walker } 119197b01d09SBen Walker } 119297b01d09SBen Walker 119397b01d09SBen Walker static int 119497b01d09SBen Walker spdk_fio_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset, 119597b01d09SBen Walker struct zbd_zone *zones, unsigned int nr_zones) 119697b01d09SBen Walker { 119797b01d09SBen Walker struct spdk_fio_target *target; 119897b01d09SBen Walker struct spdk_fio_thread *fio_thread; 119997b01d09SBen Walker struct spdk_fio_zone_cb_arg cb_arg; 120097b01d09SBen Walker uint32_t block_size; 120197b01d09SBen Walker int rc; 120297b01d09SBen Walker 120397b01d09SBen Walker if (nr_zones == 0) { 120497b01d09SBen Walker return 0; 120597b01d09SBen Walker } 120697b01d09SBen Walker 120797b01d09SBen Walker /* spdk_fio_report_zones() is only called before the bdev I/O channels have been created. 120897b01d09SBen Walker * Since we need an I/O channel for report_zones(), call spdk_fio_init() to initialize 120997b01d09SBen Walker * the thread early. 121097b01d09SBen Walker * spdk_fio_report_zones() might be called several times by fio, if e.g. the zone report 121197b01d09SBen Walker * for all zones does not fit in the buffer that fio has allocated for the zone report. 121297b01d09SBen Walker * It is safe to call spdk_fio_init(), even if the thread has already been initialized. 121397b01d09SBen Walker */ 121497b01d09SBen Walker rc = spdk_fio_init(td); 121597b01d09SBen Walker if (rc) { 121697b01d09SBen Walker return rc; 121797b01d09SBen Walker } 121897b01d09SBen Walker fio_thread = td->io_ops_data; 121997b01d09SBen Walker target = f->engine_data; 122097b01d09SBen Walker 122197b01d09SBen Walker assert(fio_thread); 122297b01d09SBen Walker assert(target); 122397b01d09SBen Walker 122497b01d09SBen Walker block_size = spdk_bdev_get_block_size(target->bdev); 122597b01d09SBen Walker 122697b01d09SBen Walker cb_arg.target = target; 122797b01d09SBen Walker cb_arg.completed = 0; 122897b01d09SBen Walker cb_arg.offset_blocks = offset / block_size; 122997b01d09SBen Walker cb_arg.fio_zones = zones; 123097b01d09SBen Walker cb_arg.nr_zones = spdk_min(nr_zones, spdk_bdev_get_num_zones(target->bdev)); 123197b01d09SBen Walker 123297b01d09SBen Walker cb_arg.spdk_zones = calloc(1, sizeof(*cb_arg.spdk_zones) * cb_arg.nr_zones); 123397b01d09SBen Walker if (!cb_arg.spdk_zones) { 123497b01d09SBen Walker SPDK_ERRLOG("Could not allocate memory for zone report!\n"); 123597b01d09SBen Walker rc = -ENOMEM; 123697b01d09SBen Walker goto cleanup_thread; 123797b01d09SBen Walker } 123897b01d09SBen Walker 123997b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_get_zone_info, &cb_arg); 124097b01d09SBen Walker do { 124197b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 124297b01d09SBen Walker } while (!cb_arg.completed); 124397b01d09SBen Walker 124497b01d09SBen Walker /* Free cb_arg.spdk_zones. The report in fio format is stored in cb_arg.fio_zones/zones. */ 124597b01d09SBen Walker free(cb_arg.spdk_zones); 124697b01d09SBen Walker 124797b01d09SBen Walker rc = cb_arg.completed; 124897b01d09SBen Walker if (rc < 0) { 124997b01d09SBen Walker SPDK_ERRLOG("Failed to get zone info: %d\n", rc); 125097b01d09SBen Walker goto cleanup_thread; 125197b01d09SBen Walker } 125297b01d09SBen Walker 125397b01d09SBen Walker /* Return the amount of zones successfully copied. */ 125497b01d09SBen Walker return rc; 125597b01d09SBen Walker 125697b01d09SBen Walker cleanup_thread: 125797b01d09SBen Walker spdk_fio_cleanup(td); 125897b01d09SBen Walker 125997b01d09SBen Walker return rc; 126097b01d09SBen Walker } 126197b01d09SBen Walker 126297b01d09SBen Walker static void 126397b01d09SBen Walker spdk_fio_bdev_zone_reset_done(struct spdk_bdev_io *bdev_io, bool success, void *arg) 126497b01d09SBen Walker { 126597b01d09SBen Walker struct spdk_fio_zone_cb_arg *cb_arg = arg; 126697b01d09SBen Walker 126797b01d09SBen Walker spdk_bdev_free_io(bdev_io); 126897b01d09SBen Walker 126997b01d09SBen Walker if (!success) { 127097b01d09SBen Walker cb_arg->completed = -EIO; 127197b01d09SBen Walker } else { 127297b01d09SBen Walker cb_arg->completed = 1; 127397b01d09SBen Walker } 127497b01d09SBen Walker } 127597b01d09SBen Walker 127697b01d09SBen Walker static void 127797b01d09SBen Walker spdk_fio_bdev_zone_reset(void *arg) 127897b01d09SBen Walker { 127997b01d09SBen Walker struct spdk_fio_zone_cb_arg *cb_arg = arg; 128097b01d09SBen Walker struct spdk_fio_target *target = cb_arg->target; 128197b01d09SBen Walker int rc; 128297b01d09SBen Walker 128397b01d09SBen Walker rc = spdk_bdev_zone_management(target->desc, target->ch, cb_arg->offset_blocks, 128497b01d09SBen Walker SPDK_BDEV_ZONE_RESET, 128597b01d09SBen Walker spdk_fio_bdev_zone_reset_done, cb_arg); 128697b01d09SBen Walker if (rc < 0) { 128797b01d09SBen Walker cb_arg->completed = rc; 128897b01d09SBen Walker } 128997b01d09SBen Walker } 129097b01d09SBen Walker 129197b01d09SBen Walker static int 129297b01d09SBen Walker spdk_fio_reset_zones(struct spdk_fio_thread *fio_thread, struct spdk_fio_target *target, 129397b01d09SBen Walker uint64_t offset, uint64_t length) 129497b01d09SBen Walker { 129597b01d09SBen Walker uint64_t zone_size_bytes; 129697b01d09SBen Walker uint32_t block_size; 129797b01d09SBen Walker int rc; 129897b01d09SBen Walker 129997b01d09SBen Walker assert(fio_thread); 130097b01d09SBen Walker assert(target); 130197b01d09SBen Walker 130297b01d09SBen Walker block_size = spdk_bdev_get_block_size(target->bdev); 130397b01d09SBen Walker zone_size_bytes = spdk_bdev_get_zone_size(target->bdev) * block_size; 130497b01d09SBen Walker 130597b01d09SBen Walker for (uint64_t cur = offset; cur < offset + length; cur += zone_size_bytes) { 130697b01d09SBen Walker struct spdk_fio_zone_cb_arg cb_arg = { 130797b01d09SBen Walker .target = target, 130897b01d09SBen Walker .completed = 0, 130997b01d09SBen Walker .offset_blocks = cur / block_size, 131097b01d09SBen Walker }; 131197b01d09SBen Walker 131297b01d09SBen Walker spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_zone_reset, &cb_arg); 131397b01d09SBen Walker do { 131497b01d09SBen Walker spdk_fio_poll_thread(fio_thread); 131597b01d09SBen Walker } while (!cb_arg.completed); 131697b01d09SBen Walker 131797b01d09SBen Walker rc = cb_arg.completed; 131897b01d09SBen Walker if (rc < 0) { 131997b01d09SBen Walker SPDK_ERRLOG("Failed to reset zone: %d\n", rc); 132097b01d09SBen Walker return rc; 132197b01d09SBen Walker } 132297b01d09SBen Walker } 132397b01d09SBen Walker 132497b01d09SBen Walker return 0; 132597b01d09SBen Walker } 132697b01d09SBen Walker 132797b01d09SBen Walker static int 132897b01d09SBen Walker spdk_fio_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset, uint64_t length) 132997b01d09SBen Walker { 133097b01d09SBen Walker return spdk_fio_reset_zones(td->io_ops_data, f->engine_data, offset, length); 133197b01d09SBen Walker } 133297b01d09SBen Walker #endif 133397b01d09SBen Walker 133497b01d09SBen Walker #if FIO_IOOPS_VERSION >= 30 133597b01d09SBen Walker static void 133697b01d09SBen Walker spdk_fio_get_max_open_zones_oat(void *_ctx) 133797b01d09SBen Walker { 133897b01d09SBen Walker struct spdk_fio_oat_ctx *ctx = _ctx; 133997b01d09SBen Walker struct fio_file *f = ctx->u.moza.f; 134097b01d09SBen Walker struct spdk_bdev *bdev; 134197b01d09SBen Walker 134297b01d09SBen Walker bdev = spdk_bdev_get_by_name(f->file_name); 134397b01d09SBen Walker if (!bdev) { 134497b01d09SBen Walker SPDK_ERRLOG("Cannot get max open zones, no bdev with name: %s\n", f->file_name); 134597b01d09SBen Walker ctx->ret = -ENODEV; 134697b01d09SBen Walker } else { 134797b01d09SBen Walker *ctx->u.moza.max_open_zones = spdk_bdev_get_max_open_zones(bdev); 134897b01d09SBen Walker ctx->ret = 0; 134997b01d09SBen Walker } 135097b01d09SBen Walker 135197b01d09SBen Walker spdk_fio_wake_oat_waiter(ctx); 135297b01d09SBen Walker } 135397b01d09SBen Walker 135497b01d09SBen Walker static int 135597b01d09SBen Walker spdk_fio_get_max_open_zones(struct thread_data *td, struct fio_file *f, 135697b01d09SBen Walker unsigned int *max_open_zones) 135797b01d09SBen Walker { 135897b01d09SBen Walker struct spdk_fio_oat_ctx ctx = { 0 }; 135997b01d09SBen Walker 136097b01d09SBen Walker ctx.u.moza.f = f; 136197b01d09SBen Walker ctx.u.moza.max_open_zones = max_open_zones; 136297b01d09SBen Walker 136397b01d09SBen Walker spdk_fio_sync_run_oat(spdk_fio_get_max_open_zones_oat, &ctx); 136497b01d09SBen Walker 136597b01d09SBen Walker return ctx.ret; 136697b01d09SBen Walker } 136797b01d09SBen Walker #endif 136897b01d09SBen Walker 136997b01d09SBen Walker static int 137097b01d09SBen Walker spdk_fio_handle_options(struct thread_data *td, struct fio_file *f, struct spdk_bdev *bdev) 137197b01d09SBen Walker { 137297b01d09SBen Walker struct spdk_fio_options *fio_options = td->eo; 137397b01d09SBen Walker 137497b01d09SBen Walker if (fio_options->initial_zone_reset && spdk_bdev_is_zoned(bdev)) { 137597b01d09SBen Walker #if FIO_HAS_ZBD 137697b01d09SBen Walker int rc = spdk_fio_init(td); 137797b01d09SBen Walker if (rc) { 137897b01d09SBen Walker return rc; 137997b01d09SBen Walker } 138097b01d09SBen Walker /* offset used to indicate conventional zones that need to be skipped (reset not allowed) */ 138197b01d09SBen Walker rc = spdk_fio_reset_zones(td->io_ops_data, f->engine_data, td->o.start_offset, 138297b01d09SBen Walker f->real_file_size - td->o.start_offset); 138397b01d09SBen Walker if (rc) { 138497b01d09SBen Walker spdk_fio_cleanup(td); 138597b01d09SBen Walker return rc; 138697b01d09SBen Walker } 138797b01d09SBen Walker #else 138897b01d09SBen Walker SPDK_ERRLOG("fio version is too old to support zoned block devices\n"); 138997b01d09SBen Walker #endif 139097b01d09SBen Walker } 139197b01d09SBen Walker 139297b01d09SBen Walker return 0; 139397b01d09SBen Walker } 139497b01d09SBen Walker 139597b01d09SBen Walker static int 139697b01d09SBen Walker spdk_fio_handle_options_per_target(struct thread_data *td, struct fio_file *f) 139797b01d09SBen Walker { 139897b01d09SBen Walker struct spdk_fio_target *target = f->engine_data; 139997b01d09SBen Walker struct spdk_fio_options *fio_options = td->eo; 140097b01d09SBen Walker 140197b01d09SBen Walker if (fio_options->zone_append && spdk_bdev_is_zoned(target->bdev)) { 140297b01d09SBen Walker if (spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) { 140397b01d09SBen Walker SPDK_DEBUGLOG(fio_bdev, "Using zone appends instead of writes on: '%s'\n", 140497b01d09SBen Walker f->file_name); 140597b01d09SBen Walker target->zone_append_enabled = true; 140697b01d09SBen Walker } else { 140797b01d09SBen Walker SPDK_WARNLOG("Falling back to writes on: '%s' - bdev lacks zone append cmd\n", 140897b01d09SBen Walker f->file_name); 140997b01d09SBen Walker } 141097b01d09SBen Walker } 141197b01d09SBen Walker 141297b01d09SBen Walker return 0; 141397b01d09SBen Walker } 141497b01d09SBen Walker 141597b01d09SBen Walker static struct fio_option options[] = { 141697b01d09SBen Walker { 141797b01d09SBen Walker .name = "spdk_conf", 141897b01d09SBen Walker .lname = "SPDK configuration file", 141997b01d09SBen Walker .type = FIO_OPT_STR_STORE, 142097b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, conf), 142197b01d09SBen Walker .help = "A SPDK JSON configuration file", 142297b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 142397b01d09SBen Walker .group = FIO_OPT_G_INVALID, 142497b01d09SBen Walker }, 142597b01d09SBen Walker { 142697b01d09SBen Walker .name = "spdk_json_conf", 142797b01d09SBen Walker .lname = "SPDK JSON configuration file", 142897b01d09SBen Walker .type = FIO_OPT_STR_STORE, 142997b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, json_conf), 143097b01d09SBen Walker .help = "A SPDK JSON configuration file", 143197b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 143297b01d09SBen Walker .group = FIO_OPT_G_INVALID, 143397b01d09SBen Walker }, 143497b01d09SBen Walker { 143597b01d09SBen Walker .name = "spdk_mem", 143697b01d09SBen Walker .lname = "SPDK memory in MB", 143797b01d09SBen Walker .type = FIO_OPT_INT, 143897b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, mem_mb), 143997b01d09SBen Walker .help = "Amount of memory in MB to allocate for SPDK", 144097b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 144197b01d09SBen Walker .group = FIO_OPT_G_INVALID, 144297b01d09SBen Walker }, 144397b01d09SBen Walker { 144497b01d09SBen Walker .name = "spdk_single_seg", 144597b01d09SBen Walker .lname = "SPDK switch to create just a single hugetlbfs file", 144697b01d09SBen Walker .type = FIO_OPT_BOOL, 144797b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, mem_single_seg), 144897b01d09SBen Walker .help = "If set to 1, SPDK will use just a single hugetlbfs file", 144997b01d09SBen Walker .def = "0", 145097b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 145197b01d09SBen Walker .group = FIO_OPT_G_INVALID, 145297b01d09SBen Walker }, 145397b01d09SBen Walker { 145497b01d09SBen Walker .name = "log_flags", 145597b01d09SBen Walker .lname = "log flags", 145697b01d09SBen Walker .type = FIO_OPT_STR_STORE, 145797b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, log_flags), 145897b01d09SBen Walker .help = "SPDK log flags to enable", 145997b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 146097b01d09SBen Walker .group = FIO_OPT_G_INVALID, 146197b01d09SBen Walker }, 146297b01d09SBen Walker { 146397b01d09SBen Walker .name = "initial_zone_reset", 146497b01d09SBen Walker .lname = "Reset Zones on initialization", 146597b01d09SBen Walker .type = FIO_OPT_INT, 146697b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, initial_zone_reset), 146797b01d09SBen Walker .def = "0", 146897b01d09SBen Walker .help = "Reset Zones on initialization (0=disable, 1=Reset All Zones)", 146997b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 147097b01d09SBen Walker .group = FIO_OPT_G_INVALID, 147197b01d09SBen Walker }, 147297b01d09SBen Walker { 147397b01d09SBen Walker .name = "zone_append", 147497b01d09SBen Walker .lname = "Use zone append instead of write", 147597b01d09SBen Walker .type = FIO_OPT_INT, 147697b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, zone_append), 147797b01d09SBen Walker .def = "0", 147897b01d09SBen Walker .help = "Use zone append instead of write (1=zone append, 0=write)", 147997b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 148097b01d09SBen Walker .group = FIO_OPT_G_INVALID, 148197b01d09SBen Walker }, 148297b01d09SBen Walker { 148397b01d09SBen Walker .name = "env_context", 148497b01d09SBen Walker .lname = "Environment context options", 148597b01d09SBen Walker .type = FIO_OPT_STR_STORE, 148697b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, env_context), 148797b01d09SBen Walker .help = "Opaque context for use of the env implementation", 148897b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 148997b01d09SBen Walker .group = FIO_OPT_G_INVALID, 149097b01d09SBen Walker }, 149197b01d09SBen Walker { 149297b01d09SBen Walker .name = "spdk_rpc_listen_addr", 149397b01d09SBen Walker .lname = "SPDK RPC listen address", 149497b01d09SBen Walker .type = FIO_OPT_STR_STORE, 149597b01d09SBen Walker .off1 = offsetof(struct spdk_fio_options, rpc_listen_addr), 149697b01d09SBen Walker .help = "The address to listen the RPC operations", 149797b01d09SBen Walker .category = FIO_OPT_C_ENGINE, 149897b01d09SBen Walker .group = FIO_OPT_G_INVALID, 149997b01d09SBen Walker }, 150097b01d09SBen Walker { 150197b01d09SBen Walker .name = NULL, 150297b01d09SBen Walker }, 150397b01d09SBen Walker }; 150497b01d09SBen Walker 150597b01d09SBen Walker /* FIO imports this structure using dlsym */ 150697b01d09SBen Walker struct ioengine_ops ioengine = { 150797b01d09SBen Walker .name = "spdk_bdev", 150897b01d09SBen Walker .version = FIO_IOOPS_VERSION, 150997b01d09SBen Walker .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO, 151097b01d09SBen Walker .setup = spdk_fio_setup, 151197b01d09SBen Walker .init = spdk_fio_init, 151297b01d09SBen Walker /* .prep = unused, */ 151397b01d09SBen Walker .queue = spdk_fio_queue, 151497b01d09SBen Walker /* .commit = unused, */ 151597b01d09SBen Walker .getevents = spdk_fio_getevents, 151697b01d09SBen Walker .event = spdk_fio_event, 151797b01d09SBen Walker /* .errdetails = unused, */ 151897b01d09SBen Walker /* .cancel = unused, */ 151997b01d09SBen Walker .cleanup = spdk_fio_cleanup, 152097b01d09SBen Walker .open_file = spdk_fio_open, 152197b01d09SBen Walker .close_file = spdk_fio_close, 152297b01d09SBen Walker .invalidate = spdk_fio_invalidate, 152397b01d09SBen Walker /* .unlink_file = unused, */ 152497b01d09SBen Walker /* .get_file_size = unused, */ 152597b01d09SBen Walker /* .terminate = unused, */ 152697b01d09SBen Walker .iomem_alloc = spdk_fio_iomem_alloc, 152797b01d09SBen Walker .iomem_free = spdk_fio_iomem_free, 152897b01d09SBen Walker .io_u_init = spdk_fio_io_u_init, 152997b01d09SBen Walker .io_u_free = spdk_fio_io_u_free, 153097b01d09SBen Walker #if FIO_HAS_ZBD 153197b01d09SBen Walker .get_zoned_model = spdk_fio_get_zoned_model, 153297b01d09SBen Walker .report_zones = spdk_fio_report_zones, 153397b01d09SBen Walker .reset_wp = spdk_fio_reset_wp, 153497b01d09SBen Walker #endif 153597b01d09SBen Walker #if FIO_IOOPS_VERSION >= 30 153697b01d09SBen Walker .get_max_open_zones = spdk_fio_get_max_open_zones, 153797b01d09SBen Walker #endif 153897b01d09SBen Walker .option_struct_size = sizeof(struct spdk_fio_options), 153997b01d09SBen Walker .options = options, 154097b01d09SBen Walker }; 154197b01d09SBen Walker 154297b01d09SBen Walker static void fio_init 154397b01d09SBen Walker spdk_fio_register(void) 154497b01d09SBen Walker { 154597b01d09SBen Walker register_ioengine(&ioengine); 154697b01d09SBen Walker } 154797b01d09SBen Walker 154897b01d09SBen Walker static void 154997b01d09SBen Walker spdk_fio_finish_env(void) 155097b01d09SBen Walker { 155197b01d09SBen Walker pthread_mutex_lock(&g_init_mtx); 155297b01d09SBen Walker g_poll_loop = false; 155397b01d09SBen Walker pthread_cond_signal(&g_init_cond); 155497b01d09SBen Walker pthread_mutex_unlock(&g_init_mtx); 155597b01d09SBen Walker pthread_join(g_init_thread_id, NULL); 155697b01d09SBen Walker 155797b01d09SBen Walker spdk_thread_lib_fini(); 155897b01d09SBen Walker spdk_env_fini(); 155997b01d09SBen Walker } 156097b01d09SBen Walker 156197b01d09SBen Walker static void fio_exit 156297b01d09SBen Walker spdk_fio_unregister(void) 156397b01d09SBen Walker { 156497b01d09SBen Walker if (g_spdk_env_initialized) { 156597b01d09SBen Walker spdk_fio_finish_env(); 156697b01d09SBen Walker g_spdk_env_initialized = false; 156797b01d09SBen Walker } 156897b01d09SBen Walker unregister_ioengine(&ioengine); 156997b01d09SBen Walker } 157097b01d09SBen Walker 157197b01d09SBen Walker SPDK_LOG_REGISTER_COMPONENT(fio_bdev) 1572