1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2016 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 4 */ 5 6 #include "spdk/stdinc.h" 7 8 #include "spdk/nvme.h" 9 #include "spdk/nvme_zns.h" 10 #include "spdk/vmd.h" 11 #include "spdk/env.h" 12 #include "spdk/string.h" 13 #include "spdk/log.h" 14 #include "spdk/likely.h" 15 #include "spdk/endian.h" 16 #include "spdk/dif.h" 17 #include "spdk/util.h" 18 #include "spdk/trace.h" 19 20 #include "config-host.h" 21 #include "fio.h" 22 #include "optgroup.h" 23 24 #ifdef for_each_rw_ddir 25 #define FIO_HAS_ZBD (FIO_IOOPS_VERSION >= 26) 26 #define FIO_HAS_FDP (FIO_IOOPS_VERSION >= 32) 27 #define FIO_HAS_MRT (FIO_IOOPS_VERSION >= 34) 28 #else 29 #define FIO_HAS_ZBD (0) 30 #define FIO_HAS_FDP (0) 31 #define FIO_HAS_MRT (0) 32 #endif 33 34 /* FreeBSD is missing CLOCK_MONOTONIC_RAW, 35 * so alternative is provided. */ 36 #ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ 37 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC 38 #endif 39 40 #define NVME_IO_ALIGN 4096 41 42 static bool g_spdk_env_initialized; 43 static bool g_log_flag_error; 44 static int g_spdk_enable_sgl = 0; 45 static uint32_t g_spdk_sge_size = 4096; 46 static uint32_t g_spdk_bit_bucket_data_len = 0; 47 static uint32_t g_spdk_pract_flag; 48 static uint32_t g_spdk_prchk_flags; 49 static uint32_t g_spdk_md_per_io_size = 4096; 50 static uint16_t g_spdk_apptag; 51 static uint16_t g_spdk_apptag_mask; 52 53 struct spdk_fio_options { 54 void *pad; /* off1 used in option descriptions may not be 0 */ 55 int enable_wrr; 56 int arbitration_burst; 57 int low_weight; 58 int medium_weight; 59 int high_weight; 60 int wrr_priority; 61 int mem_size; 62 int shm_id; 63 int enable_sgl; 64 int sge_size; 65 int bit_bucket_data_len; 66 char *hostnqn; 67 int pi_act; 68 char *pi_chk; 69 int md_per_io_size; 70 int apptag; 71 int apptag_mask; 72 char *digest_enable; 73 int enable_vmd; 74 int initial_zone_reset; 75 int zone_append; 76 int print_qid_mappings; 77 int spdk_tracing; 78 char *log_flags; 79 }; 80 81 struct spdk_fio_request { 82 struct io_u *io; 83 /** Offset in current iovec, fio only uses 1 vector */ 84 uint32_t iov_offset; 85 86 /** Amount of data used for Bit Bucket SGL */ 87 uint32_t bit_bucket_data_len; 88 89 /** Context for NVMe PI */ 90 struct spdk_dif_ctx dif_ctx; 91 /** Separate metadata buffer pointer */ 92 void *md_buf; 93 94 /** Dataset management range information */ 95 struct spdk_nvme_dsm_range *dsm_range; 96 97 struct spdk_fio_thread *fio_thread; 98 struct spdk_fio_qpair *fio_qpair; 99 }; 100 101 struct spdk_fio_ctrlr { 102 struct spdk_nvme_transport_id tr_id; 103 struct spdk_nvme_ctrlr_opts opts; 104 struct spdk_nvme_ctrlr *ctrlr; 105 TAILQ_ENTRY(spdk_fio_ctrlr) link; 106 }; 107 108 static TAILQ_HEAD(, spdk_fio_ctrlr) g_ctrlrs = TAILQ_HEAD_INITIALIZER(g_ctrlrs); 109 static int g_td_count; 110 static pthread_t g_ctrlr_thread_id = 0; 111 static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER; 112 static bool g_error; 113 114 struct spdk_fio_qpair { 115 struct fio_file *f; 116 struct spdk_nvme_qpair *qpair; 117 struct spdk_nvme_ns *ns; 118 uint32_t io_flags; 119 bool zone_append_enabled; 120 bool nvme_pi_enabled; 121 /* True for DIF and false for DIX, and this is valid only if nvme_pi_enabled is true. */ 122 bool extended_lba; 123 /* True for protection info transferred at start of metadata, 124 * false for protection info transferred at end of metadata, and 125 * this is valid only if nvme_pi_enabled is true. 126 */ 127 bool md_start; 128 TAILQ_ENTRY(spdk_fio_qpair) link; 129 struct spdk_fio_ctrlr *fio_ctrlr; 130 }; 131 132 struct spdk_fio_thread { 133 struct thread_data *td; 134 135 TAILQ_HEAD(, spdk_fio_qpair) fio_qpair; 136 struct spdk_fio_qpair *fio_qpair_current; /* the current fio_qpair to be handled. */ 137 138 struct io_u **iocq; /* io completion queue */ 139 unsigned int iocq_count; /* number of iocq entries filled by last getevents */ 140 unsigned int iocq_size; /* number of iocq entries allocated */ 141 142 }; 143 144 struct spdk_fio_probe_ctx { 145 struct thread_data *td; 146 char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 147 struct fio_file *f; /* fio_file given by user */ 148 }; 149 150 static void * 151 spdk_fio_poll_ctrlrs(void *arg) 152 { 153 struct spdk_fio_ctrlr *fio_ctrlr; 154 int oldstate; 155 int rc; 156 157 /* Loop until the thread is cancelled */ 158 while (true) { 159 rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); 160 if (rc != 0) { 161 SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", 162 rc, spdk_strerror(rc)); 163 } 164 165 pthread_mutex_lock(&g_mutex); 166 167 TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) { 168 spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr); 169 } 170 171 pthread_mutex_unlock(&g_mutex); 172 173 rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); 174 if (rc != 0) { 175 SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", 176 rc, spdk_strerror(rc)); 177 } 178 179 /* This is a pthread cancellation point and cannot be removed. */ 180 sleep(1); 181 } 182 183 return NULL; 184 } 185 186 static bool 187 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 188 struct spdk_nvme_ctrlr_opts *opts) 189 { 190 struct spdk_fio_probe_ctx *ctx = cb_ctx; 191 struct thread_data *td = ctx->td; 192 struct spdk_fio_options *fio_options = td->eo; 193 194 if (ctx->hostnqn[0] != '\0') { 195 memcpy(opts->hostnqn, ctx->hostnqn, sizeof(opts->hostnqn)); 196 } else if (fio_options->hostnqn) { 197 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn); 198 } 199 200 if (fio_options->enable_wrr) { 201 opts->arb_mechanism = SPDK_NVME_CC_AMS_WRR; 202 opts->arbitration_burst = fio_options->arbitration_burst; 203 opts->low_priority_weight = fio_options->low_weight; 204 opts->medium_priority_weight = fio_options->medium_weight; 205 opts->high_priority_weight = fio_options->high_weight; 206 } 207 208 if (fio_options->digest_enable) { 209 if (strcasecmp(fio_options->digest_enable, "HEADER") == 0) { 210 opts->header_digest = true; 211 } else if (strcasecmp(fio_options->digest_enable, "DATA") == 0) { 212 opts->data_digest = true; 213 } else if (strcasecmp(fio_options->digest_enable, "BOTH") == 0) { 214 opts->header_digest = true; 215 opts->data_digest = true; 216 } 217 } 218 219 return true; 220 } 221 222 static struct spdk_fio_ctrlr * 223 get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) 224 { 225 struct spdk_fio_ctrlr *fio_ctrlr; 226 227 TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) { 228 if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) { 229 return fio_ctrlr; 230 } 231 } 232 233 return NULL; 234 } 235 236 /** 237 * Returns the fio_qpair matching the given fio_file and has an associated ns 238 */ 239 static struct spdk_fio_qpair * 240 get_fio_qpair(struct spdk_fio_thread *fio_thread, struct fio_file *f) 241 { 242 struct spdk_fio_qpair *fio_qpair; 243 244 TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) { 245 if ((fio_qpair->f == f) && fio_qpair->ns) { 246 return fio_qpair; 247 } 248 } 249 250 return NULL; 251 } 252 253 #if FIO_HAS_ZBD 254 /** 255 * Callback function to use while processing completions until completion-indicator turns non-zero 256 */ 257 static void 258 pcu_cb(void *ctx, const struct spdk_nvme_cpl *cpl) 259 { 260 int *completed = ctx; 261 262 *completed = spdk_nvme_cpl_is_error(cpl) ? -1 : 1; 263 } 264 265 /** 266 * Process Completions Until the given 'completed' indicator turns non-zero or an error occurs 267 */ 268 static int32_t 269 pcu(struct spdk_nvme_qpair *qpair, int *completed) 270 { 271 int32_t ret; 272 273 while (!*completed) { 274 ret = spdk_nvme_qpair_process_completions(qpair, 1); 275 if (ret < 0) { 276 log_err("spdk/nvme: process_compl(): ret: %d\n", ret); 277 return ret; 278 } 279 } 280 281 return 0; 282 } 283 #endif 284 285 static inline uint32_t 286 _nvme_get_host_buffer_sector_size(struct spdk_nvme_ns *ns, uint32_t io_flags) 287 { 288 bool md_excluded_from_xfer = false; 289 uint32_t md_size; 290 uint32_t ns_flags; 291 292 ns_flags = spdk_nvme_ns_get_flags(ns); 293 md_size = spdk_nvme_ns_get_md_size(ns); 294 295 /* For extended LBA format, if the metadata size is 8 bytes and PRACT is 296 * enabled(controller inserts/strips PI), we should reduce metadata size 297 * from block size. 298 */ 299 md_excluded_from_xfer = ((io_flags & SPDK_NVME_IO_FLAGS_PRACT) && 300 (ns_flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) && 301 (ns_flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) && 302 (md_size == 8)); 303 304 return md_excluded_from_xfer ? spdk_nvme_ns_get_sector_size(ns) : 305 spdk_nvme_ns_get_extended_sector_size(ns); 306 } 307 308 static void 309 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 310 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 311 { 312 struct spdk_fio_probe_ctx *ctx = cb_ctx; 313 struct thread_data *td = ctx->td; 314 struct spdk_fio_thread *fio_thread = td->io_ops_data; 315 struct spdk_fio_ctrlr *fio_ctrlr; 316 struct spdk_fio_qpair *fio_qpair; 317 struct spdk_nvme_ns *ns; 318 const struct spdk_nvme_ns_data *nsdata; 319 struct fio_file *f = ctx->f; 320 uint32_t ns_id; 321 char *p; 322 long int tmp; 323 uint32_t block_size; 324 struct spdk_fio_options *fio_options = td->eo; 325 326 p = strstr(f->file_name, "ns="); 327 if (p != NULL) { 328 tmp = spdk_strtol(p + 3, 10); 329 if (tmp <= 0) { 330 SPDK_ERRLOG("namespace id should be >=1, but was invalid: %ld\n", tmp); 331 g_error = true; 332 return; 333 } 334 ns_id = (uint32_t)tmp; 335 } else { 336 ns_id = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); 337 if (ns_id == 0) { 338 /* The ctrlr has no active namespaces and we didn't specify any so nothing to do. */ 339 return; 340 } 341 } 342 343 pthread_mutex_lock(&g_mutex); 344 fio_ctrlr = get_fio_ctrlr(trid); 345 /* it is a new ctrlr and needs to be added */ 346 if (!fio_ctrlr) { 347 /* Create an fio_ctrlr and add it to the list */ 348 fio_ctrlr = calloc(1, sizeof(*fio_ctrlr)); 349 if (!fio_ctrlr) { 350 SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n"); 351 g_error = true; 352 pthread_mutex_unlock(&g_mutex); 353 return; 354 } 355 fio_ctrlr->opts = *opts; 356 fio_ctrlr->ctrlr = ctrlr; 357 fio_ctrlr->tr_id = *trid; 358 TAILQ_INSERT_TAIL(&g_ctrlrs, fio_ctrlr, link); 359 } 360 pthread_mutex_unlock(&g_mutex); 361 362 ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id); 363 if (ns == NULL) { 364 SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id); 365 g_error = true; 366 return; 367 } 368 369 if (!spdk_nvme_ns_is_active(ns)) { 370 SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id); 371 g_error = true; 372 return; 373 } 374 nsdata = spdk_nvme_ns_get_data(ns); 375 376 TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) { 377 if ((fio_qpair->f == f) || 378 ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) && 379 (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) { 380 /* Not the error case. Avoid duplicated connection */ 381 return; 382 } 383 } 384 385 /* create a new qpair */ 386 fio_qpair = calloc(1, sizeof(*fio_qpair)); 387 if (!fio_qpair) { 388 g_error = true; 389 SPDK_ERRLOG("Cannot allocate space for fio_qpair\n"); 390 return; 391 } 392 393 f->engine_data = fio_qpair; 394 fio_qpair->ns = ns; 395 fio_qpair->f = f; 396 fio_qpair->fio_ctrlr = fio_ctrlr; 397 TAILQ_INSERT_TAIL(&fio_thread->fio_qpair, fio_qpair, link); 398 399 if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { 400 assert(spdk_nvme_ns_get_pi_type(ns) != SPDK_NVME_FMT_NVM_PROTECTION_DISABLE); 401 fio_qpair->io_flags = g_spdk_pract_flag | g_spdk_prchk_flags; 402 fio_qpair->nvme_pi_enabled = true; 403 fio_qpair->md_start = nsdata->dps.md_start; 404 fio_qpair->extended_lba = spdk_nvme_ns_supports_extended_lba(ns); 405 fprintf(stdout, "PI type%u enabled with %s\n", spdk_nvme_ns_get_pi_type(ns), 406 fio_qpair->extended_lba ? "extended lba" : "separate metadata"); 407 } 408 409 block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags); 410 for_each_rw_ddir(ddir) { 411 if (td->o.min_bs[ddir] % block_size != 0 || td->o.max_bs[ddir] % block_size != 0) { 412 if (spdk_nvme_ns_supports_extended_lba(ns)) { 413 SPDK_ERRLOG("--bs or other block size related option has to be a multiple of (LBA data size + Metadata size)\n"); 414 } else { 415 SPDK_ERRLOG("--bs or other block size related option has to be a multiple of LBA data size\n"); 416 } 417 g_error = true; 418 return; 419 } 420 } 421 422 if (fio_options->zone_append && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) { 423 if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED) { 424 SPDK_DEBUGLOG(fio_nvme, "Using zone appends instead of writes on: '%s'\n", 425 f->file_name); 426 fio_qpair->zone_append_enabled = true; 427 } else { 428 SPDK_WARNLOG("Falling back to writes on: '%s' - ns lacks zone append cmd\n", 429 f->file_name); 430 } 431 } 432 433 #if FIO_HAS_ZBD 434 if (td_trim(td) && td->o.zone_mode == ZONE_MODE_ZBD) { 435 td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM; 436 } 437 #endif 438 439 if (fio_options->initial_zone_reset == 1 && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) { 440 #if FIO_HAS_ZBD 441 struct spdk_nvme_qpair *tmp_qpair; 442 int completed = 0, err; 443 444 /* qpair has not been allocated yet (it gets allocated in spdk_fio_open()). 445 * Create a temporary qpair in order to perform the initial zone reset. 446 */ 447 assert(!fio_qpair->qpair); 448 449 tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); 450 if (!tmp_qpair) { 451 SPDK_ERRLOG("Cannot allocate a temporary qpair\n"); 452 g_error = true; 453 return; 454 } 455 456 err = spdk_nvme_zns_reset_zone(ns, tmp_qpair, 0x0, true, pcu_cb, &completed); 457 if (err || pcu(tmp_qpair, &completed) || completed < 0) { 458 log_err("spdk/nvme: warn: initial_zone_reset: err: %d, cpl: %d\n", 459 err, completed); 460 } 461 462 spdk_nvme_ctrlr_free_io_qpair(tmp_qpair); 463 #else 464 log_err("spdk/nvme: ZBD/ZNS is not supported\n"); 465 #endif 466 } 467 468 f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); 469 if (f->real_file_size <= 0) { 470 g_error = true; 471 SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns); 472 return; 473 } 474 475 f->filetype = FIO_TYPE_BLOCK; 476 fio_file_set_size_known(f); 477 } 478 479 static void 480 parse_prchk_flags(const char *prchk_str) 481 { 482 if (!prchk_str) { 483 return; 484 } 485 486 if (strstr(prchk_str, "GUARD") != NULL) { 487 g_spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; 488 } 489 if (strstr(prchk_str, "REFTAG") != NULL) { 490 g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; 491 } 492 if (strstr(prchk_str, "APPTAG") != NULL) { 493 g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; 494 } 495 } 496 497 static void 498 parse_pract_flag(int pract) 499 { 500 if (pract == 1) { 501 g_spdk_pract_flag = SPDK_NVME_IO_FLAGS_PRACT; 502 } else { 503 g_spdk_pract_flag = 0; 504 } 505 } 506 507 static bool 508 fio_redirected_to_dev_null(void) 509 { 510 char path[PATH_MAX] = ""; 511 ssize_t ret; 512 513 ret = readlink("/proc/self/fd/1", path, sizeof(path)); 514 515 if (ret == -1 || strcmp(path, "/dev/null") != 0) { 516 return false; 517 } 518 519 ret = readlink("/proc/self/fd/2", path, sizeof(path)); 520 521 if (ret == -1 || strcmp(path, "/dev/null") != 0) { 522 return false; 523 } 524 525 return true; 526 } 527 528 static int 529 spdk_fio_init(struct thread_data *td) 530 { 531 int ret = 0; 532 struct spdk_fio_options *fio_options = td->eo; 533 534 if (fio_options->spdk_tracing) { 535 ret = spdk_trace_register_user_thread(); 536 } 537 538 return ret; 539 } 540 541 /* Called once at initialization. This is responsible for gathering the size of 542 * each "file", which in our case are in the form 543 * 'key=value [key=value] ... ns=value' 544 * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1' 545 * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */ 546 static int 547 spdk_fio_setup(struct thread_data *td) 548 { 549 struct spdk_fio_thread *fio_thread; 550 struct spdk_fio_options *fio_options = td->eo; 551 struct spdk_fio_probe_ctx ctx; 552 struct spdk_env_opts opts; 553 struct fio_file *f; 554 char *p; 555 int rc = 0; 556 struct spdk_nvme_transport_id trid; 557 struct spdk_fio_ctrlr *fio_ctrlr; 558 char *trid_info; 559 unsigned int i; 560 size_t size; 561 562 /* 563 * If we're running in a daemonized FIO instance, it's possible 564 * fd 1/2 were re-used for something important by FIO. Newer fio 565 * versions are careful to redirect those to /dev/null, but if we're 566 * not, we'll abort early, so we don't accidentally write messages to 567 * an important file, etc. 568 */ 569 if (is_backend && !fio_redirected_to_dev_null()) { 570 char buf[1024]; 571 snprintf(buf, sizeof(buf), 572 "SPDK FIO plugin is in daemon mode, but stdout/stderr " 573 "aren't redirected to /dev/null. Aborting."); 574 fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf)); 575 return -1; 576 } 577 578 if (!td->o.use_thread) { 579 log_err("spdk: must set thread=1 when using spdk plugin\n"); 580 return 1; 581 } 582 583 if (g_log_flag_error) { 584 /* The first thread found an error when parsing log flags, so 585 * just return error immediately for all of the other threads. 586 */ 587 return 1; 588 } 589 590 pthread_mutex_lock(&g_mutex); 591 592 fio_thread = calloc(1, sizeof(*fio_thread)); 593 assert(fio_thread != NULL); 594 595 td->io_ops_data = fio_thread; 596 fio_thread->td = td; 597 598 fio_thread->iocq_size = td->o.iodepth; 599 fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); 600 assert(fio_thread->iocq != NULL); 601 602 TAILQ_INIT(&fio_thread->fio_qpair); 603 604 if (!g_spdk_env_initialized) { 605 spdk_env_opts_init(&opts); 606 opts.name = "fio"; 607 opts.mem_size = fio_options->mem_size; 608 opts.shm_id = fio_options->shm_id; 609 g_spdk_enable_sgl = fio_options->enable_sgl; 610 g_spdk_sge_size = fio_options->sge_size; 611 g_spdk_bit_bucket_data_len = fio_options->bit_bucket_data_len; 612 parse_pract_flag(fio_options->pi_act); 613 g_spdk_md_per_io_size = spdk_max(fio_options->md_per_io_size, 4096); 614 g_spdk_apptag = (uint16_t)fio_options->apptag; 615 g_spdk_apptag_mask = (uint16_t)fio_options->apptag_mask; 616 parse_prchk_flags(fio_options->pi_chk); 617 if (spdk_env_init(&opts) < 0) { 618 SPDK_ERRLOG("Unable to initialize SPDK env\n"); 619 free(fio_thread->iocq); 620 free(fio_thread); 621 fio_thread = NULL; 622 pthread_mutex_unlock(&g_mutex); 623 return 1; 624 } 625 626 if (fio_options->log_flags) { 627 char *tok = strtok(fio_options->log_flags, ","); 628 do { 629 rc = spdk_log_set_flag(tok); 630 if (rc < 0) { 631 SPDK_ERRLOG("unknown log flag %s\n", tok); 632 g_log_flag_error = true; 633 return 1; 634 } 635 } while ((tok = strtok(NULL, ",")) != NULL); 636 #ifdef DEBUG 637 spdk_log_set_print_level(SPDK_LOG_DEBUG); 638 #endif 639 } 640 641 g_spdk_env_initialized = true; 642 spdk_unaffinitize_thread(); 643 644 if (fio_options->spdk_tracing) { 645 spdk_trace_init("spdk_fio_tracepoints", 65536, td->o.numjobs); 646 spdk_trace_enable_tpoint_group("nvme_pcie"); 647 spdk_trace_enable_tpoint_group("nvme_tcp"); 648 } 649 650 /* Spawn a thread to continue polling the controllers */ 651 rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL); 652 if (rc != 0) { 653 SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n"); 654 } 655 656 if (fio_options->enable_vmd && spdk_vmd_init()) { 657 SPDK_ERRLOG("Failed to initialize VMD. Some NVMe devices can be unavailable.\n"); 658 } 659 } 660 pthread_mutex_unlock(&g_mutex); 661 662 for_each_file(td, f, i) { 663 memset(&trid, 0, sizeof(trid)); 664 memset(&ctx, 0, sizeof(ctx)); 665 666 trid.trtype = SPDK_NVME_TRANSPORT_PCIE; 667 668 p = strstr(f->file_name, " ns="); 669 if (p != NULL) { 670 trid_info = strndup(f->file_name, p - f->file_name); 671 } else { 672 trid_info = strndup(f->file_name, strlen(f->file_name)); 673 } 674 675 if (!trid_info) { 676 SPDK_ERRLOG("Failed to allocate space for trid_info\n"); 677 continue; 678 } 679 680 rc = spdk_nvme_transport_id_parse(&trid, trid_info); 681 if (rc < 0) { 682 SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info); 683 free(trid_info); 684 continue; 685 } 686 free(trid_info); 687 688 if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { 689 struct spdk_pci_addr pci_addr; 690 if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) { 691 SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr); 692 continue; 693 } 694 spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); 695 } else { 696 if (trid.subnqn[0] == '\0') { 697 snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", 698 SPDK_NVMF_DISCOVERY_NQN); 699 } 700 if ((p = strcasestr(f->file_name, "hostnqn:")) || 701 (p = strcasestr(f->file_name, "hostnqn="))) { 702 p += strlen("hostnqn:"); 703 size = strcspn(p, " \t\n"); 704 if (size > sizeof(ctx.hostnqn)) { 705 SPDK_ERRLOG("Invalid hostnqn: too long\n"); 706 continue; 707 } 708 memcpy(ctx.hostnqn, p, size); 709 } 710 } 711 712 ctx.td = td; 713 ctx.f = f; 714 715 pthread_mutex_lock(&g_mutex); 716 fio_ctrlr = get_fio_ctrlr(&trid); 717 pthread_mutex_unlock(&g_mutex); 718 if (fio_ctrlr) { 719 attach_cb(&ctx, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts); 720 } else { 721 /* Enumerate all of the controllers */ 722 if (spdk_nvme_probe(&trid, &ctx, probe_cb, attach_cb, NULL) != 0) { 723 SPDK_ERRLOG("spdk_nvme_probe() failed\n"); 724 continue; 725 } 726 } 727 728 if (g_error) { 729 log_err("Failed to initialize spdk fio plugin\n"); 730 rc = 1; 731 break; 732 } 733 } 734 735 pthread_mutex_lock(&g_mutex); 736 g_td_count++; 737 pthread_mutex_unlock(&g_mutex); 738 739 return rc; 740 } 741 742 static int 743 spdk_fio_open(struct thread_data *td, struct fio_file *f) 744 { 745 struct spdk_fio_qpair *fio_qpair = f->engine_data; 746 struct spdk_fio_ctrlr *fio_ctrlr = fio_qpair->fio_ctrlr; 747 struct spdk_fio_options *fio_options = td->eo; 748 struct spdk_nvme_io_qpair_opts qpopts; 749 750 assert(fio_qpair->qpair == NULL); 751 spdk_nvme_ctrlr_get_default_io_qpair_opts(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts)); 752 qpopts.delay_cmd_submit = true; 753 if (fio_options->enable_wrr) { 754 qpopts.qprio = fio_options->wrr_priority; 755 } 756 757 fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts)); 758 if (!fio_qpair->qpair) { 759 SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n"); 760 g_error = true; 761 free(fio_qpair); 762 return -1; 763 } 764 765 if (fio_options->print_qid_mappings == 1) { 766 log_info("job %s: %s qid %d\n", td->o.name, f->file_name, 767 spdk_nvme_qpair_get_id(fio_qpair->qpair)); 768 } 769 770 return 0; 771 } 772 773 static int 774 spdk_fio_close(struct thread_data *td, struct fio_file *f) 775 { 776 struct spdk_fio_qpair *fio_qpair = f->engine_data; 777 778 assert(fio_qpair->qpair != NULL); 779 spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair); 780 fio_qpair->qpair = NULL; 781 return 0; 782 } 783 784 static int 785 spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) 786 { 787 td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL); 788 return td->orig_buffer == NULL; 789 } 790 791 static void 792 spdk_fio_iomem_free(struct thread_data *td) 793 { 794 spdk_dma_free(td->orig_buffer); 795 } 796 797 static int 798 spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) 799 { 800 struct spdk_fio_thread *fio_thread = td->io_ops_data; 801 struct spdk_fio_request *fio_req; 802 uint32_t dsm_size; 803 804 io_u->engine_data = NULL; 805 806 fio_req = calloc(1, sizeof(*fio_req)); 807 if (fio_req == NULL) { 808 return 1; 809 } 810 811 if (!(td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)) { 812 #if FIO_HAS_MRT 813 /* By default number of range is set to 1 */ 814 dsm_size = td->o.num_range * sizeof(struct spdk_nvme_dsm_range); 815 #else 816 dsm_size = sizeof(struct spdk_nvme_dsm_range); 817 #endif 818 fio_req->dsm_range = calloc(1, dsm_size); 819 if (fio_req->dsm_range == NULL) { 820 free(fio_req); 821 return 1; 822 } 823 } 824 825 fio_req->md_buf = spdk_dma_zmalloc(g_spdk_md_per_io_size, NVME_IO_ALIGN, NULL); 826 if (fio_req->md_buf == NULL) { 827 fprintf(stderr, "Allocate %u metadata failed\n", g_spdk_md_per_io_size); 828 free(fio_req->dsm_range); 829 free(fio_req); 830 return 1; 831 } 832 833 fio_req->io = io_u; 834 fio_req->fio_thread = fio_thread; 835 836 io_u->engine_data = fio_req; 837 838 return 0; 839 } 840 841 static void 842 spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) 843 { 844 struct spdk_fio_request *fio_req = io_u->engine_data; 845 846 if (fio_req) { 847 assert(fio_req->io == io_u); 848 spdk_dma_free(fio_req->md_buf); 849 free(fio_req->dsm_range); 850 free(fio_req); 851 io_u->engine_data = NULL; 852 } 853 } 854 855 static inline uint64_t 856 fio_offset_to_zslba(unsigned long long offset, struct spdk_nvme_ns *ns) 857 { 858 return (offset / spdk_nvme_zns_ns_get_zone_size(ns)) * spdk_nvme_zns_ns_get_zone_size_sectors(ns); 859 } 860 861 static int 862 fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) 863 { 864 struct spdk_nvme_ns *ns = fio_qpair->ns; 865 struct spdk_fio_request *fio_req = io_u->engine_data; 866 uint32_t md_size, extended_lba_size, lba_count; 867 uint64_t lba; 868 struct iovec iov; 869 int rc; 870 struct spdk_dif_ctx_init_ext_opts dif_opts; 871 872 /* Set appmask and apptag when PRACT is enabled */ 873 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { 874 fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask; 875 fio_req->dif_ctx.app_tag = g_spdk_apptag; 876 return 0; 877 } 878 879 extended_lba_size = spdk_nvme_ns_get_extended_sector_size(ns); 880 md_size = spdk_nvme_ns_get_md_size(ns); 881 lba = io_u->offset / extended_lba_size; 882 lba_count = io_u->xfer_buflen / extended_lba_size; 883 884 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 885 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 886 rc = spdk_dif_ctx_init(&fio_req->dif_ctx, extended_lba_size, md_size, 887 true, fio_qpair->md_start, 888 (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns), 889 fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 890 0, 0, &dif_opts); 891 if (rc != 0) { 892 fprintf(stderr, "Initialization of DIF context failed\n"); 893 return rc; 894 } 895 896 if (io_u->ddir != DDIR_WRITE) { 897 return 0; 898 } 899 900 iov.iov_base = io_u->buf; 901 iov.iov_len = io_u->xfer_buflen; 902 rc = spdk_dif_generate(&iov, 1, lba_count, &fio_req->dif_ctx); 903 if (rc != 0) { 904 fprintf(stderr, "Generation of DIF failed\n"); 905 } 906 907 return rc; 908 } 909 910 static int 911 fio_separate_md_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) 912 { 913 struct spdk_nvme_ns *ns = fio_qpair->ns; 914 struct spdk_fio_request *fio_req = io_u->engine_data; 915 uint32_t md_size, block_size, lba_count; 916 uint64_t lba; 917 struct iovec iov, md_iov; 918 int rc; 919 struct spdk_dif_ctx_init_ext_opts dif_opts; 920 921 /* Set appmask and apptag when PRACT is enabled */ 922 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { 923 fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask; 924 fio_req->dif_ctx.app_tag = g_spdk_apptag; 925 return 0; 926 } 927 928 block_size = spdk_nvme_ns_get_sector_size(ns); 929 md_size = spdk_nvme_ns_get_md_size(ns); 930 lba = io_u->offset / block_size; 931 lba_count = io_u->xfer_buflen / block_size; 932 933 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 934 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 935 rc = spdk_dif_ctx_init(&fio_req->dif_ctx, block_size, md_size, 936 false, fio_qpair->md_start, 937 (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns), 938 fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 939 0, 0, &dif_opts); 940 if (rc != 0) { 941 fprintf(stderr, "Initialization of DIF context failed\n"); 942 return rc; 943 } 944 945 if (io_u->ddir != DDIR_WRITE) { 946 return 0; 947 } 948 949 iov.iov_base = io_u->buf; 950 iov.iov_len = io_u->xfer_buflen; 951 md_iov.iov_base = fio_req->md_buf; 952 md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size); 953 rc = spdk_dix_generate(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx); 954 if (rc < 0) { 955 fprintf(stderr, "Generation of DIX failed\n"); 956 } 957 958 return rc; 959 } 960 961 static int 962 fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) 963 { 964 struct spdk_nvme_ns *ns = fio_qpair->ns; 965 struct spdk_fio_request *fio_req = io_u->engine_data; 966 uint32_t lba_count; 967 struct iovec iov; 968 struct spdk_dif_error err_blk = {}; 969 int rc; 970 971 /* Do nothing when PRACT is enabled */ 972 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { 973 return 0; 974 } 975 976 iov.iov_base = io_u->buf; 977 iov.iov_len = io_u->xfer_buflen; 978 lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_extended_sector_size(ns); 979 980 rc = spdk_dif_verify(&iov, 1, lba_count, &fio_req->dif_ctx, &err_blk); 981 if (rc != 0) { 982 fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n", 983 err_blk.err_type, err_blk.err_offset); 984 } 985 986 return rc; 987 } 988 989 static int 990 fio_separate_md_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) 991 { 992 struct spdk_nvme_ns *ns = fio_qpair->ns; 993 struct spdk_fio_request *fio_req = io_u->engine_data; 994 uint32_t md_size, lba_count; 995 struct iovec iov, md_iov; 996 struct spdk_dif_error err_blk = {}; 997 int rc; 998 999 /* Do nothing when PRACT is enabled */ 1000 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { 1001 return 0; 1002 } 1003 1004 iov.iov_base = io_u->buf; 1005 iov.iov_len = io_u->xfer_buflen; 1006 lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_sector_size(ns); 1007 md_size = spdk_nvme_ns_get_md_size(ns); 1008 md_iov.iov_base = fio_req->md_buf; 1009 md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size); 1010 1011 rc = spdk_dix_verify(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx, &err_blk); 1012 if (rc != 0) { 1013 fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n", 1014 err_blk.err_type, err_blk.err_offset); 1015 } 1016 1017 return rc; 1018 } 1019 1020 static void 1021 spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) 1022 { 1023 struct spdk_fio_request *fio_req = ctx; 1024 struct spdk_fio_thread *fio_thread = fio_req->fio_thread; 1025 struct spdk_fio_qpair *fio_qpair = fio_req->fio_qpair; 1026 int rc; 1027 1028 if (fio_qpair->nvme_pi_enabled && fio_req->io->ddir == DDIR_READ) { 1029 if (fio_qpair->extended_lba) { 1030 rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io); 1031 } else { 1032 rc = fio_separate_md_verify_pi(fio_qpair, fio_req->io); 1033 } 1034 if (rc != 0) { 1035 fio_req->io->error = abs(rc); 1036 } 1037 } 1038 1039 if (spdk_nvme_cpl_is_error(cpl)) { 1040 fio_req->io->error = EIO; 1041 } 1042 1043 assert(fio_thread->iocq_count < fio_thread->iocq_size); 1044 fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; 1045 } 1046 1047 static void 1048 spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset) 1049 { 1050 struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; 1051 1052 fio_req->iov_offset = sgl_offset; 1053 fio_req->bit_bucket_data_len = 0; 1054 } 1055 1056 static int 1057 spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length) 1058 { 1059 struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; 1060 struct io_u *io_u = fio_req->io; 1061 uint32_t iov_len; 1062 uint32_t bit_bucket_len; 1063 1064 *address = io_u->buf; 1065 1066 if (fio_req->iov_offset) { 1067 assert(fio_req->iov_offset <= io_u->xfer_buflen); 1068 *address += fio_req->iov_offset; 1069 } 1070 1071 iov_len = io_u->xfer_buflen - fio_req->iov_offset; 1072 if (iov_len > g_spdk_sge_size) { 1073 iov_len = g_spdk_sge_size; 1074 } 1075 1076 if ((fio_req->bit_bucket_data_len < g_spdk_bit_bucket_data_len) && (io_u->ddir == DDIR_READ)) { 1077 assert(g_spdk_bit_bucket_data_len < io_u->xfer_buflen); 1078 *address = (void *)UINT64_MAX; 1079 bit_bucket_len = g_spdk_bit_bucket_data_len - fio_req->bit_bucket_data_len; 1080 if (iov_len > bit_bucket_len) { 1081 iov_len = bit_bucket_len; 1082 } 1083 fio_req->bit_bucket_data_len += iov_len; 1084 } 1085 1086 fio_req->iov_offset += iov_len; 1087 *length = iov_len; 1088 1089 return 0; 1090 } 1091 1092 #if FIO_IOOPS_VERSION >= 24 1093 typedef enum fio_q_status fio_q_status_t; 1094 #else 1095 typedef int fio_q_status_t; 1096 #endif 1097 1098 static fio_q_status_t 1099 spdk_fio_queue(struct thread_data *td, struct io_u *io_u) 1100 { 1101 int rc = 1; 1102 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1103 struct spdk_fio_request *fio_req = io_u->engine_data; 1104 struct spdk_fio_qpair *fio_qpair; 1105 struct spdk_nvme_ns *ns = NULL; 1106 void *md_buf = NULL; 1107 struct spdk_dif_ctx *dif_ctx = &fio_req->dif_ctx; 1108 #if FIO_HAS_FDP 1109 struct spdk_nvme_ns_cmd_ext_io_opts ext_opts; 1110 #endif 1111 struct spdk_nvme_dsm_range *range; 1112 uint32_t block_size; 1113 uint64_t lba; 1114 uint32_t lba_count; 1115 uint32_t num_range; 1116 1117 fio_qpair = get_fio_qpair(fio_thread, io_u->file); 1118 if (fio_qpair == NULL) { 1119 return -ENXIO; 1120 } 1121 ns = fio_qpair->ns; 1122 1123 if (fio_qpair->nvme_pi_enabled && !fio_qpair->extended_lba) { 1124 md_buf = fio_req->md_buf; 1125 } 1126 fio_req->fio_qpair = fio_qpair; 1127 1128 block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags); 1129 lba = io_u->offset / block_size; 1130 lba_count = io_u->xfer_buflen / block_size; 1131 1132 #if FIO_HAS_FDP 1133 /* Only SGL support for write command with directives */ 1134 if (io_u->ddir == DDIR_WRITE && io_u->dtype && !g_spdk_enable_sgl) { 1135 log_err("spdk/nvme: queue() directives require SGL to be enabled\n"); 1136 io_u->error = -EINVAL; 1137 return FIO_Q_COMPLETED; 1138 } 1139 #endif 1140 1141 /* TODO: considering situations that fio will randomize and verify io_u */ 1142 if (fio_qpair->nvme_pi_enabled) { 1143 if (fio_qpair->extended_lba) { 1144 rc = fio_extended_lba_setup_pi(fio_qpair, io_u); 1145 } else { 1146 rc = fio_separate_md_setup_pi(fio_qpair, io_u); 1147 } 1148 if (rc < 0) { 1149 io_u->error = -rc; 1150 return FIO_Q_COMPLETED; 1151 } 1152 } 1153 1154 switch (io_u->ddir) { 1155 case DDIR_READ: 1156 if (!g_spdk_enable_sgl) { 1157 rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, lba_count, 1158 spdk_fio_completion_cb, fio_req, 1159 fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag); 1160 } else { 1161 rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, 1162 lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, 1163 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf, 1164 dif_ctx->apptag_mask, dif_ctx->app_tag); 1165 } 1166 break; 1167 case DDIR_WRITE: 1168 if (!g_spdk_enable_sgl) { 1169 if (!fio_qpair->zone_append_enabled) { 1170 rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, 1171 lba_count, 1172 spdk_fio_completion_cb, fio_req, 1173 fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag); 1174 } else { 1175 uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns); 1176 rc = spdk_nvme_zns_zone_append_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, zslba, 1177 lba_count, 1178 spdk_fio_completion_cb, fio_req, 1179 fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag); 1180 } 1181 } else { 1182 if (!fio_qpair->zone_append_enabled) { 1183 #if FIO_HAS_FDP 1184 if (spdk_unlikely(io_u->dtype)) { 1185 ext_opts.size = SPDK_SIZEOF(&ext_opts, cdw13); 1186 ext_opts.io_flags = fio_qpair->io_flags | (io_u->dtype << 20); 1187 ext_opts.metadata = md_buf; 1188 ext_opts.cdw13 = (io_u->dspec << 16); 1189 ext_opts.apptag = dif_ctx->app_tag; 1190 ext_opts.apptag_mask = dif_ctx->apptag_mask; 1191 rc = spdk_nvme_ns_cmd_writev_ext(ns, fio_qpair->qpair, lba, lba_count, 1192 spdk_fio_completion_cb, fio_req, 1193 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, &ext_opts); 1194 break; 1195 } 1196 #endif 1197 rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, 1198 lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, 1199 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf, 1200 dif_ctx->apptag_mask, dif_ctx->app_tag); 1201 } else { 1202 uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns); 1203 rc = spdk_nvme_zns_zone_appendv_with_md(ns, fio_qpair->qpair, zslba, 1204 lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, 1205 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf, 1206 dif_ctx->apptag_mask, dif_ctx->app_tag); 1207 } 1208 } 1209 break; 1210 case DDIR_TRIM: 1211 if (td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM) { 1212 do_io_u_trim(td, io_u); 1213 io_u_mark_submit(td, 1); 1214 io_u_mark_complete(td, 1); 1215 return FIO_Q_COMPLETED; 1216 } 1217 1218 range = fio_req->dsm_range; 1219 #if FIO_HAS_MRT 1220 if (td->o.num_range == 1) { 1221 range->attributes.raw = 0; 1222 range->length = lba_count; 1223 range->starting_lba = lba; 1224 num_range = 1; 1225 } else { 1226 struct trim_range *tr = (struct trim_range *)io_u->xfer_buf; 1227 for (uint32_t i = 0; i < io_u->number_trim; i++) { 1228 range->attributes.raw = 0; 1229 range->length = tr->len / block_size; 1230 range->starting_lba = tr->start / block_size; 1231 range++; 1232 tr++; 1233 } 1234 num_range = io_u->number_trim; 1235 range = fio_req->dsm_range; 1236 } 1237 #else 1238 range->attributes.raw = 0; 1239 range->length = lba_count; 1240 range->starting_lba = lba; 1241 num_range = 1; 1242 #endif 1243 1244 rc = spdk_nvme_ns_cmd_dataset_management(ns, fio_qpair->qpair, 1245 SPDK_NVME_DSM_ATTR_DEALLOCATE, range, num_range, 1246 spdk_fio_completion_cb, fio_req); 1247 break; 1248 default: 1249 assert(false); 1250 break; 1251 } 1252 1253 /* NVMe read/write functions return -ENOMEM if there are no free requests. */ 1254 if (rc == -ENOMEM) { 1255 return FIO_Q_BUSY; 1256 } 1257 1258 if (rc != 0) { 1259 io_u->error = abs(rc); 1260 return FIO_Q_COMPLETED; 1261 } 1262 1263 return FIO_Q_QUEUED; 1264 } 1265 1266 static struct io_u * 1267 spdk_fio_event(struct thread_data *td, int event) 1268 { 1269 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1270 1271 assert(event >= 0); 1272 assert((unsigned)event < fio_thread->iocq_count); 1273 return fio_thread->iocq[event]; 1274 } 1275 1276 static int 1277 spdk_fio_getevents(struct thread_data *td, unsigned int min, 1278 unsigned int max, const struct timespec *t) 1279 { 1280 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1281 struct spdk_fio_qpair *fio_qpair = NULL; 1282 struct timespec t0, t1; 1283 uint64_t timeout = 0; 1284 1285 if (t) { 1286 timeout = t->tv_sec * 1000000000L + t->tv_nsec; 1287 clock_gettime(CLOCK_MONOTONIC_RAW, &t0); 1288 } 1289 1290 fio_thread->iocq_count = 0; 1291 1292 /* fetch the next qpair */ 1293 if (fio_thread->fio_qpair_current) { 1294 fio_qpair = TAILQ_NEXT(fio_thread->fio_qpair_current, link); 1295 } 1296 1297 for (;;) { 1298 if (fio_qpair == NULL) { 1299 fio_qpair = TAILQ_FIRST(&fio_thread->fio_qpair); 1300 } 1301 1302 while (fio_qpair != NULL) { 1303 /* 1304 * We can be called while spdk_fio_open()s are still 1305 * ongoing, in which case, ->qpair can still be NULL. 1306 */ 1307 if (fio_qpair->qpair == NULL) { 1308 fio_qpair = TAILQ_NEXT(fio_qpair, link); 1309 continue; 1310 } 1311 1312 spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count); 1313 1314 if (fio_thread->iocq_count >= min) { 1315 /* reset the current handling qpair */ 1316 fio_thread->fio_qpair_current = fio_qpair; 1317 return fio_thread->iocq_count; 1318 } 1319 1320 fio_qpair = TAILQ_NEXT(fio_qpair, link); 1321 } 1322 1323 if (t) { 1324 uint64_t elapse; 1325 1326 clock_gettime(CLOCK_MONOTONIC_RAW, &t1); 1327 elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) 1328 + t1.tv_nsec - t0.tv_nsec; 1329 if (elapse > timeout) { 1330 break; 1331 } 1332 } 1333 } 1334 1335 /* reset the current handling qpair */ 1336 fio_thread->fio_qpair_current = fio_qpair; 1337 return fio_thread->iocq_count; 1338 } 1339 1340 static int 1341 spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) 1342 { 1343 /* TODO: This should probably send a flush to the device, but for now just return successful. */ 1344 return 0; 1345 } 1346 1347 #if FIO_HAS_ZBD 1348 static int 1349 spdk_fio_get_zoned_model(struct thread_data *td, struct fio_file *f, enum zbd_zoned_model *model) 1350 { 1351 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1352 struct spdk_fio_qpair *fio_qpair = NULL; 1353 const struct spdk_nvme_zns_ns_data *zns_data = NULL; 1354 1355 if (f->filetype != FIO_TYPE_BLOCK) { 1356 log_info("spdk/nvme: unsupported filetype: %d\n", f->filetype); 1357 return -EINVAL; 1358 } 1359 1360 fio_qpair = get_fio_qpair(fio_thread, f); 1361 if (!fio_qpair) { 1362 log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name); 1363 return -ENODEV; 1364 } 1365 1366 switch (spdk_nvme_ns_get_csi(fio_qpair->ns)) { 1367 case SPDK_NVME_CSI_NVM: 1368 *model = ZBD_NONE; 1369 return 0; 1370 1371 case SPDK_NVME_CSI_KV: 1372 log_err("spdk/nvme: KV namespace is currently not supported\n"); 1373 return -ENOSYS; 1374 1375 case SPDK_NVME_CSI_ZNS: 1376 zns_data = spdk_nvme_zns_ns_get_data(fio_qpair->ns); 1377 if (!zns_data) { 1378 log_err("spdk/nvme: file_name: '%s', ZNS is not enabled\n", f->file_name); 1379 return -EINVAL; 1380 } 1381 1382 *model = ZBD_HOST_MANAGED; 1383 1384 return 0; 1385 } 1386 1387 return -EINVAL; 1388 } 1389 1390 static int 1391 spdk_fio_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset, 1392 struct zbd_zone *zbdz, unsigned int nr_zones) 1393 { 1394 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1395 struct spdk_fio_qpair *fio_qpair = NULL; 1396 const struct spdk_nvme_zns_ns_data *zns = NULL; 1397 struct spdk_nvme_zns_zone_report *report; 1398 struct spdk_nvme_qpair *tmp_qpair; 1399 uint32_t report_nzones = 0, report_nzones_max, report_nbytes, mdts_nbytes; 1400 uint64_t zsze_nbytes, ns_nzones, lba_nbytes; 1401 int completed = 0, err; 1402 1403 fio_qpair = get_fio_qpair(fio_thread, f); 1404 if (!fio_qpair) { 1405 log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name); 1406 return -ENODEV; 1407 } 1408 zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns); 1409 if (!zns) { 1410 log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name); 1411 return -EINVAL; 1412 } 1413 1414 /* qpair has not been allocated yet (it gets allocated in spdk_fio_open()). 1415 * Create a temporary qpair in order to perform report zones. 1416 */ 1417 assert(!fio_qpair->qpair); 1418 1419 tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_qpair->fio_ctrlr->ctrlr, NULL, 0); 1420 if (!tmp_qpair) { 1421 log_err("spdk/nvme: cannot allocate a temporary qpair\n"); 1422 return -EIO; 1423 } 1424 1425 /** Retrieve device parameters */ 1426 mdts_nbytes = spdk_nvme_ns_get_max_io_xfer_size(fio_qpair->ns); 1427 lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns); 1428 zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns); 1429 ns_nzones = spdk_nvme_zns_ns_get_num_zones(fio_qpair->ns); 1430 1431 /** Allocate report-buffer without exceeding mdts, zbdz-storage, and what is needed */ 1432 report_nzones_max = (mdts_nbytes - sizeof(*report)) / sizeof(report->descs[0]); 1433 report_nzones_max = spdk_min(spdk_min(report_nzones_max, nr_zones), ns_nzones); 1434 report_nbytes = sizeof(report->descs[0]) * report_nzones_max + sizeof(*report); 1435 report = calloc(1, report_nbytes); 1436 if (!report) { 1437 log_err("spdk/nvme: failed report_zones(): ENOMEM\n"); 1438 err = -ENOMEM; 1439 goto exit; 1440 } 1441 1442 err = spdk_nvme_zns_report_zones(fio_qpair->ns, tmp_qpair, report, report_nbytes, 1443 offset / lba_nbytes, SPDK_NVME_ZRA_LIST_ALL, true, pcu_cb, 1444 &completed); 1445 if (err || pcu(tmp_qpair, &completed) || completed < 0) { 1446 log_err("spdk/nvme: report_zones(): err: %d, cpl: %d\n", err, completed); 1447 err = err ? err : -EIO; 1448 goto exit; 1449 } 1450 assert(report->nr_zones <= report_nzones_max); 1451 report_nzones = report->nr_zones; 1452 1453 for (uint64_t idx = 0; idx < report->nr_zones; ++idx) { 1454 struct spdk_nvme_zns_zone_desc *zdesc = &report->descs[idx]; 1455 1456 zbdz[idx].start = zdesc->zslba * lba_nbytes; 1457 zbdz[idx].len = zsze_nbytes; 1458 zbdz[idx].capacity = zdesc->zcap * lba_nbytes; 1459 zbdz[idx].wp = zdesc->wp * lba_nbytes; 1460 1461 switch (zdesc->zt) { 1462 case SPDK_NVME_ZONE_TYPE_SEQWR: 1463 zbdz[idx].type = ZBD_ZONE_TYPE_SWR; 1464 break; 1465 1466 default: 1467 log_err("spdk/nvme: %s: inv. zone-type: 0x%x\n", f->file_name, zdesc->zt); 1468 err = -EIO; 1469 goto exit; 1470 } 1471 1472 switch (zdesc->zs) { 1473 case SPDK_NVME_ZONE_STATE_EMPTY: 1474 zbdz[idx].cond = ZBD_ZONE_COND_EMPTY; 1475 break; 1476 case SPDK_NVME_ZONE_STATE_IOPEN: 1477 zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN; 1478 break; 1479 case SPDK_NVME_ZONE_STATE_EOPEN: 1480 zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN; 1481 break; 1482 case SPDK_NVME_ZONE_STATE_CLOSED: 1483 zbdz[idx].cond = ZBD_ZONE_COND_CLOSED; 1484 break; 1485 case SPDK_NVME_ZONE_STATE_RONLY: 1486 zbdz[idx].cond = ZBD_ZONE_COND_READONLY; 1487 break; 1488 case SPDK_NVME_ZONE_STATE_FULL: 1489 zbdz[idx].cond = ZBD_ZONE_COND_FULL; 1490 break; 1491 case SPDK_NVME_ZONE_STATE_OFFLINE: 1492 zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE; 1493 break; 1494 1495 default: 1496 log_err("spdk/nvme: %s: inv. zone-state: 0x%x\n", f->file_name, zdesc->zs); 1497 err = -EIO; 1498 goto exit; 1499 } 1500 } 1501 1502 exit: 1503 spdk_nvme_ctrlr_free_io_qpair(tmp_qpair); 1504 free(report); 1505 1506 return err ? err : (int)report_nzones; 1507 } 1508 1509 static int 1510 spdk_fio_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset, uint64_t length) 1511 { 1512 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1513 struct spdk_fio_qpair *fio_qpair = NULL; 1514 const struct spdk_nvme_zns_ns_data *zns = NULL; 1515 uint64_t zsze_nbytes, lba_nbytes; 1516 int err = 0; 1517 1518 fio_qpair = get_fio_qpair(fio_thread, f); 1519 if (!fio_qpair) { 1520 log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name); 1521 return -ENODEV; 1522 } 1523 zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns); 1524 if (!zns) { 1525 log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name); 1526 return -EINVAL; 1527 } 1528 zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns); 1529 lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns); 1530 1531 /** check the assumption that offset is valid zone-start lba */ 1532 if (offset % zsze_nbytes) { 1533 log_err("spdk/nvme: offset: %zu is not a valid zslba\n", offset); 1534 return -EINVAL; 1535 } 1536 1537 for (uint64_t cur = offset; cur < offset + length; cur += zsze_nbytes) { 1538 int completed = 0; 1539 1540 err = spdk_nvme_zns_reset_zone(fio_qpair->ns, fio_qpair->qpair, cur / lba_nbytes, 1541 false, pcu_cb, &completed); 1542 if (err || pcu(fio_qpair->qpair, &completed) || completed < 0) { 1543 log_err("spdk/nvme: zns_reset_zone(): err: %d, cpl: %d\n", err, completed); 1544 err = err ? err : -EIO; 1545 break; 1546 } 1547 } 1548 1549 return err; 1550 } 1551 #endif 1552 1553 #if FIO_IOOPS_VERSION >= 30 1554 static int 1555 spdk_fio_get_max_open_zones(struct thread_data *td, struct fio_file *f, 1556 unsigned int *max_open_zones) 1557 { 1558 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1559 struct spdk_fio_qpair *fio_qpair = NULL; 1560 1561 fio_qpair = get_fio_qpair(fio_thread, f); 1562 if (!fio_qpair) { 1563 log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name); 1564 return -ENODEV; 1565 } 1566 1567 *max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(fio_qpair->ns); 1568 1569 return 0; 1570 } 1571 #endif 1572 1573 #if FIO_HAS_FDP 1574 static int 1575 spdk_fio_fdp_fetch_ruhs(struct thread_data *td, struct fio_file *f, 1576 struct fio_ruhs_info *fruhs_info) 1577 { 1578 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1579 struct spdk_fio_qpair *fio_qpair = NULL; 1580 struct spdk_nvme_qpair *tmp_qpair; 1581 struct { 1582 struct spdk_nvme_fdp_ruhs ruhs; 1583 struct spdk_nvme_fdp_ruhs_desc desc[128]; 1584 } fdp_ruhs; 1585 uint16_t idx; 1586 int completed = 0, err; 1587 1588 fio_qpair = get_fio_qpair(fio_thread, f); 1589 if (!fio_qpair) { 1590 log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name); 1591 return -ENODEV; 1592 } 1593 1594 /* qpair has not been allocated yet (it gets allocated in spdk_fio_open()). 1595 * Create a temporary qpair in order to perform report zones. 1596 */ 1597 assert(!fio_qpair->qpair); 1598 1599 tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_qpair->fio_ctrlr->ctrlr, NULL, 0); 1600 if (!tmp_qpair) { 1601 log_err("spdk/nvme: cannot allocate a temporary qpair\n"); 1602 return -EIO; 1603 } 1604 1605 err = spdk_nvme_ns_cmd_io_mgmt_recv(fio_qpair->ns, tmp_qpair, &fdp_ruhs, sizeof(fdp_ruhs), 1606 SPDK_NVME_FDP_IO_MGMT_RECV_RUHS, 0, pcu_cb, &completed); 1607 if (err || pcu(tmp_qpair, &completed) || completed < 0) { 1608 log_err("spdk/nvme: fetch_ruhs(): err: %d, cpl: %d\n", err, completed); 1609 err = err ? err : -EIO; 1610 goto exit; 1611 } 1612 1613 fruhs_info->nr_ruhs = fdp_ruhs.ruhs.nruhsd; 1614 for (idx = 0; idx < fdp_ruhs.ruhs.nruhsd; idx++) { 1615 fruhs_info->plis[idx] = fdp_ruhs.desc[idx].pid; 1616 } 1617 1618 exit: 1619 spdk_nvme_ctrlr_free_io_qpair(tmp_qpair); 1620 1621 return err; 1622 } 1623 #endif 1624 1625 static void 1626 spdk_fio_cleanup(struct thread_data *td) 1627 { 1628 struct spdk_fio_thread *fio_thread = td->io_ops_data; 1629 struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp; 1630 struct spdk_fio_options *fio_options = td->eo; 1631 1632 if (fio_options->spdk_tracing) { 1633 spdk_trace_unregister_user_thread(); 1634 } 1635 1636 TAILQ_FOREACH_SAFE(fio_qpair, &fio_thread->fio_qpair, link, fio_qpair_tmp) { 1637 TAILQ_REMOVE(&fio_thread->fio_qpair, fio_qpair, link); 1638 free(fio_qpair); 1639 } 1640 1641 free(fio_thread->iocq); 1642 free(fio_thread); 1643 1644 pthread_mutex_lock(&g_mutex); 1645 g_td_count--; 1646 if (g_td_count == 0) { 1647 struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp; 1648 struct spdk_nvme_detach_ctx *detach_ctx = NULL; 1649 1650 TAILQ_FOREACH_SAFE(fio_ctrlr, &g_ctrlrs, link, fio_ctrlr_tmp) { 1651 TAILQ_REMOVE(&g_ctrlrs, fio_ctrlr, link); 1652 spdk_nvme_detach_async(fio_ctrlr->ctrlr, &detach_ctx); 1653 free(fio_ctrlr); 1654 } 1655 1656 if (detach_ctx) { 1657 spdk_nvme_detach_poll(detach_ctx); 1658 } 1659 1660 if (fio_options->enable_vmd) { 1661 spdk_vmd_fini(); 1662 } 1663 } 1664 pthread_mutex_unlock(&g_mutex); 1665 if (TAILQ_EMPTY(&g_ctrlrs)) { 1666 if (pthread_cancel(g_ctrlr_thread_id) == 0) { 1667 pthread_join(g_ctrlr_thread_id, NULL); 1668 } 1669 } 1670 } 1671 1672 /* This function enables addition of SPDK parameters to the fio config 1673 * Adding new parameters by defining them here and defining a callback 1674 * function to read the parameter value. */ 1675 static struct fio_option options[] = { 1676 { 1677 .name = "enable_wrr", 1678 .lname = "Enable weighted round robin (WRR) for IO submission queues", 1679 .type = FIO_OPT_INT, 1680 .off1 = offsetof(struct spdk_fio_options, enable_wrr), 1681 .def = "0", 1682 .help = "Enable weighted round robin (WRR) for IO submission queues", 1683 .category = FIO_OPT_C_ENGINE, 1684 .group = FIO_OPT_G_INVALID, 1685 }, 1686 { 1687 .name = "arbitration_burst", 1688 .lname = "Arbitration Burst", 1689 .type = FIO_OPT_INT, 1690 .off1 = offsetof(struct spdk_fio_options, arbitration_burst), 1691 .def = "0", 1692 .help = "Arbitration Burst used for WRR (valid range from 0 - 7)", 1693 .category = FIO_OPT_C_ENGINE, 1694 .group = FIO_OPT_G_INVALID, 1695 }, 1696 { 1697 .name = "low_weight", 1698 .lname = "low_weight for WRR", 1699 .type = FIO_OPT_INT, 1700 .off1 = offsetof(struct spdk_fio_options, low_weight), 1701 .def = "0", 1702 .help = "low_weight used for WRR (valid range from 0 - 255)", 1703 .category = FIO_OPT_C_ENGINE, 1704 .group = FIO_OPT_G_INVALID, 1705 }, 1706 { 1707 .name = "medium_weight", 1708 .lname = "medium_weight for WRR", 1709 .type = FIO_OPT_INT, 1710 .off1 = offsetof(struct spdk_fio_options, medium_weight), 1711 .def = "0", 1712 .help = "medium weight used for WRR (valid range from 0 - 255)", 1713 .category = FIO_OPT_C_ENGINE, 1714 .group = FIO_OPT_G_INVALID, 1715 }, 1716 { 1717 .name = "high_weight", 1718 .lname = "high_weight for WRR", 1719 .type = FIO_OPT_INT, 1720 .off1 = offsetof(struct spdk_fio_options, high_weight), 1721 .def = "0", 1722 .help = "high weight used for WRR (valid range from 0 - 255)", 1723 .category = FIO_OPT_C_ENGINE, 1724 .group = FIO_OPT_G_INVALID, 1725 }, 1726 { 1727 .name = "wrr_priority", 1728 .lname = "priority used for WRR", 1729 .type = FIO_OPT_INT, 1730 .off1 = offsetof(struct spdk_fio_options, wrr_priority), 1731 .def = "0", 1732 .help = "priority used for WRR (valid range from 0-3)", 1733 .category = FIO_OPT_C_ENGINE, 1734 .group = FIO_OPT_G_INVALID, 1735 }, 1736 { 1737 .name = "mem_size_mb", 1738 .lname = "Memory size in MB", 1739 .type = FIO_OPT_INT, 1740 .off1 = offsetof(struct spdk_fio_options, mem_size), 1741 .def = "0", 1742 .help = "Memory Size for SPDK (MB)", 1743 .category = FIO_OPT_C_ENGINE, 1744 .group = FIO_OPT_G_INVALID, 1745 }, 1746 { 1747 .name = "shm_id", 1748 .lname = "shared memory ID", 1749 .type = FIO_OPT_INT, 1750 .off1 = offsetof(struct spdk_fio_options, shm_id), 1751 .def = "-1", 1752 .help = "Shared Memory ID", 1753 .category = FIO_OPT_C_ENGINE, 1754 .group = FIO_OPT_G_INVALID, 1755 }, 1756 { 1757 .name = "enable_sgl", 1758 .lname = "SGL used for I/O commands", 1759 .type = FIO_OPT_INT, 1760 .off1 = offsetof(struct spdk_fio_options, enable_sgl), 1761 .def = "0", 1762 .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)", 1763 .category = FIO_OPT_C_ENGINE, 1764 .group = FIO_OPT_G_INVALID, 1765 }, 1766 { 1767 .name = "sge_size", 1768 .lname = "SGL size used for I/O commands", 1769 .type = FIO_OPT_INT, 1770 .off1 = offsetof(struct spdk_fio_options, sge_size), 1771 .def = "4096", 1772 .help = "SGL size in bytes for I/O Commands (default 4096)", 1773 .category = FIO_OPT_C_ENGINE, 1774 .group = FIO_OPT_G_INVALID, 1775 }, 1776 { 1777 .name = "bit_bucket_data_len", 1778 .lname = "Amount of data used for Bit Bucket", 1779 .type = FIO_OPT_INT, 1780 .off1 = offsetof(struct spdk_fio_options, bit_bucket_data_len), 1781 .def = "0", 1782 .help = "Bit Bucket Data Length for READ commands (disabled by default)", 1783 .category = FIO_OPT_C_ENGINE, 1784 .group = FIO_OPT_G_INVALID, 1785 }, 1786 { 1787 .name = "hostnqn", 1788 .lname = "Host NQN to use when connecting to controllers.", 1789 .type = FIO_OPT_STR_STORE, 1790 .off1 = offsetof(struct spdk_fio_options, hostnqn), 1791 .help = "Host NQN", 1792 .category = FIO_OPT_C_ENGINE, 1793 .group = FIO_OPT_G_INVALID, 1794 }, 1795 { 1796 .name = "pi_act", 1797 .lname = "Protection Information Action", 1798 .type = FIO_OPT_INT, 1799 .off1 = offsetof(struct spdk_fio_options, pi_act), 1800 .def = "1", 1801 .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", 1802 .category = FIO_OPT_C_ENGINE, 1803 .group = FIO_OPT_G_INVALID, 1804 }, 1805 { 1806 .name = "pi_chk", 1807 .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", 1808 .type = FIO_OPT_STR_STORE, 1809 .off1 = offsetof(struct spdk_fio_options, pi_chk), 1810 .def = NULL, 1811 .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", 1812 .category = FIO_OPT_C_ENGINE, 1813 .group = FIO_OPT_G_INVALID, 1814 }, 1815 { 1816 .name = "md_per_io_size", 1817 .lname = "Separate Metadata Buffer Size per I/O", 1818 .type = FIO_OPT_INT, 1819 .off1 = offsetof(struct spdk_fio_options, md_per_io_size), 1820 .def = "4096", 1821 .help = "Size of separate metadata buffer per I/O (Default: 4096)", 1822 .category = FIO_OPT_C_ENGINE, 1823 .group = FIO_OPT_G_INVALID, 1824 }, 1825 { 1826 .name = "apptag", 1827 .lname = "Application Tag used in Protection Information", 1828 .type = FIO_OPT_INT, 1829 .off1 = offsetof(struct spdk_fio_options, apptag), 1830 .def = "0x1234", 1831 .help = "Application Tag used in Protection Information field (Default: 0x1234)", 1832 .category = FIO_OPT_C_ENGINE, 1833 .group = FIO_OPT_G_INVALID, 1834 }, 1835 { 1836 .name = "apptag_mask", 1837 .lname = "Application Tag Mask", 1838 .type = FIO_OPT_INT, 1839 .off1 = offsetof(struct spdk_fio_options, apptag_mask), 1840 .def = "0xffff", 1841 .help = "Application Tag Mask used with Application Tag (Default: 0xffff)", 1842 .category = FIO_OPT_C_ENGINE, 1843 .group = FIO_OPT_G_INVALID, 1844 }, 1845 { 1846 .name = "digest_enable", 1847 .lname = "PDU digest choice for NVMe/TCP Transport(NONE|HEADER|DATA|BOTH)", 1848 .type = FIO_OPT_STR_STORE, 1849 .off1 = offsetof(struct spdk_fio_options, digest_enable), 1850 .def = NULL, 1851 .help = "Control the NVMe/TCP control(digest_enable=NONE|HEADER|DATA|BOTH)", 1852 .category = FIO_OPT_C_ENGINE, 1853 .group = FIO_OPT_G_INVALID, 1854 }, 1855 { 1856 .name = "enable_vmd", 1857 .lname = "Enable VMD enumeration", 1858 .type = FIO_OPT_INT, 1859 .off1 = offsetof(struct spdk_fio_options, enable_vmd), 1860 .def = "0", 1861 .help = "Enable VMD enumeration (enable_vmd=1 or enable_vmd=0)", 1862 .category = FIO_OPT_C_ENGINE, 1863 .group = FIO_OPT_G_INVALID, 1864 }, 1865 { 1866 .name = "initial_zone_reset", 1867 .lname = "Reset Zones on initialization", 1868 .type = FIO_OPT_INT, 1869 .off1 = offsetof(struct spdk_fio_options, initial_zone_reset), 1870 .def = "0", 1871 .help = "Reset Zones on initialization (0=disable, 1=Reset All Zones)", 1872 .category = FIO_OPT_C_ENGINE, 1873 .group = FIO_OPT_G_INVALID, 1874 }, 1875 { 1876 .name = "zone_append", 1877 .lname = "Use zone append instead of write", 1878 .type = FIO_OPT_INT, 1879 .off1 = offsetof(struct spdk_fio_options, zone_append), 1880 .def = "0", 1881 .help = "Use zone append instead of write (1=zone append, 0=write)", 1882 .category = FIO_OPT_C_ENGINE, 1883 .group = FIO_OPT_G_INVALID, 1884 }, 1885 { 1886 .name = "print_qid_mappings", 1887 .lname = "Print job-to-qid mappings", 1888 .type = FIO_OPT_INT, 1889 .off1 = offsetof(struct spdk_fio_options, print_qid_mappings), 1890 .def = "0", 1891 .help = "Print job-to-qid mappings (0=disable, 1=enable)", 1892 .category = FIO_OPT_C_ENGINE, 1893 .group = FIO_OPT_G_INVALID, 1894 }, 1895 { 1896 .name = "log_flags", 1897 .lname = "log_flags", 1898 .type = FIO_OPT_STR_STORE, 1899 .off1 = offsetof(struct spdk_fio_options, log_flags), 1900 .help = "Enable log flags (comma-separated list)", 1901 .category = FIO_OPT_C_ENGINE, 1902 .group = FIO_OPT_G_INVALID, 1903 }, 1904 { 1905 .name = "spdk_tracing", 1906 .lname = "Enable SPDK Tracing", 1907 .type = FIO_OPT_INT, 1908 .off1 = offsetof(struct spdk_fio_options, spdk_tracing), 1909 .def = "0", 1910 .help = "SPDK Tracing (0=disable, 1=enable)", 1911 .category = FIO_OPT_C_ENGINE, 1912 .group = FIO_OPT_G_INVALID, 1913 }, 1914 { 1915 .name = NULL, 1916 }, 1917 }; 1918 1919 /* FIO imports this structure using dlsym */ 1920 struct ioengine_ops ioengine = { 1921 .name = "spdk", 1922 .version = FIO_IOOPS_VERSION, 1923 .queue = spdk_fio_queue, 1924 .getevents = spdk_fio_getevents, 1925 .event = spdk_fio_event, 1926 .cleanup = spdk_fio_cleanup, 1927 .open_file = spdk_fio_open, 1928 .close_file = spdk_fio_close, 1929 .invalidate = spdk_fio_invalidate, 1930 .iomem_alloc = spdk_fio_iomem_alloc, 1931 .iomem_free = spdk_fio_iomem_free, 1932 .setup = spdk_fio_setup, 1933 .init = spdk_fio_init, 1934 .io_u_init = spdk_fio_io_u_init, 1935 .io_u_free = spdk_fio_io_u_free, 1936 #if FIO_HAS_ZBD 1937 .get_zoned_model = spdk_fio_get_zoned_model, 1938 .report_zones = spdk_fio_report_zones, 1939 .reset_wp = spdk_fio_reset_wp, 1940 #endif 1941 #if FIO_IOOPS_VERSION >= 30 1942 .get_max_open_zones = spdk_fio_get_max_open_zones, 1943 #endif 1944 #if FIO_HAS_FDP 1945 .fdp_fetch_ruhs = spdk_fio_fdp_fetch_ruhs, 1946 #endif 1947 #if FIO_HAS_MRT 1948 .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO | FIO_MULTI_RANGE_TRIM, 1949 #else 1950 .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO, 1951 #endif 1952 .options = options, 1953 .option_struct_size = sizeof(struct spdk_fio_options), 1954 }; 1955 1956 static void fio_init 1957 fio_spdk_register(void) 1958 { 1959 register_ioengine(&ioengine); 1960 } 1961 1962 static void fio_exit 1963 fio_spdk_unregister(void) 1964 { 1965 if (g_spdk_env_initialized) { 1966 spdk_trace_cleanup(); 1967 spdk_env_fini(); 1968 } 1969 1970 unregister_ioengine(&ioengine); 1971 } 1972 1973 SPDK_LOG_REGISTER_COMPONENT(fio_nvme) 1974