/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/nvme_zns.h"
#include "spdk/vmd.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/log.h"
#include "spdk/likely.h"
#include "spdk/endian.h"
#include "spdk/dif.h"
#include "spdk/util.h"
#include "spdk/trace.h"

#include "config-host.h"
#include "fio.h"
#include "optgroup.h"

#ifdef for_each_rw_ddir
#define FIO_HAS_ZBD (FIO_IOOPS_VERSION >= 26)
#define FIO_HAS_FDP (FIO_IOOPS_VERSION >= 35)
#define FIO_HAS_MRT (FIO_IOOPS_VERSION >= 34)
#else
#define FIO_HAS_ZBD (0)
#define FIO_HAS_FDP (0)
#define FIO_HAS_MRT (0)
#endif

/* FreeBSD is missing CLOCK_MONOTONIC_RAW,
 * so an alternative is provided. */
#ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
#endif

#define NVME_IO_ALIGN 4096

static bool g_spdk_env_initialized;
static bool g_log_flag_error;
static int g_spdk_enable_sgl = 0;
static uint32_t g_spdk_sge_size = 4096;
static uint32_t g_spdk_bit_bucket_data_len = 0;
static uint32_t g_spdk_pract_flag;
static uint32_t g_spdk_prchk_flags;
static uint32_t g_spdk_md_per_io_size = 4096;
static uint16_t g_spdk_apptag;
static uint16_t g_spdk_apptag_mask;

struct spdk_fio_options {
	void *pad; /* off1 used in option descriptions may not be 0 */
	int enable_wrr;
	int arbitration_burst;
	int low_weight;
	int medium_weight;
	int high_weight;
	int wrr_priority;
	int mem_size;
	int shm_id;
	int enable_sgl;
	int sge_size;
	int bit_bucket_data_len;
	char *hostnqn;
	int pi_act;
	char *pi_chk;
	int md_per_io_size;
	int apptag;
	int apptag_mask;
	char *digest_enable;
	int enable_vmd;
	int initial_zone_reset;
	int zone_append;
	int print_qid_mappings;
	int spdk_tracing;
	char *log_flags;
	int disable_pcie_sgl_merge;
};

struct spdk_fio_request {
	struct io_u *io;
	/** Offset in current iovec; fio only uses one vector */
	uint32_t iov_offset;

	/** Amount of data used for Bit Bucket SGL */
	uint32_t bit_bucket_data_len;

	/** Context for NVMe PI */
	struct spdk_dif_ctx dif_ctx;
	/** Separate metadata buffer pointer */
	void *md_buf;

	/** Dataset management range information */
	struct spdk_nvme_dsm_range *dsm_range;

	struct spdk_fio_thread *fio_thread;
	struct spdk_fio_qpair *fio_qpair;
};

struct spdk_fio_ctrlr {
	struct spdk_nvme_transport_id tr_id;
	struct spdk_nvme_ctrlr_opts opts;
	struct spdk_nvme_ctrlr *ctrlr;
	TAILQ_ENTRY(spdk_fio_ctrlr) link;
};

static TAILQ_HEAD(, spdk_fio_ctrlr) g_ctrlrs = TAILQ_HEAD_INITIALIZER(g_ctrlrs);
static int g_td_count;
static pthread_t g_ctrlr_thread_id = 0;
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool g_error;

struct spdk_fio_qpair {
	struct fio_file *f;
	struct spdk_nvme_qpair *qpair;
	struct spdk_nvme_ns *ns;
	uint32_t io_flags;
	bool zone_append_enabled;
	bool nvme_pi_enabled;
	/* True for DIF and false for DIX, and this is valid only if nvme_pi_enabled is true. */
	bool extended_lba;
	/* True for protection info transferred at start of metadata,
	 * false for protection info transferred at end of metadata, and
	 * this is valid only if nvme_pi_enabled is true.
	 */
	bool md_start;
	TAILQ_ENTRY(spdk_fio_qpair) link;
	struct spdk_fio_ctrlr *fio_ctrlr;
};

struct spdk_fio_thread {
	struct thread_data *td;

	TAILQ_HEAD(, spdk_fio_qpair) fio_qpair;
	struct spdk_fio_qpair *fio_qpair_current;	/* the current fio_qpair to be handled */

	struct io_u **iocq;		/* io completion queue */
	unsigned int iocq_count;	/* number of iocq entries filled by last getevents */
	unsigned int iocq_size;		/* number of iocq entries allocated */
};

struct spdk_fio_probe_ctx {
	struct thread_data *td;
	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	struct fio_file *f; /* fio_file given by user */
};

static void *
spdk_fio_poll_ctrlrs(void *arg)
{
	struct spdk_fio_ctrlr *fio_ctrlr;
	int oldstate;
	int rc;

	/* Loop until the thread is cancelled */
	while (true) {
		rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
		if (rc != 0) {
			SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n",
				    rc, spdk_strerror(rc));
		}

		pthread_mutex_lock(&g_mutex);

		TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) {
			spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr);
		}

		pthread_mutex_unlock(&g_mutex);

		rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
		if (rc != 0) {
			SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n",
				    rc, spdk_strerror(rc));
		}

		/* This is a pthread cancellation point and cannot be removed. */
		sleep(1);
	}

	return NULL;
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_fio_probe_ctx *ctx = cb_ctx;
	struct thread_data *td = ctx->td;
	struct spdk_fio_options *fio_options = td->eo;

	if (ctx->hostnqn[0] != '\0') {
		memcpy(opts->hostnqn, ctx->hostnqn, sizeof(opts->hostnqn));
	} else if (fio_options->hostnqn) {
		snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn);
	}

	if (fio_options->enable_wrr) {
		opts->arb_mechanism = SPDK_NVME_CC_AMS_WRR;
		opts->arbitration_burst = fio_options->arbitration_burst;
		opts->low_priority_weight = fio_options->low_weight;
		opts->medium_priority_weight = fio_options->medium_weight;
		opts->high_priority_weight = fio_options->high_weight;
	}

	if (fio_options->digest_enable) {
		if (strcasecmp(fio_options->digest_enable, "HEADER") == 0) {
			opts->header_digest = true;
		} else if (strcasecmp(fio_options->digest_enable, "DATA") == 0) {
			opts->data_digest = true;
		} else if (strcasecmp(fio_options->digest_enable, "BOTH") == 0) {
			opts->header_digest = true;
			opts->data_digest = true;
		}
	}

	return true;
}

static struct spdk_fio_ctrlr *
get_fio_ctrlr(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_fio_ctrlr *fio_ctrlr;

	TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) {
		if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) {
			return fio_ctrlr;
		}
	}

	return NULL;
}

/**
 * Returns the fio_qpair that matches the given fio_file and has an associated ns
 */
static struct spdk_fio_qpair *
get_fio_qpair(struct spdk_fio_thread *fio_thread, struct fio_file *f)
{
	struct spdk_fio_qpair *fio_qpair;

	TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) {
		if ((fio_qpair->f == f) && fio_qpair->ns) {
			return fio_qpair;
		}
	}

	return NULL;
}

#if FIO_HAS_ZBD
/**
 * Callback function to use while processing completions until the completion-indicator turns non-zero
 */
static void
pcu_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	int *completed = ctx;

	*completed = spdk_nvme_cpl_is_error(cpl) ? -1 : 1;
}

/**
 * Process Completions Until the given 'completed' indicator turns non-zero or an error occurs
 */
static int32_t
pcu(struct spdk_nvme_qpair *qpair, int *completed)
{
	int32_t ret;

	while (!*completed) {
		ret = spdk_nvme_qpair_process_completions(qpair, 1);
		if (ret < 0) {
			log_err("spdk/nvme: process_compl(): ret: %d\n", ret);
			return ret;
		}
	}

	return 0;
}
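
/*
 * Illustrative use of the two helpers above, mirroring the pattern used
 * later in this file (a sketch, not part of the plugin's control flow):
 *
 *	int completed = 0;
 *	int err = spdk_nvme_zns_reset_zone(ns, qpair, 0, true, pcu_cb, &completed);
 *	if (err || pcu(qpair, &completed) || completed < 0) {
 *		// submission failed, polling failed, or the command completed in error
 *	}
 */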
#endif

static inline uint32_t
_nvme_get_host_buffer_sector_size(struct spdk_nvme_ns *ns, uint32_t io_flags)
{
	bool md_excluded_from_xfer = false;
	uint32_t md_size;
	uint32_t ns_flags;

	ns_flags = spdk_nvme_ns_get_flags(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);

	/* For the extended LBA format, if the metadata size is 8 bytes and PRACT is
	 * enabled (the controller inserts/strips PI), we should exclude the metadata
	 * size from the block size.
	 */
	md_excluded_from_xfer = ((io_flags & SPDK_NVME_IO_FLAGS_PRACT) &&
				 (ns_flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) &&
				 (ns_flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) &&
				 (md_size == 8));

	return md_excluded_from_xfer ? spdk_nvme_ns_get_sector_size(ns) :
	       spdk_nvme_ns_get_extended_sector_size(ns);
}
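
/*
 * Worked example for the helper above (format values assumed): for a 512 + 8
 * extended-LBA format with PI, spdk_nvme_ns_get_extended_sector_size() returns
 * 520, so host buffers are laid out in 520-byte units. If PRACT is set and the
 * metadata is exactly 8 bytes, the controller inserts/strips the PI itself and
 * the host buffer uses the plain 512-byte sector size instead.
 */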
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_fio_probe_ctx *ctx = cb_ctx;
	struct thread_data *td = ctx->td;
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_ctrlr *fio_ctrlr;
	struct spdk_fio_qpair *fio_qpair;
	struct spdk_nvme_ns *ns;
	const struct spdk_nvme_ns_data *nsdata;
	struct fio_file *f = ctx->f;
	uint32_t ns_id;
	char *p;
	long int tmp;
	uint32_t block_size;
	struct spdk_fio_options *fio_options = td->eo;

	p = strstr(f->file_name, "ns=");
	if (p != NULL) {
		tmp = spdk_strtol(p + 3, 10);
		if (tmp <= 0) {
			SPDK_ERRLOG("namespace id should be >=1, but was invalid: %ld\n", tmp);
			g_error = true;
			return;
		}
		ns_id = (uint32_t)tmp;
	} else {
		ns_id = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
		if (ns_id == 0) {
			/* The ctrlr has no active namespaces and we didn't specify any, so there is nothing to do. */
			return;
		}
	}

	pthread_mutex_lock(&g_mutex);
	fio_ctrlr = get_fio_ctrlr(trid);
	/* it is a new ctrlr and needs to be added */
	if (!fio_ctrlr) {
		/* Create an fio_ctrlr and add it to the list */
		fio_ctrlr = calloc(1, sizeof(*fio_ctrlr));
		if (!fio_ctrlr) {
			SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n");
			g_error = true;
			pthread_mutex_unlock(&g_mutex);
			return;
		}
		fio_ctrlr->opts = *opts;
		fio_ctrlr->ctrlr = ctrlr;
		fio_ctrlr->tr_id = *trid;
		TAILQ_INSERT_TAIL(&g_ctrlrs, fio_ctrlr, link);
	}
	pthread_mutex_unlock(&g_mutex);

	ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id);
	if (ns == NULL) {
		SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id);
		g_error = true;
		return;
	}

	if (!spdk_nvme_ns_is_active(ns)) {
		SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id);
		g_error = true;
		return;
	}
	nsdata = spdk_nvme_ns_get_data(ns);

	TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) {
		if ((fio_qpair->f == f) ||
		    ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) &&
		     (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) {
			/* Not an error; just avoid a duplicate connection */
			return;
		}
	}

	/* create a new qpair */
	fio_qpair = calloc(1, sizeof(*fio_qpair));
	if (!fio_qpair) {
		g_error = true;
		SPDK_ERRLOG("Cannot allocate space for fio_qpair\n");
		return;
	}

	f->engine_data = fio_qpair;
	fio_qpair->ns = ns;
	fio_qpair->f = f;
	fio_qpair->fio_ctrlr = fio_ctrlr;
	TAILQ_INSERT_TAIL(&fio_thread->fio_qpair, fio_qpair, link);

	if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) {
		assert(spdk_nvme_ns_get_pi_type(ns) != SPDK_NVME_FMT_NVM_PROTECTION_DISABLE);
		fio_qpair->io_flags = g_spdk_pract_flag | g_spdk_prchk_flags;
		fio_qpair->nvme_pi_enabled = true;
		fio_qpair->md_start = nsdata->dps.md_start;
		fio_qpair->extended_lba = spdk_nvme_ns_supports_extended_lba(ns);
		fprintf(stdout, "PI type%u enabled with %s\n", spdk_nvme_ns_get_pi_type(ns),
			fio_qpair->extended_lba ? "extended lba" : "separate metadata");
	}

	block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags);
	for_each_rw_ddir(ddir) {
		if (td->o.min_bs[ddir] % block_size != 0 || td->o.max_bs[ddir] % block_size != 0) {
			if (spdk_nvme_ns_supports_extended_lba(ns)) {
				SPDK_ERRLOG("--bs or other block size related option has to be a multiple of (LBA data size + Metadata size)\n");
			} else {
				SPDK_ERRLOG("--bs or other block size related option has to be a multiple of LBA data size\n");
			}
			g_error = true;
			return;
		}
	}

	if (fio_options->zone_append && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) {
		if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED) {
			SPDK_DEBUGLOG(fio_nvme, "Using zone appends instead of writes on: '%s'\n",
				      f->file_name);
			fio_qpair->zone_append_enabled = true;
		} else {
			SPDK_WARNLOG("Falling back to writes on: '%s' - ns lacks zone append cmd\n",
				     f->file_name);
		}
	}

#if FIO_HAS_ZBD
	if (td_trim(td) && td->o.zone_mode == ZONE_MODE_ZBD) {
		td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM;
	}
#endif

	if (fio_options->initial_zone_reset == 1 && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) {
#if FIO_HAS_ZBD
		struct spdk_nvme_qpair *tmp_qpair;
		int completed = 0, err;

		/* qpair has not been allocated yet (it gets allocated in spdk_fio_open()).
		 * Create a temporary qpair in order to perform the initial zone reset.
		 */
		assert(!fio_qpair->qpair);

		tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
		if (!tmp_qpair) {
			SPDK_ERRLOG("Cannot allocate a temporary qpair\n");
			g_error = true;
			return;
		}

		err = spdk_nvme_zns_reset_zone(ns, tmp_qpair, 0x0, true, pcu_cb, &completed);
		if (err || pcu(tmp_qpair, &completed) || completed < 0) {
			log_err("spdk/nvme: warn: initial_zone_reset: err: %d, cpl: %d\n",
				err, completed);
		}

		spdk_nvme_ctrlr_free_io_qpair(tmp_qpair);
#else
		log_err("spdk/nvme: ZBD/ZNS is not supported\n");
#endif
	}

	f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns);
	if (f->real_file_size <= 0) {
		g_error = true;
		SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns);
		return;
	}

	f->filetype = FIO_TYPE_BLOCK;
	fio_file_set_size_known(f);
}

static void
parse_prchk_flags(const char *prchk_str)
{
	if (!prchk_str) {
		return;
	}

	if (strstr(prchk_str, "GUARD") != NULL) {
		g_spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
	}
	if (strstr(prchk_str, "REFTAG") != NULL) {
		g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
	}
	if (strstr(prchk_str, "APPTAG") != NULL) {
		g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG;
	}
}
static void
parse_pract_flag(int pract)
{
	if (pract == 1) {
		g_spdk_pract_flag = SPDK_NVME_IO_FLAGS_PRACT;
	} else {
		g_spdk_pract_flag = 0;
	}
}

static bool
fio_redirected_to_dev_null(void)
{
	char path[PATH_MAX] = "";
	ssize_t ret;

	ret = readlink("/proc/self/fd/1", path, sizeof(path));

	if (ret == -1 || strcmp(path, "/dev/null") != 0) {
		return false;
	}

	ret = readlink("/proc/self/fd/2", path, sizeof(path));

	if (ret == -1 || strcmp(path, "/dev/null") != 0) {
		return false;
	}

	return true;
}

static int
spdk_fio_init(struct thread_data *td)
{
	int ret = 0;
	struct spdk_fio_options *fio_options = td->eo;

	if (fio_options->spdk_tracing) {
		ret = spdk_trace_register_user_thread();
	}

	return ret;
}
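
/*
 * A minimal example job file for this engine (an illustrative sketch; the
 * PCIe address and the rw/bs/iodepth values are assumptions, not
 * requirements). The plugin is typically loaded into fio via LD_PRELOAD,
 * e.g. 'LD_PRELOAD=<path to this plugin> fio example.fio':
 *
 *	[global]
 *	ioengine=spdk
 *	thread=1
 *
 *	[job0]
 *	filename=trtype=PCIe traddr=0000.04.00.0 ns=1
 *	rw=randread
 *	bs=4096
 *	iodepth=32
 */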
/* Called once at initialization. This is responsible for gathering the size of
 * each "file", which in our case is given in the form
 * 'key=value [key=value] ... ns=value'
 * For example, for a local PCIe NVMe device: 'trtype=PCIe traddr=0000.04.00.0 ns=1'
 * For a remote device exported by an NVMe-oF target: 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */
static int
spdk_fio_setup(struct thread_data *td)
{
	struct spdk_fio_thread *fio_thread;
	struct spdk_fio_options *fio_options = td->eo;
	struct spdk_fio_probe_ctx ctx;
	struct spdk_env_opts opts;
	struct fio_file *f;
	char *p;
	int rc = 0;
	struct spdk_nvme_transport_id trid;
	struct spdk_fio_ctrlr *fio_ctrlr;
	char *trid_info;
	unsigned int i;
	size_t size;

	/*
	 * If we're running in a daemonized FIO instance, it's possible
	 * fd 1/2 were re-used for something important by FIO. Newer fio
	 * versions are careful to redirect those to /dev/null, but if they
	 * are not, we abort early so we don't accidentally write messages
	 * to an important file, etc.
	 */
	if (is_backend && !fio_redirected_to_dev_null()) {
		char buf[1024];
		snprintf(buf, sizeof(buf),
			 "SPDK FIO plugin is in daemon mode, but stdout/stderr "
			 "aren't redirected to /dev/null. Aborting.");
		fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf));
		return -1;
	}

	if (!td->o.use_thread) {
		log_err("spdk: must set thread=1 when using spdk plugin\n");
		return 1;
	}

	if (g_log_flag_error) {
		/* The first thread found an error when parsing log flags, so
		 * just return an error immediately for all of the other threads.
		 */
		return 1;
	}

	pthread_mutex_lock(&g_mutex);

	fio_thread = calloc(1, sizeof(*fio_thread));
	assert(fio_thread != NULL);

	td->io_ops_data = fio_thread;
	fio_thread->td = td;

	fio_thread->iocq_size = td->o.iodepth;
	fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *));
	assert(fio_thread->iocq != NULL);

	TAILQ_INIT(&fio_thread->fio_qpair);

	if (!g_spdk_env_initialized) {
		opts.opts_size = sizeof(opts);
		spdk_env_opts_init(&opts);
		opts.name = "fio";
		opts.mem_size = fio_options->mem_size;
		opts.shm_id = fio_options->shm_id;
		g_spdk_enable_sgl = fio_options->enable_sgl;
		g_spdk_sge_size = fio_options->sge_size;
		g_spdk_bit_bucket_data_len = fio_options->bit_bucket_data_len;
		parse_pract_flag(fio_options->pi_act);
		g_spdk_md_per_io_size = spdk_max(fio_options->md_per_io_size, 4096);
		g_spdk_apptag = (uint16_t)fio_options->apptag;
		g_spdk_apptag_mask = (uint16_t)fio_options->apptag_mask;
		parse_prchk_flags(fio_options->pi_chk);
		if (spdk_env_init(&opts) < 0) {
			SPDK_ERRLOG("Unable to initialize SPDK env\n");
			free(fio_thread->iocq);
			free(fio_thread);
			fio_thread = NULL;
			pthread_mutex_unlock(&g_mutex);
			return 1;
		}

		if (fio_options->log_flags) {
			char *sp = NULL;
			char *tok = strtok_r(fio_options->log_flags, ",", &sp);
			do {
				rc = spdk_log_set_flag(tok);
				if (rc < 0) {
					SPDK_ERRLOG("unknown log flag %s\n", tok);
					g_log_flag_error = true;
					/* unlock before bailing out; the other threads
					 * only check g_log_flag_error */
					pthread_mutex_unlock(&g_mutex);
					return 1;
				}
			} while ((tok = strtok_r(NULL, ",", &sp)) != NULL);
#ifdef DEBUG
			spdk_log_set_print_level(SPDK_LOG_DEBUG);
#endif
		}

		g_spdk_env_initialized = true;
		spdk_unaffinitize_thread();

		if (fio_options->spdk_tracing) {
			spdk_trace_init("spdk_fio_tracepoints", 65536, td->o.numjobs);
			spdk_trace_enable_tpoint_group("nvme_pcie");
			spdk_trace_enable_tpoint_group("nvme_tcp");
		}

		/* Spawn a thread to continue polling the controllers */
		rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL);
		if (rc != 0) {
			SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n");
		}

		if (fio_options->enable_vmd && spdk_vmd_init()) {
			SPDK_ERRLOG("Failed to initialize VMD. Some NVMe devices can be unavailable.\n");
		}
	}
	pthread_mutex_unlock(&g_mutex);

	for_each_file(td, f, i) {
		memset(&trid, 0, sizeof(trid));
		memset(&ctx, 0, sizeof(ctx));

		trid.trtype = SPDK_NVME_TRANSPORT_PCIE;

		p = strstr(f->file_name, " ns=");
		if (p != NULL) {
			trid_info = strndup(f->file_name, p - f->file_name);
		} else {
			trid_info = strndup(f->file_name, strlen(f->file_name));
		}

		if (!trid_info) {
			SPDK_ERRLOG("Failed to allocate space for trid_info\n");
			continue;
		}

		rc = spdk_nvme_transport_id_parse(&trid, trid_info);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info);
			free(trid_info);
			continue;
		}
		free(trid_info);

		if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
			struct spdk_pci_addr pci_addr;
			if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) {
				SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr);
				continue;
			}
			spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);
		} else {
			if (trid.subnqn[0] == '\0') {
				snprintf(trid.subnqn, sizeof(trid.subnqn), "%s",
					 SPDK_NVMF_DISCOVERY_NQN);
			}
			if ((p = strcasestr(f->file_name, "hostnqn:")) ||
			    (p = strcasestr(f->file_name, "hostnqn="))) {
				p += strlen("hostnqn:");
				size = strcspn(p, " \t\n");
				if (size > sizeof(ctx.hostnqn)) {
					SPDK_ERRLOG("Invalid hostnqn: too long\n");
					continue;
				}
				memcpy(ctx.hostnqn, p, size);
			}
		}

		ctx.td = td;
		ctx.f = f;

		pthread_mutex_lock(&g_mutex);
		fio_ctrlr = get_fio_ctrlr(&trid);
		pthread_mutex_unlock(&g_mutex);
		if (fio_ctrlr) {
			attach_cb(&ctx, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts);
		} else {
			/* Enumerate all of the controllers */
			if (spdk_nvme_probe(&trid, &ctx, probe_cb, attach_cb, NULL) != 0) {
				SPDK_ERRLOG("spdk_nvme_probe() failed\n");
				continue;
			}
		}

		if (g_error) {
			log_err("Failed to initialize spdk fio plugin\n");
			rc = 1;
			break;
		}
	}

	pthread_mutex_lock(&g_mutex);
	g_td_count++;
	pthread_mutex_unlock(&g_mutex);

	return rc;
}

static int
spdk_fio_open(struct thread_data *td, struct fio_file *f)
{
	struct spdk_fio_qpair *fio_qpair = f->engine_data;
	struct spdk_fio_ctrlr *fio_ctrlr = fio_qpair->fio_ctrlr;
	struct spdk_fio_options *fio_options = td->eo;
	struct spdk_nvme_io_qpair_opts qpopts;

	assert(fio_qpair->qpair == NULL);
	spdk_nvme_ctrlr_get_default_io_qpair_opts(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts));
	qpopts.delay_cmd_submit = true;
	if (fio_options->enable_wrr) {
		qpopts.qprio = fio_options->wrr_priority;
	}
	qpopts.disable_pcie_sgl_merge = fio_options->disable_pcie_sgl_merge;

	fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts));
	if (!fio_qpair->qpair) {
		SPDK_ERRLOG("Cannot allocate any more NVMe io_qpairs\n");
		g_error = true;
		free(fio_qpair);
		return -1;
	}

	if (fio_options->print_qid_mappings == 1) {
		log_info("job %s: %s qid %d\n", td->o.name, f->file_name,
			 spdk_nvme_qpair_get_id(fio_qpair->qpair));
	}

	return 0;
}

static int
spdk_fio_close(struct thread_data *td, struct fio_file *f)
{
	struct spdk_fio_qpair *fio_qpair = f->engine_data;

	assert(fio_qpair->qpair != NULL);
	spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair);
	fio_qpair->qpair = NULL;
	return 0;
}

static int
spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair;
	struct spdk_nvme_ctrlr *ctrlr;
	int32_t numa_id = SPDK_ENV_NUMA_ID_ANY, tmp_numa_id;

	/* If all ctrlrs used by this fio_thread have the same numa
	 * id, allocate from that one. If they come from different numa
	 * ids, then don't try to optimize and just use SPDK_ENV_NUMA_ID_ANY.
	 */
	TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) {
		ctrlr = fio_qpair->fio_ctrlr->ctrlr;
		tmp_numa_id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
		if (numa_id == SPDK_ENV_NUMA_ID_ANY) {
			numa_id = tmp_numa_id;
		} else if (tmp_numa_id != numa_id &&
			   tmp_numa_id != SPDK_ENV_NUMA_ID_ANY) {
			numa_id = SPDK_ENV_NUMA_ID_ANY;
			break;
		}
	}

	td->orig_buffer = spdk_dma_zmalloc_socket(total_mem, NVME_IO_ALIGN, NULL, numa_id);
	return td->orig_buffer == NULL;
}

static void
spdk_fio_iomem_free(struct thread_data *td)
{
	spdk_dma_free(td->orig_buffer);
}

static int
spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_request *fio_req;
	uint32_t dsm_size;

	io_u->engine_data = NULL;

	fio_req = calloc(1, sizeof(*fio_req));
	if (fio_req == NULL) {
		return 1;
	}

	if (!(td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)) {
#if FIO_HAS_MRT
		/* By default the number of ranges is set to 1 */
		dsm_size = td->o.num_range * sizeof(struct spdk_nvme_dsm_range);
#else
		dsm_size = sizeof(struct spdk_nvme_dsm_range);
#endif
		fio_req->dsm_range = calloc(1, dsm_size);
		if (fio_req->dsm_range == NULL) {
			free(fio_req);
			return 1;
		}
	}

	fio_req->md_buf = spdk_dma_zmalloc(g_spdk_md_per_io_size, NVME_IO_ALIGN, NULL);
	if (fio_req->md_buf == NULL) {
		fprintf(stderr, "Allocation of %u bytes of metadata failed\n", g_spdk_md_per_io_size);
		free(fio_req->dsm_range);
		free(fio_req);
		return 1;
	}

	fio_req->io = io_u;
	fio_req->fio_thread = fio_thread;

	io_u->engine_data = fio_req;

	return 0;
}

static void
spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	struct spdk_fio_request *fio_req = io_u->engine_data;

	if (fio_req) {
		assert(fio_req->io == io_u);
		spdk_dma_free(fio_req->md_buf);
		free(fio_req->dsm_range);
		free(fio_req);
		io_u->engine_data = NULL;
	}
}

static inline uint64_t
fio_offset_to_zslba(unsigned long long offset, struct spdk_nvme_ns *ns)
{
	return (offset / spdk_nvme_zns_ns_get_zone_size(ns)) * spdk_nvme_zns_ns_get_zone_size_sectors(ns);
}
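
/*
 * Worked example (numbers assumed for illustration): with a zone size of
 * 0x10000 sectors of 4096 bytes each, spdk_nvme_zns_ns_get_zone_size()
 * returns 0x10000000 bytes, so an offset of 0x18000000 falls in zone 1 and
 * maps to zslba 0x10000, the first LBA of the zone containing the offset.
 */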
static int
fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
	struct spdk_nvme_ns *ns = fio_qpair->ns;
	struct spdk_fio_request *fio_req = io_u->engine_data;
	uint32_t md_size, extended_lba_size, lba_count;
	uint64_t lba;
	struct iovec iov;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	/* Set appmask and apptag when PRACT is enabled */
	if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
		fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask;
		fio_req->dif_ctx.app_tag = g_spdk_apptag;
		return 0;
	}

	extended_lba_size = spdk_nvme_ns_get_extended_sector_size(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);
	lba = io_u->offset / extended_lba_size;
	lba_count = io_u->xfer_buflen / extended_lba_size;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
	rc = spdk_dif_ctx_init(&fio_req->dif_ctx, extended_lba_size, md_size,
			       true, fio_qpair->md_start,
			       (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns),
			       fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag,
			       0, 0, &dif_opts);
	if (rc != 0) {
		fprintf(stderr, "Initialization of DIF context failed\n");
		return rc;
	}

	if (io_u->ddir != DDIR_WRITE) {
		return 0;
	}

	iov.iov_base = io_u->buf;
	iov.iov_len = io_u->xfer_buflen;
	rc = spdk_dif_generate(&iov, 1, lba_count, &fio_req->dif_ctx);
	if (rc != 0) {
		fprintf(stderr, "Generation of DIF failed\n");
	}

	return rc;
}

static int
fio_separate_md_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
	struct spdk_nvme_ns *ns = fio_qpair->ns;
	struct spdk_fio_request *fio_req = io_u->engine_data;
	uint32_t md_size, block_size, lba_count;
	uint64_t lba;
	struct iovec iov, md_iov;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	/* Set appmask and apptag when PRACT is enabled */
	if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
		fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask;
		fio_req->dif_ctx.app_tag = g_spdk_apptag;
		return 0;
	}

	block_size = spdk_nvme_ns_get_sector_size(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);
	lba = io_u->offset / block_size;
	lba_count = io_u->xfer_buflen / block_size;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
	rc = spdk_dif_ctx_init(&fio_req->dif_ctx, block_size, md_size,
			       false, fio_qpair->md_start,
			       (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns),
			       fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag,
			       0, 0, &dif_opts);
	if (rc != 0) {
		fprintf(stderr, "Initialization of DIF context failed\n");
		return rc;
	}

	if (io_u->ddir != DDIR_WRITE) {
		return 0;
	}

	iov.iov_base = io_u->buf;
	iov.iov_len = io_u->xfer_buflen;
	md_iov.iov_base = fio_req->md_buf;
	md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);
	rc = spdk_dix_generate(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx);
	if (rc < 0) {
		fprintf(stderr, "Generation of DIX failed\n");
	}

	return rc;
}

static int
fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
	struct spdk_nvme_ns *ns = fio_qpair->ns;
	struct spdk_fio_request *fio_req = io_u->engine_data;
	uint32_t lba_count;
	struct iovec iov;
	struct spdk_dif_error err_blk = {};
	int rc;

	/* Do nothing when PRACT is enabled */
	if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
		return 0;
	}

	iov.iov_base = io_u->buf;
	iov.iov_len = io_u->xfer_buflen;
	lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_extended_sector_size(ns);

	rc = spdk_dif_verify(&iov, 1, lba_count, &fio_req->dif_ctx, &err_blk);
	if (rc != 0) {
		fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n",
			err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}

static int
fio_separate_md_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
	struct spdk_nvme_ns *ns = fio_qpair->ns;
	struct spdk_fio_request *fio_req = io_u->engine_data;
	uint32_t md_size, lba_count;
	struct iovec iov, md_iov;
	struct spdk_dif_error err_blk = {};
	int rc;

	/* Do nothing when PRACT is enabled */
	if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
		return 0;
	}

	iov.iov_base = io_u->buf;
	iov.iov_len = io_u->xfer_buflen;
	lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_sector_size(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);
	md_iov.iov_base = fio_req->md_buf;
	md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);

	rc = spdk_dix_verify(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx, &err_blk);
	if (rc != 0) {
		fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n",
			err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}

static void
spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_fio_request *fio_req = ctx;
	struct spdk_fio_thread *fio_thread = fio_req->fio_thread;
	struct spdk_fio_qpair *fio_qpair = fio_req->fio_qpair;
	int rc;

	if (fio_qpair->nvme_pi_enabled && fio_req->io->ddir == DDIR_READ) {
		if (fio_qpair->extended_lba) {
			rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io);
		} else {
			rc = fio_separate_md_verify_pi(fio_qpair, fio_req->io);
		}
		if (rc != 0) {
			fio_req->io->error = abs(rc);
		}
	}

	if (spdk_nvme_cpl_is_error(cpl)) {
		fio_req->io->error = EIO;
	}

	assert(fio_thread->iocq_count < fio_thread->iocq_size);
	fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io;
}

static void
spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset)
{
	struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;

	fio_req->iov_offset = sgl_offset;
	fio_req->bit_bucket_data_len = 0;
}

static int
spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length)
{
	struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;
	struct io_u *io_u = fio_req->io;
	uint32_t iov_len;
	uint32_t bit_bucket_len;

	*address = io_u->buf;

	if (fio_req->iov_offset) {
		assert(fio_req->iov_offset <= io_u->xfer_buflen);
		*address += fio_req->iov_offset;
	}

	iov_len = io_u->xfer_buflen - fio_req->iov_offset;
	if (iov_len > g_spdk_sge_size) {
		iov_len = g_spdk_sge_size;
	}

	if ((fio_req->bit_bucket_data_len < g_spdk_bit_bucket_data_len) && (io_u->ddir == DDIR_READ)) {
		assert(g_spdk_bit_bucket_data_len < io_u->xfer_buflen);
		*address = (void *)UINT64_MAX;
		bit_bucket_len = g_spdk_bit_bucket_data_len - fio_req->bit_bucket_data_len;
		if (iov_len > bit_bucket_len) {
			iov_len = bit_bucket_len;
		}
		fio_req->bit_bucket_data_len += iov_len;
	}

	fio_req->iov_offset += iov_len;
	*length = iov_len;

	return 0;
}
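
/*
 * Illustrative walk-through of the two SGE callbacks above (values assumed):
 * with xfer_buflen=16384, sge_size=4096 and bit_bucket_data_len=8192 on a
 * READ, the first two spdk_nvme_io_next_sge() calls return 4096-byte bit
 * bucket segments (address UINT64_MAX), and the remaining two calls return
 * ordinary 4096-byte data segments at io_u->buf + 8192 and + 12288.
 */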
#if FIO_IOOPS_VERSION >= 24
typedef enum fio_q_status fio_q_status_t;
#else
typedef int fio_q_status_t;
#endif

static fio_q_status_t
spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
{
	int rc = 1;
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_request *fio_req = io_u->engine_data;
	struct spdk_fio_qpair *fio_qpair;
	struct spdk_nvme_ns *ns = NULL;
	void *md_buf = NULL;
	struct spdk_dif_ctx *dif_ctx = &fio_req->dif_ctx;
#if FIO_HAS_FDP
	struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
#endif
	struct spdk_nvme_dsm_range *range;
	uint32_t block_size;
	uint64_t lba;
	uint32_t lba_count;
	uint32_t num_range;

	fio_qpair = get_fio_qpair(fio_thread, io_u->file);
	if (fio_qpair == NULL) {
		return -ENXIO;
	}
	ns = fio_qpair->ns;

	if (fio_qpair->nvme_pi_enabled && !fio_qpair->extended_lba) {
		md_buf = fio_req->md_buf;
	}
	fio_req->fio_qpair = fio_qpair;

	block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags);
	lba = io_u->offset / block_size;
	lba_count = io_u->xfer_buflen / block_size;

#if FIO_HAS_FDP
	/* Only SGL is supported for write commands with directives */
	if (io_u->ddir == DDIR_WRITE && io_u->dtype && !g_spdk_enable_sgl) {
		log_err("spdk/nvme: queue() directives require SGL to be enabled\n");
		io_u->error = -EINVAL;
		return FIO_Q_COMPLETED;
	}
#endif

	/* TODO: consider the case where fio randomizes and verifies io_u buffers */
	if (fio_qpair->nvme_pi_enabled) {
		if (fio_qpair->extended_lba) {
			rc = fio_extended_lba_setup_pi(fio_qpair, io_u);
		} else {
			rc = fio_separate_md_setup_pi(fio_qpair, io_u);
		}
		if (rc < 0) {
			io_u->error = -rc;
			return FIO_Q_COMPLETED;
		}
	}

	switch (io_u->ddir) {
	case DDIR_READ:
		if (!g_spdk_enable_sgl) {
			rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, lba_count,
							   spdk_fio_completion_cb, fio_req,
							   fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
		} else {
			rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba,
							    lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
							    spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
							    dif_ctx->apptag_mask, dif_ctx->app_tag);
		}
		break;
	case DDIR_WRITE:
		if (!g_spdk_enable_sgl) {
			if (!fio_qpair->zone_append_enabled) {
				rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba,
								    lba_count,
								    spdk_fio_completion_cb, fio_req,
								    fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
			} else {
				uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns);
				rc = spdk_nvme_zns_zone_append_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, zslba,
								       lba_count,
								       spdk_fio_completion_cb, fio_req,
								       fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
			}
		} else {
			if (!fio_qpair->zone_append_enabled) {
#if FIO_HAS_FDP
				if (spdk_unlikely(io_u->dtype)) {
					ext_opts.size = SPDK_SIZEOF(&ext_opts, cdw13);
					ext_opts.io_flags = fio_qpair->io_flags | (io_u->dtype << 20);
					ext_opts.metadata = md_buf;
					ext_opts.cdw13 = (io_u->dspec << 16);
					ext_opts.apptag = dif_ctx->app_tag;
					ext_opts.apptag_mask = dif_ctx->apptag_mask;
					rc = spdk_nvme_ns_cmd_writev_ext(ns, fio_qpair->qpair, lba, lba_count,
									 spdk_fio_completion_cb, fio_req,
									 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, &ext_opts);
					break;
				}
#endif
				rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba,
								     lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
								     spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
								     dif_ctx->apptag_mask, dif_ctx->app_tag);
			} else {
				uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns);
				rc = spdk_nvme_zns_zone_appendv_with_md(ns, fio_qpair->qpair, zslba,
									lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
									spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
									dif_ctx->apptag_mask, dif_ctx->app_tag);
			}
		}
		break;
	case DDIR_TRIM:
		if (td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM) {
			do_io_u_trim(td, io_u);
			io_u_mark_submit(td, 1);
			io_u_mark_complete(td, 1);
			return FIO_Q_COMPLETED;
		}

		range = fio_req->dsm_range;
#if FIO_HAS_MRT
		if (td->o.num_range == 1) {
			range->attributes.raw = 0;
			range->length = lba_count;
			range->starting_lba = lba;
			num_range = 1;
		} else {
			struct trim_range *tr = (struct trim_range *)io_u->xfer_buf;
			for (uint32_t i = 0; i < io_u->number_trim; i++) {
				range->attributes.raw = 0;
				range->length = tr->len / block_size;
				range->starting_lba = tr->start / block_size;
				range++;
				tr++;
			}
			num_range = io_u->number_trim;
			range = fio_req->dsm_range;
		}
#else
		range->attributes.raw = 0;
		range->length = lba_count;
		range->starting_lba = lba;
		num_range = 1;
#endif

		rc = spdk_nvme_ns_cmd_dataset_management(ns, fio_qpair->qpair,
				SPDK_NVME_DSM_ATTR_DEALLOCATE, range, num_range,
				spdk_fio_completion_cb, fio_req);
		break;
	default:
		assert(false);
		break;
	}

	/* NVMe read/write functions return -ENOMEM if there are no free requests. */
	if (rc == -ENOMEM) {
		return FIO_Q_BUSY;
	}

	if (rc != 0) {
		io_u->error = abs(rc);
		return FIO_Q_COMPLETED;
	}

	return FIO_Q_QUEUED;
}
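
/*
 * Note on the multi-range trim path in spdk_fio_queue() above (descriptive
 * only): when td->o.num_range > 1, fio packs io_u->number_trim 'struct
 * trim_range' entries (byte-based 'start' and 'len') into io_u->xfer_buf,
 * and each entry is converted into one LBA-based spdk_nvme_dsm_range of the
 * single Dataset Management command.
 */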
static struct io_u *
spdk_fio_event(struct thread_data *td, int event)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < fio_thread->iocq_count);
	return fio_thread->iocq[event];
}

static int
spdk_fio_getevents(struct thread_data *td, unsigned int min,
		   unsigned int max, const struct timespec *t)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;
	struct timespec t0, t1;
	uint64_t timeout = 0;

	if (t) {
		timeout = t->tv_sec * 1000000000L + t->tv_nsec;
		clock_gettime(CLOCK_MONOTONIC_RAW, &t0);
	}

	fio_thread->iocq_count = 0;

	/* fetch the next qpair */
	if (fio_thread->fio_qpair_current) {
		fio_qpair = TAILQ_NEXT(fio_thread->fio_qpair_current, link);
	}

	for (;;) {
		if (fio_qpair == NULL) {
			fio_qpair = TAILQ_FIRST(&fio_thread->fio_qpair);
		}

		while (fio_qpair != NULL) {
			/*
			 * We can be called while spdk_fio_open()s are still
			 * ongoing, in which case ->qpair can still be NULL.
			 */
			if (fio_qpair->qpair == NULL) {
				fio_qpair = TAILQ_NEXT(fio_qpair, link);
				continue;
			}

			spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count);

			if (fio_thread->iocq_count >= min) {
				/* remember the qpair to resume from on the next call */
				fio_thread->fio_qpair_current = fio_qpair;
				return fio_thread->iocq_count;
			}

			fio_qpair = TAILQ_NEXT(fio_qpair, link);
		}

		if (t) {
			uint64_t elapse;

			clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
			elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L)
				 + t1.tv_nsec - t0.tv_nsec;
			if (elapse > timeout) {
				break;
			}
		}
	}

	/* remember the qpair to resume from on the next call */
	fio_thread->fio_qpair_current = fio_qpair;
	return fio_thread->iocq_count;
}

static int
spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* TODO: This should probably send a flush to the device, but for now just return success. */
	return 0;
}

#if FIO_HAS_ZBD
static int
spdk_fio_get_zoned_model(struct thread_data *td, struct fio_file *f, enum zbd_zoned_model *model)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;
	const struct spdk_nvme_zns_ns_data *zns_data = NULL;

	if (f->filetype != FIO_TYPE_BLOCK) {
		log_info("spdk/nvme: unsupported filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	fio_qpair = get_fio_qpair(fio_thread, f);
	if (!fio_qpair) {
		log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
		return -ENODEV;
	}

	switch (spdk_nvme_ns_get_csi(fio_qpair->ns)) {
	case SPDK_NVME_CSI_NVM:
		*model = ZBD_NONE;
		return 0;

	case SPDK_NVME_CSI_KV:
		log_err("spdk/nvme: KV namespace is currently not supported\n");
		return -ENOSYS;

	case SPDK_NVME_CSI_ZNS:
		zns_data = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
		if (!zns_data) {
			log_err("spdk/nvme: file_name: '%s', ZNS is not enabled\n", f->file_name);
			return -EINVAL;
		}

		*model = ZBD_HOST_MANAGED;

		return 0;
	}

	return -EINVAL;
}

static int
spdk_fio_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
		      struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;
	const struct spdk_nvme_zns_ns_data *zns = NULL;
	struct spdk_nvme_zns_zone_report *report;
	struct spdk_nvme_qpair *tmp_qpair;
	uint32_t report_nzones = 0, report_nzones_max, report_nbytes, mdts_nbytes;
	uint64_t zsze_nbytes, ns_nzones, lba_nbytes;
	int completed = 0, err;

	fio_qpair = get_fio_qpair(fio_thread, f);
	if (!fio_qpair) {
		log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
		return -ENODEV;
	}
	zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
	if (!zns) {
		log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name);
		return -EINVAL;
	}

	/* qpair has not been allocated yet (it gets allocated in spdk_fio_open()).
	 * Create a temporary qpair in order to perform report zones.
	 */
	assert(!fio_qpair->qpair);

	tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_qpair->fio_ctrlr->ctrlr, NULL, 0);
	if (!tmp_qpair) {
		log_err("spdk/nvme: cannot allocate a temporary qpair\n");
		return -EIO;
	}

	/** Retrieve device parameters */
	mdts_nbytes = spdk_nvme_ns_get_max_io_xfer_size(fio_qpair->ns);
	lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns);
	zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns);
	ns_nzones = spdk_nvme_zns_ns_get_num_zones(fio_qpair->ns);

	/** Allocate a report buffer without exceeding the mdts, the zbdz storage, or what is needed */
	report_nzones_max = (mdts_nbytes - sizeof(*report)) / sizeof(report->descs[0]);
	report_nzones_max = spdk_min(spdk_min(report_nzones_max, nr_zones), ns_nzones);
	report_nbytes = sizeof(report->descs[0]) * report_nzones_max + sizeof(*report);
	report = calloc(1, report_nbytes);
	if (!report) {
		log_err("spdk/nvme: failed report_zones(): ENOMEM\n");
		err = -ENOMEM;
		goto exit;
	}

	err = spdk_nvme_zns_report_zones(fio_qpair->ns, tmp_qpair, report, report_nbytes,
					 offset / lba_nbytes, SPDK_NVME_ZRA_LIST_ALL, true, pcu_cb,
					 &completed);
	if (err || pcu(tmp_qpair, &completed) || completed < 0) {
		log_err("spdk/nvme: report_zones(): err: %d, cpl: %d\n", err, completed);
		err = err ? err : -EIO;
		goto exit;
	}
	assert(report->nr_zones <= report_nzones_max);
	report_nzones = report->nr_zones;

	for (uint64_t idx = 0; idx < report->nr_zones; ++idx) {
		struct spdk_nvme_zns_zone_desc *zdesc = &report->descs[idx];

		zbdz[idx].start = zdesc->zslba * lba_nbytes;
		zbdz[idx].len = zsze_nbytes;
		zbdz[idx].capacity = zdesc->zcap * lba_nbytes;
		zbdz[idx].wp = zdesc->wp * lba_nbytes;

		switch (zdesc->zt) {
		case SPDK_NVME_ZONE_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("spdk/nvme: %s: inv. zone-type: 0x%x\n", f->file_name, zdesc->zt);
			err = -EIO;
			goto exit;
		}

		switch (zdesc->zs) {
		case SPDK_NVME_ZONE_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case SPDK_NVME_ZONE_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case SPDK_NVME_ZONE_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case SPDK_NVME_ZONE_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case SPDK_NVME_ZONE_STATE_RONLY:
			zbdz[idx].cond = ZBD_ZONE_COND_READONLY;
			break;
		case SPDK_NVME_ZONE_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;
		case SPDK_NVME_ZONE_STATE_OFFLINE:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;

		default:
			log_err("spdk/nvme: %s: inv. zone-state: 0x%x\n", f->file_name, zdesc->zs);
			err = -EIO;
			goto exit;
		}
	}

exit:
	spdk_nvme_ctrlr_free_io_qpair(tmp_qpair);
	free(report);

	return err ? err : (int)report_nzones;
}

static int
spdk_fio_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset, uint64_t length)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;
	const struct spdk_nvme_zns_ns_data *zns = NULL;
	uint64_t zsze_nbytes, lba_nbytes;
	int err = 0;

	fio_qpair = get_fio_qpair(fio_thread, f);
	if (!fio_qpair) {
		log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
		return -ENODEV;
	}
	zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
	if (!zns) {
		log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name);
		return -EINVAL;
	}
	zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns);
	lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns);

	/** check the assumption that offset is a valid zone-start lba */
	if (offset % zsze_nbytes) {
		log_err("spdk/nvme: offset: %zu is not a valid zslba\n", offset);
		return -EINVAL;
	}

	for (uint64_t cur = offset; cur < offset + length; cur += zsze_nbytes) {
		int completed = 0;

		err = spdk_nvme_zns_reset_zone(fio_qpair->ns, fio_qpair->qpair, cur / lba_nbytes,
					       false, pcu_cb, &completed);
		if (err || pcu(fio_qpair->qpair, &completed) || completed < 0) {
			log_err("spdk/nvme: zns_reset_zone(): err: %d, cpl: %d\n", err, completed);
			err = err ? err : -EIO;
			break;
		}
	}

	return err;
}
#endif

#if FIO_IOOPS_VERSION >= 30
static int
spdk_fio_get_max_open_zones(struct thread_data *td, struct fio_file *f,
			    unsigned int *max_open_zones)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;

	fio_qpair = get_fio_qpair(fio_thread, f);
	if (!fio_qpair) {
		log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
		return -ENODEV;
	}

	*max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(fio_qpair->ns);

	return 0;
}
#endif
#if FIO_HAS_FDP
/**
 * This is called twice as the number of RUHS descriptors is unknown.
 * In the first call fio only sends a buffer to fetch the number of RUHS
 * descriptors. In the second call fio sends a buffer to fetch all of the
 * RUHS descriptors.
 */
static int
spdk_fio_fdp_fetch_ruhs(struct thread_data *td, struct fio_file *f,
			struct fio_ruhs_info *fruhs_info)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair = NULL;
	struct spdk_nvme_qpair *tmp_qpair;
	struct spdk_nvme_fdp_ruhs *fdp_ruhs;
	uint32_t ruhs_nbytes;
	uint16_t idx, nruhsd;
	int completed = 0, err;

	fio_qpair = get_fio_qpair(fio_thread, f);
	if (!fio_qpair) {
		log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
		return -ENODEV;
	}

	/* qpair has not been allocated yet (it gets allocated in spdk_fio_open()).
	 * Create a temporary qpair in order to fetch the RUHS descriptors.
	 */
	assert(!fio_qpair->qpair);

	tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_qpair->fio_ctrlr->ctrlr, NULL, 0);
	if (!tmp_qpair) {
		log_err("spdk/nvme: cannot allocate a temporary qpair\n");
		return -EIO;
	}

	nruhsd = fruhs_info->nr_ruhs;
	ruhs_nbytes = sizeof(*fdp_ruhs) + nruhsd * sizeof(struct spdk_nvme_fdp_ruhs_desc);
	fdp_ruhs = calloc(1, ruhs_nbytes);
	if (!fdp_ruhs) {
		log_err("spdk/nvme: failed fdp_fetch_ruhs(): ENOMEM\n");
		err = -ENOMEM;
		goto exit;
	}

	err = spdk_nvme_ns_cmd_io_mgmt_recv(fio_qpair->ns, tmp_qpair, fdp_ruhs, ruhs_nbytes,
					    SPDK_NVME_FDP_IO_MGMT_RECV_RUHS, 0, pcu_cb, &completed);
	if (err || pcu(tmp_qpair, &completed) || completed < 0) {
		log_err("spdk/nvme: fetch_ruhs(): err: %d, cpl: %d\n", err, completed);
		err = err ? err : -EIO;
		goto exit;
	}

	fruhs_info->nr_ruhs = fdp_ruhs->nruhsd;
	for (idx = 0; idx < nruhsd; idx++) {
		fruhs_info->plis[idx] = fdp_ruhs->ruhs_desc[idx].pid;
	}

exit:
	spdk_nvme_ctrlr_free_io_qpair(tmp_qpair);
	free(fdp_ruhs);

	return err;
}
#endif

static void
spdk_fio_cleanup(struct thread_data *td)
{
	struct spdk_fio_thread *fio_thread = td->io_ops_data;
	struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp;
	struct spdk_fio_options *fio_options = td->eo;

	if (fio_options->spdk_tracing) {
		spdk_trace_unregister_user_thread();
	}

	TAILQ_FOREACH_SAFE(fio_qpair, &fio_thread->fio_qpair, link, fio_qpair_tmp) {
		TAILQ_REMOVE(&fio_thread->fio_qpair, fio_qpair, link);
		free(fio_qpair);
	}

	free(fio_thread->iocq);
	free(fio_thread);

	pthread_mutex_lock(&g_mutex);
	g_td_count--;
	if (g_td_count == 0) {
		struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp;
		struct spdk_nvme_detach_ctx *detach_ctx = NULL;

		TAILQ_FOREACH_SAFE(fio_ctrlr, &g_ctrlrs, link, fio_ctrlr_tmp) {
			TAILQ_REMOVE(&g_ctrlrs, fio_ctrlr, link);
			spdk_nvme_detach_async(fio_ctrlr->ctrlr, &detach_ctx);
			free(fio_ctrlr);
		}

		if (detach_ctx) {
			spdk_nvme_detach_poll(detach_ctx);
		}

		if (fio_options->enable_vmd) {
			spdk_vmd_fini();
		}
	}
	pthread_mutex_unlock(&g_mutex);
	if (TAILQ_EMPTY(&g_ctrlrs)) {
		if (pthread_cancel(g_ctrlr_thread_id) == 0) {
			pthread_join(g_ctrlr_thread_id, NULL);
		}
	}
}

/* This array enables the addition of SPDK parameters to the fio config.
 * Add new parameters by defining them here along with a callback
 * function to read the parameter value.
 */
static struct fio_option options[] = {
	{
		.name = "enable_wrr",
		.lname = "Enable weighted round robin (WRR) for IO submission queues",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, enable_wrr),
		.def = "0",
		.help = "Enable weighted round robin (WRR) for IO submission queues",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "arbitration_burst",
		.lname = "Arbitration Burst",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, arbitration_burst),
		.def = "0",
		.help = "Arbitration Burst used for WRR (valid range 0-7)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "low_weight",
		.lname = "low_weight for WRR",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, low_weight),
		.def = "0",
		.help = "low weight used for WRR (valid range 0-255)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "medium_weight",
		.lname = "medium_weight for WRR",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, medium_weight),
		.def = "0",
		.help = "medium weight used for WRR (valid range 0-255)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "high_weight",
		.lname = "high_weight for WRR",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, high_weight),
		.def = "0",
		.help = "high weight used for WRR (valid range 0-255)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "wrr_priority",
		.lname = "priority used for WRR",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, wrr_priority),
		.def = "0",
		.help = "priority used for WRR (valid range 0-3)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "mem_size_mb",
		.lname = "Memory size in MB",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, mem_size),
		.def = "0",
		.help = "Memory Size for SPDK (MB)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "shm_id",
		.lname = "shared memory ID",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, shm_id),
		.def = "-1",
		.help = "Shared Memory ID",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "enable_sgl",
		.lname = "SGL used for I/O commands",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, enable_sgl),
		.def = "0",
		.help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "sge_size",
		.lname = "SGL size used for I/O commands",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, sge_size),
		.def = "4096",
		.help = "SGL size in bytes for I/O Commands (default 4096)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "disable_pcie_sgl_merge",
		.lname = "Disable merging of physically contiguous SGL elements",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, disable_pcie_sgl_merge),
		.def = "0",
		.help = "Disable SGL element merging (0=merging, 1=no merging)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "bit_bucket_data_len",
		.lname = "Amount of data used for Bit Bucket",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, bit_bucket_data_len),
		.def = "0",
		.help = "Bit Bucket Data Length for READ commands (disabled by default)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "hostnqn",
		.lname = "Host NQN to use when connecting to controllers.",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct spdk_fio_options, hostnqn),
		.help = "Host NQN",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "pi_act",
		.lname = "Protection Information Action",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, pi_act),
		.def = "1",
		.help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "pi_chk",
		.lname = "Protection Information Check (GUARD|REFTAG|APPTAG)",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct spdk_fio_options, pi_chk),
		.def = NULL,
		.help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "md_per_io_size",
		.lname = "Separate Metadata Buffer Size per I/O",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, md_per_io_size),
		.def = "4096",
		.help = "Size of separate metadata buffer per I/O (Default: 4096)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "apptag",
		.lname = "Application Tag used in Protection Information",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, apptag),
		.def = "0x1234",
		.help = "Application Tag used in Protection Information field (Default: 0x1234)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "apptag_mask",
		.lname = "Application Tag Mask",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, apptag_mask),
		.def = "0xffff",
		.help = "Application Tag Mask used with Application Tag (Default: 0xffff)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "digest_enable",
		.lname = "PDU digest choice for NVMe/TCP Transport (NONE|HEADER|DATA|BOTH)",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct spdk_fio_options, digest_enable),
		.def = NULL,
		.help = "Control the NVMe/TCP PDU digest (digest_enable=NONE|HEADER|DATA|BOTH)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "enable_vmd",
		.lname = "Enable VMD enumeration",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, enable_vmd),
		.def = "0",
		.help = "Enable VMD enumeration (enable_vmd=1 or enable_vmd=0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "initial_zone_reset",
		.lname = "Reset Zones on initialization",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, initial_zone_reset),
		.def = "0",
		.help = "Reset Zones on initialization (0=disable, 1=Reset All Zones)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "zone_append",
		.lname = "Use zone append instead of write",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, zone_append),
		.def = "0",
		.help = "Use zone append instead of write (1=zone append, 0=write)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "print_qid_mappings",
		.lname = "Print job-to-qid mappings",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, print_qid_mappings),
		.def = "0",
		.help = "Print job-to-qid mappings (0=disable, 1=enable)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "log_flags",
		.lname = "log_flags",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct spdk_fio_options, log_flags),
		.help = "Enable log flags (comma-separated list)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = "spdk_tracing",
		.lname = "Enable SPDK Tracing",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct spdk_fio_options, spdk_tracing),
		.def = "0",
		.help = "SPDK Tracing (0=disable, 1=enable)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_INVALID,
	},
	{
		.name = NULL,
	},
};

/* FIO imports this structure using dlsym */
struct ioengine_ops ioengine = {
	.name = "spdk",
	.version = FIO_IOOPS_VERSION,
	.queue = spdk_fio_queue,
	.getevents = spdk_fio_getevents,
	.event = spdk_fio_event,
	.cleanup = spdk_fio_cleanup,
	.open_file = spdk_fio_open,
	.close_file = spdk_fio_close,
	.invalidate = spdk_fio_invalidate,
	.iomem_alloc = spdk_fio_iomem_alloc,
	.iomem_free = spdk_fio_iomem_free,
	.setup = spdk_fio_setup,
	.init = spdk_fio_init,
	.io_u_init = spdk_fio_io_u_init,
	.io_u_free = spdk_fio_io_u_free,
#if FIO_HAS_ZBD
	.get_zoned_model = spdk_fio_get_zoned_model,
	.report_zones = spdk_fio_report_zones,
	.reset_wp = spdk_fio_reset_wp,
#endif
#if FIO_IOOPS_VERSION >= 30
	.get_max_open_zones = spdk_fio_get_max_open_zones,
#endif
#if FIO_HAS_FDP
	.fdp_fetch_ruhs = spdk_fio_fdp_fetch_ruhs,
#endif
#if FIO_HAS_MRT
	.flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO | FIO_MULTI_RANGE_TRIM,
#else
	.flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN | FIO_DISKLESSIO,
#endif
	.options = options,
	.option_struct_size = sizeof(struct spdk_fio_options),
};

static void fio_init
fio_spdk_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit
fio_spdk_unregister(void)
{
	if (g_spdk_env_initialized) {
		spdk_trace_cleanup();
		spdk_env_fini();
	}

	unregister_ioengine(&ioengine);
}

SPDK_LOG_REGISTER_COMPONENT(fio_nvme)