/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/pci_ids.h"

struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	TAILQ_ENTRY(ctrlr_entry) link;
	char name[1024];
};

struct ns_entry {
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;
	TAILQ_ENTRY(ns_entry) link;
	uint32_t io_size_blocks;
	uint64_t size_in_ios;
	char name[1024];
};

struct ns_worker_ctx {
	struct ns_entry *entry;
	struct spdk_nvme_qpair *qpair;
	uint64_t io_completed;
	uint64_t io_completed_error;
	uint64_t io_submitted;
	uint64_t current_queue_depth;
	uint64_t offset_in_ios;
	bool is_draining;

	TAILQ_ENTRY(ns_worker_ctx) link;
};

struct reset_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};

struct worker_thread {
	TAILQ_HEAD(, ns_worker_ctx) ns_ctx;
	unsigned lcore;
};

static struct spdk_mempool *task_pool;

static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static int g_num_namespaces = 0;
static struct worker_thread *g_worker = NULL;
static bool g_qemu_ssd_found = false;

static uint64_t g_tsc_rate;

static int g_io_size_bytes;
static int g_rw_percentage;
static int g_is_random;
static int g_queue_depth;
static int g_time_in_sec;
static uint32_t g_max_completions; /* 0 means process all available completions per poll */

#define TASK_POOL_NUM 8192

static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
	struct ns_entry *entry;
	const struct spdk_nvme_ctrlr_data *cdata;

	if (!spdk_nvme_ns_is_active(ns)) {
		printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
		return;
	}

	entry = malloc(sizeof(struct ns_entry));
	if (entry == NULL) {
		perror("ns_entry malloc");
		exit(1);
	}

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	entry->ns = ns;
	entry->ctrlr = ctrlr;
	entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);

	snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);

	g_num_namespaces++;
	TAILQ_INSERT_TAIL(&g_namespaces, entry, link);
}

static void
register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nsid;
	struct spdk_nvme_ns *ns;
	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));

	if (entry == NULL) {
		perror("ctrlr_entry malloc");
		exit(1);
	}

	entry->ctrlr = ctrlr;
	TAILQ_INSERT_TAIL(&g_controllers, entry, link);

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		if (ns == NULL) {
			continue;
		}
		register_ns(ctrlr, ns);
	}
}

static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

static __thread unsigned int seed = 0;
static void
submit_single_io(struct ns_worker_ctx *ns_ctx)
{
	struct reset_task *task = NULL;
	uint64_t offset_in_ios;
	int rc;
	struct ns_entry *entry = ns_ctx->entry;

	task = spdk_mempool_get(task_pool);
	if (!task) {
		fprintf(stderr, "Failed to get task from task_pool\n");
		exit(1);
	}

	task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (!task->buf) {
		spdk_mempool_put(task_pool, task);
		fprintf(stderr, "task->buf spdk_zmalloc failed\n");
		exit(1);
	}

	task->ns_ctx = ns_ctx;
	task->ns_ctx->io_submitted++;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
	} else {
		offset_in_ios = ns_ctx->offset_in_ios++;
		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
			ns_ctx->offset_in_ios = 0;
		}
	}

	if ((g_rw_percentage == 100) ||
	    (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
					   offset_in_ios * entry->io_size_blocks,
					   entry->io_size_blocks, io_complete, task, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
					    offset_in_ios * entry->io_size_blocks,
					    entry->io_size_blocks, io_complete, task, 0);
	}

	if (rc != 0) {
		fprintf(stderr, "starting I/O failed\n");
	} else {
		ns_ctx->current_queue_depth++;
	}
}

static void
task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
{
	struct ns_worker_ctx *ns_ctx;

	ns_ctx = task->ns_ctx;
	ns_ctx->current_queue_depth--;

	if (spdk_nvme_cpl_is_error(completion)) {
		ns_ctx->io_completed_error++;
	} else {
		ns_ctx->io_completed++;
	}

	spdk_free(task->buf);
	spdk_mempool_put(task_pool, task);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!ns_ctx->is_draining) {
		submit_single_io(ns_ctx);
	}
}

static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	task_complete((struct reset_task *)ctx, completion);
}

static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_max_completions);
}

static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	while (queue_depth-- > 0) {
		submit_single_io(ns_ctx);
	}
}

static void
drain_io(struct ns_worker_ctx *ns_ctx)
{
	ns_ctx->is_draining = true;
	while (ns_ctx->current_queue_depth > 0) {
		check_io(ns_ctx);
	}
}

static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Allocate an I/O qpair and submit the initial I/O for each namespace. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
	}
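
	/*
	 * Main poll loop. While more than half of the test time remains,
	 * reset each controller exactly once; I/O that is in flight when
	 * the reset happens is expected to complete with an error and is
	 * counted in io_completed_error rather than io_completed.
	 */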
	while (1) {
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
			}
			did_reset = true;
		}

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			check_io(ns_ctx);
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds (should be larger than 15 seconds)]\n");
	printf("\t[-m max completions per poll]\n");
	printf("\t\t(default: 0 - unlimited)\n");
}
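
/*
 * Tally the per-namespace counters and make sure no I/O went missing:
 * every submitted I/O must be accounted for as either a success or an
 * error completion, even across the controller reset.
 */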
static int
print_stats(void)
{
	uint64_t io_completed, io_submitted, io_completed_error;
	uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
	struct worker_thread *worker;
	struct ns_worker_ctx *ns_ctx;

	total_completed_io = 0;
	total_submitted_io = 0;
	total_completed_err_io = 0;

	worker = g_worker;
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		io_completed = ns_ctx->io_completed;
		io_submitted = ns_ctx->io_submitted;
		io_completed_error = ns_ctx->io_completed_error;
		total_completed_io += io_completed;
		total_submitted_io += io_submitted;
		total_completed_err_io += io_completed_error;
	}

	printf("========================================================\n");
	printf("%16" PRIu64 " IO completed successfully\n", total_completed_io);
	printf("%16" PRIu64 " IO completed with error\n", total_completed_err_io);
	printf("--------------------------------------------------------\n");
	printf("%16" PRIu64 " IO completed total\n", total_completed_io + total_completed_err_io);
	printf("%16" PRIu64 " IO submitted\n", total_submitted_io);

	if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
		fprintf(stderr, "Some IO are missing......\n");
		return -1;
	}

	return 0;
}

static int
parse_args(int argc, char **argv)
{
	const char *workload_type;
	int op;
	bool mix_specified = false;
	long int val;

	/* default values */
	g_queue_depth = 0;
	g_io_size_bytes = 0;
	workload_type = NULL;
	g_time_in_sec = 0;
	g_rw_percentage = -1;
	g_max_completions = 0;

	while ((op = getopt(argc, argv, "m:q:s:t:w:M:")) != -1) {
		if (op == 'w') {
			workload_type = optarg;
		} else if (op == '?') {
			usage(argv[0]);
			return -EINVAL;
		} else {
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to integer failed\n");
				return val;
			}
			switch (op) {
			case 'm':
				g_max_completions = val;
				break;
			case 'q':
				g_queue_depth = val;
				break;
			case 's':
				g_io_size_bytes = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			case 'M':
				g_rw_percentage = val;
				mix_specified = true;
				break;
			default:
				usage(argv[0]);
				return -EINVAL;
			}
		}
	}

	if (!g_queue_depth) {
		usage(argv[0]);
		return 1;
	}
	if (!g_io_size_bytes) {
		usage(argv[0]);
		return 1;
	}
	if (!workload_type) {
		usage(argv[0]);
		return 1;
	}
	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_rw_percentage = 0;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use the -M option"
				" only with rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	return 0;
}

static int
register_worker(void)
{
	struct worker_thread *worker;

	worker = malloc(sizeof(struct worker_thread));
	if (worker == NULL) {
		perror("worker_thread malloc");
		return -1;
	}

	memset(worker, 0, sizeof(struct worker_thread));
	TAILQ_INIT(&worker->ns_ctx);
	worker->lcore = spdk_env_get_current_core();

	g_worker = worker;

	return 0;
}
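
/*
 * Attach to every controller the probe finds. Per-command error logging
 * is disabled since the mid-run controller reset is expected to fail the
 * I/O still in flight; those completions are counted as errors rather
 * than logged.
 */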
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	opts->disable_error_logging = true;
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);

		/* QEMU emulated SSDs can't handle this test, so we will skip
		 * them. QEMU NVMe SSDs report themselves as VID == Intel, so we
		 * need to check the specific 0x5845 device ID to know whether a
		 * controller is QEMU or not.
		 */
		if (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL &&
		    spdk_pci_device_get_device_id(dev) == 0x5845) {
			g_qemu_ssd_found = true;
			printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr);
			return;
		}
	}

	register_ctrlr(ctrlr);
}

static int
register_controllers(void)
{
	printf("Initializing NVMe Controllers\n");

	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry, *tmp;
	struct spdk_nvme_detach_ctx *detach_ctx = NULL;

	TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) {
		TAILQ_REMOVE(&g_controllers, entry, link);
		spdk_nvme_detach_async(entry->ctrlr, &detach_ctx);
		free(entry);
	}

	if (detach_ctx) {
		spdk_nvme_detach_poll(detach_ctx);
	}
}

static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = TAILQ_FIRST(&g_namespaces);
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}
		ns_ctx = malloc(sizeof(struct ns_worker_ctx));
		if (!ns_ctx) {
			return -1;
		}
		memset(ns_ctx, 0, sizeof(*ns_ctx));

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->entry = entry;
		TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link);

		entry = TAILQ_NEXT(entry, link);
		if (entry == NULL) {
			entry = TAILQ_FIRST(&g_namespaces);
		}
	}

	return 0;
}

static void
unregister_worker(void)
{
	struct ns_worker_ctx *ns_ctx, *tmp;

	assert(g_worker != NULL);

	TAILQ_FOREACH_SAFE(ns_ctx, &g_worker->ns_ctx, link, tmp) {
		TAILQ_REMOVE(&g_worker->ns_ctx, ns_ctx, link);
		free(ns_ctx);
	}

	free(g_worker);
	g_worker = NULL;
}

static int
run_nvme_reset_cycle(void)
{
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;

	if (work_fn(worker) != 0) {
		return -1;
	}

	if (print_stats() != 0) {
		return -1;
	}

	/* Reset the per-namespace counters before the next cycle. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->io_completed = 0;
		ns_ctx->io_completed_error = 0;
		ns_ctx->io_submitted = 0;
		ns_ctx->is_draining = false;
	}

	return 0;
}

static void
free_tasks(void)
{
	if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) {
		fprintf(stderr, "task_pool count is %zu but should be %d\n",
			spdk_mempool_count(task_pool), TASK_POOL_NUM);
	}
	spdk_mempool_free(task_pool);
}

int
main(int argc, char **argv)
{
	int rc;
	int i;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "reset";
	opts.core_mask = "0x1";
	opts.shm_id = 0;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (register_controllers() != 0) {
		return 1;
	}

	if (TAILQ_EMPTY(&g_controllers)) {
		printf("No NVMe controller found, %s exiting\n", argv[0]);
		return g_qemu_ssd_found ? 0 : 1;
	}

	task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM,
					sizeof(struct reset_task),
					64, SPDK_ENV_SOCKET_ID_ANY);
	if (!task_pool) {
		fprintf(stderr, "Cannot create task pool\n");
		return 1;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	if (register_worker() != 0) {
		return 1;
	}

	if (associate_workers_with_ns() != 0) {
		rc = 1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");
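
	/* Run the submit/reset/drain cycle three times; each cycle must
	 * account for every I/O it submitted before the next one starts.
	 */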
	for (i = 2; i >= 0; i--) {
		rc = run_nvme_reset_cycle();
		if (rc != 0) {
			goto cleanup;
		}
	}

cleanup:
	unregister_controllers();
	unregister_worker();
	free_tasks();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}