/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/pci_ids.h"

struct ctrlr_entry {
	struct spdk_nvme_ctrlr		*ctrlr;
	TAILQ_ENTRY(ctrlr_entry)	link;
	char				name[1024];
};

struct ns_entry {
	struct spdk_nvme_ns	*ns;
	struct spdk_nvme_ctrlr	*ctrlr;
	TAILQ_ENTRY(ns_entry)	link;
	uint32_t		io_size_blocks;
	uint64_t		size_in_ios;
	char			name[1024];
};

struct ns_worker_ctx {
	struct ns_entry		*entry;
	struct spdk_nvme_qpair	*qpair;
	uint64_t		io_completed;
	uint64_t		io_completed_error;
	uint64_t		io_submitted;
	uint64_t		current_queue_depth;
	uint64_t		offset_in_ios;
	bool			is_draining;

	TAILQ_ENTRY(ns_worker_ctx)	link;
};

struct reset_task {
	struct ns_worker_ctx	*ns_ctx;
	void			*buf;
};

struct worker_thread {
	TAILQ_HEAD(, ns_worker_ctx)	ns_ctx;
	unsigned			lcore;
};

static struct spdk_mempool *task_pool;

static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static int g_num_namespaces = 0;
static struct worker_thread *g_worker = NULL;
static bool g_qemu_ssd_found = false;

static uint64_t g_tsc_rate;

static int g_io_size_bytes;
static int g_rw_percentage;
static int g_is_random;
static int g_queue_depth;
static int g_time_in_sec;

#define TASK_POOL_NUM 8192

static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
	struct ns_entry *entry;
	const struct spdk_nvme_ctrlr_data *cdata;

	if (!spdk_nvme_ns_is_active(ns)) {
		printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
		return;
	}

	entry = malloc(sizeof(struct ns_entry));
	if (entry == NULL) {
		perror("ns_entry malloc");
		exit(1);
	}

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	entry->ns = ns;
	entry->ctrlr = ctrlr;
	entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);

	snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);

	g_num_namespaces++;
	TAILQ_INSERT_TAIL(&g_namespaces, entry, link);
}

static void
register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nsid;
	struct spdk_nvme_ns *ns;
	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));

	if (entry == NULL) {
		perror("ctrlr_entry malloc");
		exit(1);
	}

	entry->ctrlr = ctrlr;
	TAILQ_INSERT_TAIL(&g_controllers, entry, link);

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		if (ns == NULL) {
			continue;
		}
		register_ns(ctrlr, ns);
	}
}

static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

static __thread unsigned int seed = 0;

static void
submit_single_io(struct ns_worker_ctx *ns_ctx)
{
	struct reset_task *task = NULL;
	uint64_t offset_in_ios;
	int rc;
	struct ns_entry *entry = ns_ctx->entry;

	task = spdk_mempool_get(task_pool);
	if (!task) {
		fprintf(stderr, "Failed to get task from task_pool\n");
		exit(1);
	}

	task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (!task->buf) {
		fprintf(stderr, "task->buf spdk_zmalloc failed\n");
		exit(1);
	}

	task->ns_ctx = ns_ctx;
	task->ns_ctx->io_submitted++;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
	} else {
		offset_in_ios = ns_ctx->offset_in_ios++;
		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
			ns_ctx->offset_in_ios = 0;
		}
	}

	if ((g_rw_percentage == 100) ||
	    (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
					   offset_in_ios * entry->io_size_blocks,
					   entry->io_size_blocks, io_complete, task, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
					    offset_in_ios * entry->io_size_blocks,
					    entry->io_size_blocks, io_complete, task, 0);
	}

	if (rc != 0) {
		fprintf(stderr, "starting I/O failed\n");
	} else {
		ns_ctx->current_queue_depth++;
	}
}
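
/*
 * Completion path: account for the finished I/O, release its buffer and
 * task back to the pool, and, unless the run is draining, immediately
 * submit a replacement so the configured queue depth stays constant.
 */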
static void
task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
{
	struct ns_worker_ctx *ns_ctx;

	ns_ctx = task->ns_ctx;
	ns_ctx->current_queue_depth--;

	if (spdk_nvme_cpl_is_error(completion)) {
		ns_ctx->io_completed_error++;
	} else {
		ns_ctx->io_completed++;
	}

	spdk_free(task->buf);
	spdk_mempool_put(task_pool, task);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!ns_ctx->is_draining) {
		submit_single_io(ns_ctx);
	}
}

static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	task_complete((struct reset_task *)ctx, completion);
}

static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
}

static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	while (queue_depth-- > 0) {
		submit_single_io(ns_ctx);
	}
}

static void
drain_io(struct ns_worker_ctx *ns_ctx)
{
	ns_ctx->is_draining = true;
	while (ns_ctx->current_queue_depth > 0) {
		check_io(ns_ctx);
	}
}
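
/*
 * Worker loop: allocate one I/O qpair per namespace, prime each qpair with
 * g_queue_depth I/Os, reset every controller once while the run is still in
 * its first half, then poll for completions until the timer expires and the
 * in-flight I/O has drained.
 */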
static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Submit initial I/O for each namespace. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
	}

	while (1) {
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
			}
			did_reset = true;
		}

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			check_io(ns_ctx);
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-q io depth]\n");
	printf("\t[-o io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds (should be larger than 15 seconds)]\n");
}

static int
print_stats(void)
{
	uint64_t io_completed, io_submitted, io_completed_error;
	uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
	struct worker_thread *worker;
	struct ns_worker_ctx *ns_ctx;

	total_completed_io = 0;
	total_submitted_io = 0;
	total_completed_err_io = 0;

	worker = g_worker;
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		io_completed = ns_ctx->io_completed;
		io_submitted = ns_ctx->io_submitted;
		io_completed_error = ns_ctx->io_completed_error;
		total_completed_io += io_completed;
		total_submitted_io += io_submitted;
		total_completed_err_io += io_completed_error;
	}

	printf("========================================================\n");
	printf("%16" PRIu64 " IO completed successfully\n", total_completed_io);
	printf("%16" PRIu64 " IO completed with error\n", total_completed_err_io);
	printf("--------------------------------------------------------\n");
	printf("%16" PRIu64 " IO completed total\n", total_completed_io + total_completed_err_io);
	printf("%16" PRIu64 " IO submitted\n", total_submitted_io);

	if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
		fprintf(stderr, "Some IO are missing......\n");
		return -1;
	}

	return 0;
}
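
/*
 * Command-line parsing. All four of -q, -o, -w, and -t are required; -M is
 * only meaningful with the rw and randrw patterns. A typical invocation
 * (the values here are illustrative, not defaults) might look like:
 *
 *     ./reset -q 64 -o 4096 -w randrw -M 50 -t 30
 */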
static int
parse_args(int argc, char **argv)
{
	const char *workload_type;
	int op;
	bool mix_specified = false;
	long int val;

	/* default values */
	g_queue_depth = 0;
	g_io_size_bytes = 0;
	workload_type = NULL;
	g_time_in_sec = 0;
	g_rw_percentage = -1;

	while ((op = getopt(argc, argv, "o:q:t:w:M:")) != -1) {
		if (op == 'w') {
			workload_type = optarg;
		} else if (op == '?') {
			usage(argv[0]);
			return -EINVAL;
		} else {
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to integer failed\n");
				return val;
			}
			switch (op) {
			case 'q':
				g_queue_depth = val;
				break;
			case 'o':
				g_io_size_bytes = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			case 'M':
				g_rw_percentage = val;
				mix_specified = true;
				break;
			default:
				usage(argv[0]);
				return -EINVAL;
			}
		}
	}

	if (!g_queue_depth) {
		usage(argv[0]);
		return 1;
	}
	if (!g_io_size_bytes) {
		usage(argv[0]);
		return 1;
	}
	if (!workload_type) {
		usage(argv[0]);
		return 1;
	}
	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_rw_percentage = 0;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use the -M option"
				" only with the rw or randrw patterns.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	return 0;
}

static int
register_worker(void)
{
	struct worker_thread *worker;

	worker = malloc(sizeof(struct worker_thread));
	if (worker == NULL) {
		perror("worker_thread malloc");
		return -1;
	}

	memset(worker, 0, sizeof(struct worker_thread));
	TAILQ_INIT(&worker->ns_ctx);
	worker->lcore = spdk_env_get_current_core();

	g_worker = worker;

	return 0;
}
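
/*
 * Enumeration callbacks. probe_cb accepts every controller the probe finds
 * and disables the driver's error logging for it; attach_cb then filters
 * out QEMU-emulated NVMe devices (Intel vendor ID, device ID 0x5845),
 * which cannot survive this test.
 */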
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	opts->disable_error_logging = true;
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);

		/* QEMU emulated SSDs can't handle this test, so we will skip
		 * them. QEMU NVMe SSDs report themselves as VID == Intel, so we
		 * need to check for the specific 0x5845 device ID to know
		 * whether a device is QEMU or not.
		 */
		if (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL &&
		    spdk_pci_device_get_device_id(dev) == 0x5845) {
			g_qemu_ssd_found = true;
			printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr);
			return;
		}
	}

	register_ctrlr(ctrlr);
}

static int
register_controllers(void)
{
	printf("Initializing NVMe Controllers\n");

	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry, *tmp;
	struct spdk_nvme_detach_ctx *detach_ctx = NULL;

	TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) {
		TAILQ_REMOVE(&g_controllers, entry, link);
		spdk_nvme_detach_async(entry->ctrlr, &detach_ctx);
		free(entry);
	}

	if (detach_ctx) {
		spdk_nvme_detach_poll(detach_ctx);
	}
}

static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = TAILQ_FIRST(&g_namespaces);
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}
		ns_ctx = malloc(sizeof(struct ns_worker_ctx));
		if (!ns_ctx) {
			return -1;
		}
		memset(ns_ctx, 0, sizeof(*ns_ctx));

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->entry = entry;
		TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link);

		entry = TAILQ_NEXT(entry, link);
		if (entry == NULL) {
			entry = TAILQ_FIRST(&g_namespaces);
		}
	}

	return 0;
}

static void
unregister_worker(void)
{
	struct ns_worker_ctx *ns_ctx, *tmp;

	assert(g_worker != NULL);

	TAILQ_FOREACH_SAFE(ns_ctx, &g_worker->ns_ctx, link, tmp) {
		TAILQ_REMOVE(&g_worker->ns_ctx, ns_ctx, link);
		free(ns_ctx);
	}

	free(g_worker);
	g_worker = NULL;
}

static int
run_nvme_reset_cycle(void)
{
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;

	if (work_fn(worker) != 0) {
		return -1;
	}

	if (print_stats() != 0) {
		return -1;
	}

	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->io_completed = 0;
		ns_ctx->io_completed_error = 0;
		ns_ctx->io_submitted = 0;
		ns_ctx->is_draining = false;
	}

	return 0;
}

static void
free_tasks(void)
{
	if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) {
		fprintf(stderr, "task_pool count is %zu but should be %d\n",
			spdk_mempool_count(task_pool), TASK_POOL_NUM);
	}
	spdk_mempool_free(task_pool);
}
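
/*
 * Entry point: parse arguments, bring up the SPDK environment pinned to a
 * single core, enumerate and filter controllers, create the task pool, and
 * run three timed reset cycles back to back before tearing everything down.
 */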
int
main(int argc, char **argv)
{
	int rc;
	int i;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "reset";
	opts.core_mask = "0x1";
	opts.shm_id = 0;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (register_controllers() != 0) {
		return 1;
	}

	if (TAILQ_EMPTY(&g_controllers)) {
		printf("No NVMe controller found, %s exiting\n", argv[0]);
		return g_qemu_ssd_found ? 0 : 1;
	}

	task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM,
					sizeof(struct reset_task),
					64, SPDK_ENV_SOCKET_ID_ANY);
	if (!task_pool) {
		fprintf(stderr, "Cannot create task pool\n");
		return 1;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	if (register_worker() != 0) {
		return 1;
	}

	if (associate_workers_with_ns() != 0) {
		rc = 1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");

	for (i = 2; i >= 0; i--) {
		rc = run_nvme_reset_cycle();
		if (rc != 0) {
			goto cleanup;
		}
	}

cleanup:
	unregister_controllers();
	unregister_worker();
	free_tasks();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}