/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/pci_ids.h"

struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	TAILQ_ENTRY(ctrlr_entry) link;
	char name[1024];
};

struct ns_entry {
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;
	TAILQ_ENTRY(ns_entry) link;
	uint32_t io_size_blocks;
	uint64_t size_in_ios;
	char name[1024];
};

struct ns_worker_ctx {
	struct ns_entry *entry;
	struct spdk_nvme_qpair *qpair;
	uint64_t io_completed;
	uint64_t io_completed_error;
	uint64_t io_submitted;
	uint64_t current_queue_depth;
	uint64_t offset_in_ios;
	bool is_draining;

	TAILQ_ENTRY(ns_worker_ctx) link;
};

struct reset_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};

struct worker_thread {
	TAILQ_HEAD(, ns_worker_ctx) ns_ctx;
	unsigned lcore;
};

static struct spdk_mempool *task_pool;

static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static int g_num_namespaces = 0;
static struct worker_thread *g_worker = NULL;
static bool g_qemu_ssd_found = false;

static uint64_t g_tsc_rate;

static int g_io_size_bytes;
static int g_rw_percentage;
static int g_is_random;
static int g_queue_depth;
static int g_time_in_sec;

#define TASK_POOL_NUM 8192

static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
	struct ns_entry *entry;
	const struct spdk_nvme_ctrlr_data *cdata;

	if (!spdk_nvme_ns_is_active(ns)) {
		printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
		return;
	}

	entry = malloc(sizeof(struct ns_entry));
	if (entry == NULL) {
		perror("ns_entry malloc");
		exit(1);
	}

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	entry->ns = ns;
	entry->ctrlr = ctrlr;
	entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);

	snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);

	g_num_namespaces++;
	TAILQ_INSERT_TAIL(&g_namespaces, entry, link);
}

static void
register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	int nsid;
	struct spdk_nvme_ns *ns;
	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));

	if (entry == NULL) {
		perror("ctrlr_entry malloc");
		exit(1);
	}

	entry->ctrlr = ctrlr;
	TAILQ_INSERT_TAIL(&g_controllers, entry, link);

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
		if (ns == NULL) {
			continue;
		}
		register_ns(ctrlr, ns);
	}
}
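/*
 * I/O submission and completion path. Each in-flight I/O is tracked by a
 * reset_task drawn from task_pool; its data buffer is allocated per
 * submission and released on completion. When an I/O completes, a new one
 * is submitted in its place until the run starts draining.
 */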
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

static __thread unsigned int seed = 0;

static void
submit_single_io(struct ns_worker_ctx *ns_ctx)
{
	struct reset_task *task = NULL;
	uint64_t offset_in_ios;
	int rc;
	struct ns_entry *entry = ns_ctx->entry;

	task = spdk_mempool_get(task_pool);
	if (!task) {
		fprintf(stderr, "Failed to get task from task_pool\n");
		exit(1);
	}

	task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (!task->buf) {
		spdk_mempool_put(task_pool, task);
		fprintf(stderr, "task->buf spdk_zmalloc failed\n");
		exit(1);
	}

	task->ns_ctx = ns_ctx;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
	} else {
		offset_in_ios = ns_ctx->offset_in_ios++;
		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
			ns_ctx->offset_in_ios = 0;
		}
	}

	if ((g_rw_percentage == 100) ||
	    (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
					   offset_in_ios * entry->io_size_blocks,
					   entry->io_size_blocks, io_complete, task, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
					    offset_in_ios * entry->io_size_blocks,
					    entry->io_size_blocks, io_complete, task, 0);
	}

	if (rc != 0) {
		fprintf(stderr, "starting I/O failed\n");
	} else {
		ns_ctx->current_queue_depth++;
		ns_ctx->io_submitted++;
	}
}

static void
task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
{
	struct ns_worker_ctx *ns_ctx;

	ns_ctx = task->ns_ctx;
	ns_ctx->current_queue_depth--;

	if (spdk_nvme_cpl_is_error(completion)) {
		ns_ctx->io_completed_error++;
	} else {
		ns_ctx->io_completed++;
	}

	spdk_free(task->buf);
	spdk_mempool_put(task_pool, task);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!ns_ctx->is_draining) {
		submit_single_io(ns_ctx);
	}
}

static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	task_complete((struct reset_task *)ctx, completion);
}

static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
}

static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	while (queue_depth-- > 0) {
		submit_single_io(ns_ctx);
	}
}

static void
drain_io(struct ns_worker_ctx *ns_ctx)
{
	ns_ctx->is_draining = true;
	while (ns_ctx->current_queue_depth > 0) {
		check_io(ns_ctx);
	}
}
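/*
 * Worker main loop: allocate an I/O qpair for each namespace, prime each
 * qpair with g_queue_depth I/O, issue one controller reset while that I/O
 * is in flight, and poll for completions until the timer expires. Completed
 * I/O is replaced from io_complete(), so the queue depth stays constant
 * until drain_io() begins.
 */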
static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Submit initial I/O for each namespace. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
	}

	while (1) {
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
			}
			did_reset = true;
		}

		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			check_io(ns_ctx);
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-o io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds (should be larger than 15 seconds)]\n");
}
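/*
 * Example invocation (the binary name depends on the build; "reset" is
 * assumed here, and the option values are illustrative):
 *
 *	./reset -q 64 -o 4096 -w randrw -M 50 -t 30
 *
 * Each of the three reset cycles then runs a 50/50 random read/write
 * workload for 30 seconds at queue depth 64 with 4 KiB I/O on every
 * active NVMe namespace found.
 */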
static int
print_stats(void)
{
	uint64_t io_completed, io_submitted, io_completed_error;
	uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
	struct worker_thread *worker;
	struct ns_worker_ctx *ns_ctx;

	total_completed_io = 0;
	total_submitted_io = 0;
	total_completed_err_io = 0;

	worker = g_worker;
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		io_completed = ns_ctx->io_completed;
		io_submitted = ns_ctx->io_submitted;
		io_completed_error = ns_ctx->io_completed_error;
		total_completed_io += io_completed;
		total_submitted_io += io_submitted;
		total_completed_err_io += io_completed_error;
	}

	printf("========================================================\n");
	printf("%16" PRIu64 " IO completed successfully\n", total_completed_io);
	printf("%16" PRIu64 " IO completed with error\n", total_completed_err_io);
	printf("--------------------------------------------------------\n");
	printf("%16" PRIu64 " IO completed total\n", total_completed_io + total_completed_err_io);
	printf("%16" PRIu64 " IO submitted\n", total_submitted_io);

	if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
		fprintf(stderr, "Some I/O are missing...\n");
		return -1;
	}

	return 0;
}

static int
parse_args(int argc, char **argv)
{
	const char *workload_type;
	int op;
	bool mix_specified = false;
	long int val;

	/* default values */
	g_queue_depth = 0;
	g_io_size_bytes = 0;
	workload_type = NULL;
	g_time_in_sec = 0;
	g_rw_percentage = -1;

	while ((op = getopt(argc, argv, "o:q:t:w:M:")) != -1) {
		if (op == 'w') {
			workload_type = optarg;
		} else if (op == '?') {
			usage(argv[0]);
			return -EINVAL;
		} else {
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to integer failed\n");
				return val;
			}
			switch (op) {
			case 'q':
				g_queue_depth = val;
				break;
			case 'o':
				g_io_size_bytes = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			case 'M':
				g_rw_percentage = val;
				mix_specified = true;
				break;
			default:
				usage(argv[0]);
				return -EINVAL;
			}
		}
	}

	if (!g_queue_depth) {
		usage(argv[0]);
		return 1;
	}
	if (!g_io_size_bytes) {
		usage(argv[0]);
		return 1;
	}
	if (!workload_type) {
		usage(argv[0]);
		return 1;
	}
	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_rw_percentage = 0;
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use the -M option"
				" only with rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	return 0;
}
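/*
 * Worker and controller setup. probe_cb() and attach_cb() are invoked by
 * spdk_nvme_probe() for each NVMe controller discovered; QEMU-emulated
 * SSDs are skipped because they cannot handle this reset test.
 */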
static int
register_worker(void)
{
	struct worker_thread *worker;

	worker = malloc(sizeof(struct worker_thread));
	if (worker == NULL) {
		perror("worker_thread malloc");
		return -1;
	}

	memset(worker, 0, sizeof(struct worker_thread));
	TAILQ_INIT(&worker->ns_ctx);
	worker->lcore = spdk_env_get_current_core();

	g_worker = worker;

	return 0;
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	opts->disable_error_logging = true;
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);

		/* QEMU-emulated SSDs can't handle this test, so we skip them.
		 * QEMU NVMe SSDs report an Intel or Red Hat vendor ID, so we
		 * check for the matching device ID (0x5845 or 0x0010) to tell
		 * whether the controller is emulated by QEMU.
		 */
		if ((spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL &&
		     spdk_pci_device_get_device_id(dev) == 0x5845) ||
		    (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_REDHAT &&
		     spdk_pci_device_get_device_id(dev) == 0x0010)) {
			g_qemu_ssd_found = true;
			printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr);
			return;
		}
	}

	register_ctrlr(ctrlr);
}

static int
register_controllers(void)
{
	printf("Initializing NVMe Controllers\n");

	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry, *tmp;
	struct spdk_nvme_detach_ctx *detach_ctx = NULL;

	TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) {
		TAILQ_REMOVE(&g_controllers, entry, link);
		spdk_nvme_detach_async(entry->ctrlr, &detach_ctx);
		free(entry);
	}

	if (detach_ctx) {
		spdk_nvme_detach_poll(detach_ctx);
	}
}

static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = TAILQ_FIRST(&g_namespaces);
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}
		ns_ctx = malloc(sizeof(struct ns_worker_ctx));
		if (!ns_ctx) {
			return -1;
		}
		memset(ns_ctx, 0, sizeof(*ns_ctx));

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->entry = entry;
		TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link);

		entry = TAILQ_NEXT(entry, link);
		if (entry == NULL) {
			entry = TAILQ_FIRST(&g_namespaces);
		}
	}

	return 0;
}

static void
unregister_worker(void)
{
	struct ns_worker_ctx *ns_ctx, *tmp;

	assert(g_worker != NULL);

	TAILQ_FOREACH_SAFE(ns_ctx, &g_worker->ns_ctx, link, tmp) {
		TAILQ_REMOVE(&g_worker->ns_ctx, ns_ctx, link);
		free(ns_ctx);
	}

	free(g_worker);
	g_worker = NULL;
}

static int
run_nvme_reset_cycle(void)
{
	struct worker_thread *worker = g_worker;
	struct ns_worker_ctx *ns_ctx;

	if (work_fn(worker) != 0) {
		return -1;
	}

	if (print_stats() != 0) {
		return -1;
	}

	/* Reset the per-namespace counters before the next cycle. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
		ns_ctx->io_completed = 0;
		ns_ctx->io_completed_error = 0;
		ns_ctx->io_submitted = 0;
		ns_ctx->is_draining = false;
	}

	return 0;
}

static void
free_tasks(void)
{
	if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) {
		fprintf(stderr, "task_pool count is %zu but should be %d\n",
			spdk_mempool_count(task_pool), TASK_POOL_NUM);
	}
	spdk_mempool_free(task_pool);
}
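/*
 * main(): parse arguments, initialize the SPDK environment pinned to core 0
 * (core_mask 0x1), enumerate controllers, then run three timed reset cycles
 * against every active namespace before tearing everything down.
 */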
int
main(int argc, char **argv)
{
	int rc;
	int i;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	opts.opts_size = sizeof(opts);
	spdk_env_opts_init(&opts);
	opts.name = "reset";
	opts.core_mask = "0x1";
	opts.shm_id = 0;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (register_controllers() != 0) {
		return 1;
	}

	if (TAILQ_EMPTY(&g_controllers)) {
		printf("No NVMe controller found, %s exiting\n", argv[0]);
		return g_qemu_ssd_found ? 0 : 1;
	}

	task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM,
					sizeof(struct reset_task),
					64, SPDK_ENV_NUMA_ID_ANY);
	if (!task_pool) {
		fprintf(stderr, "Cannot create task pool\n");
		return 1;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	if (register_worker() != 0) {
		return 1;
	}

	if (associate_workers_with_ns() != 0) {
		rc = 1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");

	/* Run three consecutive reset cycles. */
	for (i = 2; i >= 0; i--) {
		rc = run_nvme_reset_cycle();
		if (rc != 0) {
			goto cleanup;
		}
	}

cleanup:
	unregister_controllers();
	unregister_worker();
	free_tasks();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}