1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "spdk/nvme.h" 37 #include "spdk/env.h" 38 #include "spdk/string.h" 39 #include "spdk/pci_ids.h" 40 41 struct ctrlr_entry { 42 struct spdk_nvme_ctrlr *ctrlr; 43 TAILQ_ENTRY(ctrlr_entry) link; 44 char name[1024]; 45 }; 46 47 struct ns_entry { 48 struct spdk_nvme_ns *ns; 49 struct spdk_nvme_ctrlr *ctrlr; 50 TAILQ_ENTRY(ns_entry) link; 51 uint32_t io_size_blocks; 52 uint64_t size_in_ios; 53 char name[1024]; 54 }; 55 56 struct ns_worker_ctx { 57 struct ns_entry *entry; 58 struct spdk_nvme_qpair *qpair; 59 uint64_t io_completed; 60 uint64_t io_completed_error; 61 uint64_t io_submitted; 62 uint64_t current_queue_depth; 63 uint64_t offset_in_ios; 64 bool is_draining; 65 66 TAILQ_ENTRY(ns_worker_ctx) link; 67 }; 68 69 struct reset_task { 70 struct ns_worker_ctx *ns_ctx; 71 void *buf; 72 }; 73 74 struct worker_thread { 75 TAILQ_HEAD(, ns_worker_ctx) ns_ctx; 76 unsigned lcore; 77 }; 78 79 static struct spdk_mempool *task_pool; 80 81 static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers); 82 static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces); 83 static int g_num_namespaces = 0; 84 static struct worker_thread *g_worker = NULL; 85 static bool g_qemu_ssd_found = false; 86 87 static uint64_t g_tsc_rate; 88 89 static int g_io_size_bytes; 90 static int g_rw_percentage; 91 static int g_is_random; 92 static int g_queue_depth; 93 static int g_time_in_sec; 94 95 #define TASK_POOL_NUM 8192 96 97 static void 98 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) 99 { 100 struct ns_entry *entry; 101 const struct spdk_nvme_ctrlr_data *cdata; 102 103 if (!spdk_nvme_ns_is_active(ns)) { 104 printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns)); 105 return; 106 } 107 108 entry = malloc(sizeof(struct ns_entry)); 109 if (entry == NULL) { 110 perror("ns_entry malloc"); 111 exit(1); 112 } 113 114 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 115 116 entry->ns = ns; 117 entry->ctrlr = ctrlr; 118 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / 119 g_io_size_bytes; 120 entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); 121 122 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 123 124 g_num_namespaces++; 125 TAILQ_INSERT_TAIL(&g_namespaces, entry, link); 126 } 127 128 static void 129 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 130 { 131 int nsid, num_ns; 132 struct spdk_nvme_ns *ns; 133 struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); 134 135 if (entry == NULL) { 136 perror("ctrlr_entry malloc"); 137 exit(1); 138 } 139 140 entry->ctrlr = ctrlr; 141 TAILQ_INSERT_TAIL(&g_controllers, entry, link); 142 143 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 144 for (nsid = 1; nsid <= num_ns; nsid++) { 145 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 146 if (ns == NULL) { 147 continue; 148 } 149 register_ns(ctrlr, ns); 150 } 151 } 152 153 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 154 155 static __thread unsigned int seed = 0; 156 157 static void 158 submit_single_io(struct ns_worker_ctx *ns_ctx) 159 { 160 struct reset_task *task = NULL; 161 uint64_t offset_in_ios; 162 int rc; 163 struct ns_entry *entry = ns_ctx->entry; 164 165 task = spdk_mempool_get(task_pool); 166 if (!task) { 167 fprintf(stderr, "Failed to get task from task_pool\n"); 168 exit(1); 169 } 170 171 task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 172 if (!task->buf) { 173 spdk_free(task->buf); 174 fprintf(stderr, "task->buf spdk_zmalloc failed\n"); 175 exit(1); 176 } 177 178 task->ns_ctx = ns_ctx; 179 task->ns_ctx->io_submitted++; 180 181 if (g_is_random) { 182 offset_in_ios = rand_r(&seed) % entry->size_in_ios; 183 } else { 184 offset_in_ios = ns_ctx->offset_in_ios++; 185 if (ns_ctx->offset_in_ios == entry->size_in_ios) { 186 ns_ctx->offset_in_ios = 0; 187 } 188 } 189 190 if ((g_rw_percentage == 100) || 191 (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { 192 rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf, 193 offset_in_ios * entry->io_size_blocks, 194 entry->io_size_blocks, io_complete, task, 0); 195 } else { 196 rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf, 197 offset_in_ios * entry->io_size_blocks, 198 entry->io_size_blocks, io_complete, task, 0); 199 } 200 201 if (rc != 0) { 202 fprintf(stderr, "starting I/O failed\n"); 203 } else { 204 ns_ctx->current_queue_depth++; 205 } 206 } 207 208 static void 209 task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion) 210 { 211 struct ns_worker_ctx *ns_ctx; 212 213 ns_ctx = task->ns_ctx; 214 ns_ctx->current_queue_depth--; 215 216 if (spdk_nvme_cpl_is_error(completion)) { 217 ns_ctx->io_completed_error++; 218 } else { 219 ns_ctx->io_completed++; 220 } 221 222 spdk_free(task->buf); 223 spdk_mempool_put(task_pool, task); 224 225 /* 226 * is_draining indicates when time has expired for the test run 227 * and we are just waiting for the previously submitted I/O 228 * to complete. In this case, do not submit a new I/O to replace 229 * the one just completed. 230 */ 231 if (!ns_ctx->is_draining) { 232 submit_single_io(ns_ctx); 233 } 234 } 235 236 static void 237 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 238 { 239 task_complete((struct reset_task *)ctx, completion); 240 } 241 242 static void 243 check_io(struct ns_worker_ctx *ns_ctx) 244 { 245 spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0); 246 } 247 248 static void 249 submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) 250 { 251 while (queue_depth-- > 0) { 252 submit_single_io(ns_ctx); 253 } 254 } 255 256 static void 257 drain_io(struct ns_worker_ctx *ns_ctx) 258 { 259 ns_ctx->is_draining = true; 260 while (ns_ctx->current_queue_depth > 0) { 261 check_io(ns_ctx); 262 } 263 } 264 265 static int 266 work_fn(void *arg) 267 { 268 uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 269 struct worker_thread *worker = (struct worker_thread *)arg; 270 struct ns_worker_ctx *ns_ctx = NULL; 271 bool did_reset = false; 272 273 printf("Starting thread on core %u\n", worker->lcore); 274 275 /* Submit initial I/O for each namespace. */ 276 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 277 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0); 278 if (ns_ctx->qpair == NULL) { 279 fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore); 280 return -1; 281 } 282 submit_io(ns_ctx, g_queue_depth); 283 } 284 285 while (1) { 286 if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) { 287 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 288 if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) { 289 fprintf(stderr, "nvme reset failed.\n"); 290 return -1; 291 } 292 } 293 did_reset = true; 294 } 295 296 /* 297 * Check for completed I/O for each controller. A new 298 * I/O will be submitted in the io_complete callback 299 * to replace each I/O that is completed. 300 */ 301 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 302 check_io(ns_ctx); 303 } 304 305 if (spdk_get_ticks() > tsc_end) { 306 break; 307 } 308 } 309 310 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 311 drain_io(ns_ctx); 312 spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); 313 } 314 315 return 0; 316 } 317 318 static void usage(char *program_name) 319 { 320 printf("%s options", program_name); 321 printf("\n"); 322 printf("\t[-q io depth]\n"); 323 printf("\t[-s io size in bytes]\n"); 324 printf("\t[-w io pattern type, must be one of\n"); 325 printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); 326 printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); 327 printf("\t[-t time in seconds(should be larger than 15 seconds)]\n"); 328 printf("\t[-m max completions per poll]\n"); 329 printf("\t\t(default:0 - unlimited)\n"); 330 } 331 332 static int 333 print_stats(void) 334 { 335 uint64_t io_completed, io_submitted, io_completed_error; 336 uint64_t total_completed_io, total_submitted_io, total_completed_err_io; 337 struct worker_thread *worker; 338 struct ns_worker_ctx *ns_ctx; 339 340 total_completed_io = 0; 341 total_submitted_io = 0; 342 total_completed_err_io = 0; 343 344 worker = g_worker; 345 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 346 io_completed = ns_ctx->io_completed; 347 io_submitted = ns_ctx->io_submitted; 348 io_completed_error = ns_ctx->io_completed_error; 349 total_completed_io += io_completed; 350 total_submitted_io += io_submitted; 351 total_completed_err_io += io_completed_error; 352 } 353 354 printf("========================================================\n"); 355 printf("%16lu IO completed successfully\n", total_completed_io); 356 printf("%16lu IO completed with error\n", total_completed_err_io); 357 printf("--------------------------------------------------------\n"); 358 printf("%16lu IO completed total\n", total_completed_io + total_completed_err_io); 359 printf("%16lu IO submitted\n", total_submitted_io); 360 361 if (total_submitted_io != (total_completed_io + total_completed_err_io)) { 362 fprintf(stderr, "Some IO are missing......\n"); 363 return -1; 364 } 365 366 return 0; 367 } 368 369 static int 370 parse_args(int argc, char **argv) 371 { 372 const char *workload_type; 373 int op; 374 bool mix_specified = false; 375 long int val; 376 377 /* default value */ 378 g_queue_depth = 0; 379 g_io_size_bytes = 0; 380 workload_type = NULL; 381 g_time_in_sec = 0; 382 g_rw_percentage = -1; 383 384 while ((op = getopt(argc, argv, "m:q:s:t:w:M:")) != -1) { 385 if (op == 'w') { 386 workload_type = optarg; 387 } else if (op == '?') { 388 usage(argv[0]); 389 return -EINVAL; 390 } else { 391 val = spdk_strtol(optarg, 10); 392 if (val < 0) { 393 fprintf(stderr, "Converting a string to integer failed\n"); 394 return val; 395 } 396 switch (op) { 397 case 'q': 398 g_queue_depth = val; 399 break; 400 case 's': 401 g_io_size_bytes = val; 402 break; 403 case 't': 404 g_time_in_sec = val; 405 break; 406 case 'M': 407 g_rw_percentage = val; 408 mix_specified = true; 409 break; 410 default: 411 usage(argv[0]); 412 return -EINVAL; 413 } 414 } 415 } 416 417 if (!g_queue_depth) { 418 usage(argv[0]); 419 return 1; 420 } 421 if (!g_io_size_bytes) { 422 usage(argv[0]); 423 return 1; 424 } 425 if (!workload_type) { 426 usage(argv[0]); 427 return 1; 428 } 429 if (!g_time_in_sec) { 430 usage(argv[0]); 431 return 1; 432 } 433 434 if (strcmp(workload_type, "read") && 435 strcmp(workload_type, "write") && 436 strcmp(workload_type, "randread") && 437 strcmp(workload_type, "randwrite") && 438 strcmp(workload_type, "rw") && 439 strcmp(workload_type, "randrw")) { 440 fprintf(stderr, 441 "io pattern type must be one of\n" 442 "(read, write, randread, randwrite, rw, randrw)\n"); 443 return 1; 444 } 445 446 if (!strcmp(workload_type, "read") || 447 !strcmp(workload_type, "randread")) { 448 g_rw_percentage = 100; 449 } 450 451 if (!strcmp(workload_type, "write") || 452 !strcmp(workload_type, "randwrite")) { 453 g_rw_percentage = 0; 454 } 455 456 if (!strcmp(workload_type, "read") || 457 !strcmp(workload_type, "randread") || 458 !strcmp(workload_type, "write") || 459 !strcmp(workload_type, "randwrite")) { 460 if (mix_specified) { 461 fprintf(stderr, "Ignoring -M option... Please use -M option" 462 " only when using rw or randrw.\n"); 463 } 464 } 465 466 if (!strcmp(workload_type, "rw") || 467 !strcmp(workload_type, "randrw")) { 468 if (g_rw_percentage < 0 || g_rw_percentage > 100) { 469 fprintf(stderr, 470 "-M must be specified to value from 0 to 100 " 471 "for rw or randrw.\n"); 472 return 1; 473 } 474 } 475 476 if (!strcmp(workload_type, "read") || 477 !strcmp(workload_type, "write") || 478 !strcmp(workload_type, "rw")) { 479 g_is_random = 0; 480 } else { 481 g_is_random = 1; 482 } 483 484 return 0; 485 } 486 487 static int 488 register_worker(void) 489 { 490 struct worker_thread *worker; 491 492 worker = malloc(sizeof(struct worker_thread)); 493 if (worker == NULL) { 494 perror("worker_thread malloc"); 495 return -1; 496 } 497 498 memset(worker, 0, sizeof(struct worker_thread)); 499 TAILQ_INIT(&worker->ns_ctx); 500 worker->lcore = spdk_env_get_current_core(); 501 502 g_worker = worker; 503 504 return 0; 505 } 506 507 508 static bool 509 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 510 struct spdk_nvme_ctrlr_opts *opts) 511 { 512 opts->disable_error_logging = true; 513 return true; 514 } 515 516 static void 517 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 518 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 519 { 520 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { 521 struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); 522 523 /* QEMU emulated SSDs can't handle this test, so we will skip 524 * them. QEMU NVMe SSDs report themselves as VID == Intel. So we need 525 * to check this specific 0x5845 device ID to know whether it's QEMU 526 * or not. 527 */ 528 if (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL && 529 spdk_pci_device_get_device_id(dev) == 0x5845) { 530 g_qemu_ssd_found = true; 531 printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr); 532 return; 533 } 534 } 535 536 register_ctrlr(ctrlr); 537 } 538 539 static int 540 register_controllers(void) 541 { 542 printf("Initializing NVMe Controllers\n"); 543 544 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { 545 fprintf(stderr, "spdk_nvme_probe() failed\n"); 546 return 1; 547 } 548 549 return 0; 550 } 551 552 static void 553 unregister_controllers(void) 554 { 555 struct ctrlr_entry *entry, *tmp; 556 557 TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) { 558 TAILQ_REMOVE(&g_controllers, entry, link); 559 spdk_nvme_detach(entry->ctrlr); 560 free(entry); 561 } 562 } 563 564 static int 565 associate_workers_with_ns(void) 566 { 567 struct ns_entry *entry = TAILQ_FIRST(&g_namespaces); 568 struct worker_thread *worker = g_worker; 569 struct ns_worker_ctx *ns_ctx; 570 int i, count; 571 572 count = g_num_namespaces; 573 574 for (i = 0; i < count; i++) { 575 if (entry == NULL) { 576 break; 577 } 578 ns_ctx = malloc(sizeof(struct ns_worker_ctx)); 579 if (!ns_ctx) { 580 return -1; 581 } 582 memset(ns_ctx, 0, sizeof(*ns_ctx)); 583 584 printf("Associating %s with lcore %d\n", entry->name, worker->lcore); 585 ns_ctx->entry = entry; 586 TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link); 587 588 entry = TAILQ_NEXT(entry, link);; 589 if (entry == NULL) { 590 entry = TAILQ_FIRST(&g_namespaces); 591 } 592 } 593 594 return 0; 595 } 596 597 static void 598 unregister_worker(void) 599 { 600 struct ns_worker_ctx *ns_ctx, *tmp; 601 602 assert(g_worker != NULL); 603 604 TAILQ_FOREACH_SAFE(ns_ctx, &g_worker->ns_ctx, link, tmp) { 605 TAILQ_REMOVE(&g_worker->ns_ctx, ns_ctx, link); 606 free(ns_ctx); 607 } 608 609 free(g_worker); 610 g_worker = NULL; 611 } 612 613 static int 614 run_nvme_reset_cycle(void) 615 { 616 struct worker_thread *worker = g_worker; 617 struct ns_worker_ctx *ns_ctx; 618 619 if (work_fn(worker) != 0) { 620 return -1; 621 } 622 623 if (print_stats() != 0) { 624 return -1; 625 } 626 627 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 628 ns_ctx->io_completed = 0; 629 ns_ctx->io_completed_error = 0; 630 ns_ctx->io_submitted = 0; 631 ns_ctx->is_draining = false; 632 } 633 634 return 0; 635 } 636 637 static void 638 free_tasks(void) 639 { 640 if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) { 641 fprintf(stderr, "task_pool count is %zu but should be %d\n", 642 spdk_mempool_count(task_pool), TASK_POOL_NUM); 643 } 644 spdk_mempool_free(task_pool); 645 } 646 647 int main(int argc, char **argv) 648 { 649 int rc; 650 int i; 651 struct spdk_env_opts opts; 652 653 654 rc = parse_args(argc, argv); 655 if (rc != 0) { 656 return rc; 657 } 658 659 spdk_env_opts_init(&opts); 660 opts.name = "reset"; 661 opts.core_mask = "0x1"; 662 opts.shm_id = 0; 663 if (spdk_env_init(&opts) < 0) { 664 fprintf(stderr, "Unable to initialize SPDK env\n"); 665 return 1; 666 } 667 668 if (register_controllers() != 0) { 669 return 1; 670 } 671 672 if (TAILQ_EMPTY(&g_controllers)) { 673 printf("No NVMe controller found, %s exiting\n", argv[0]); 674 return g_qemu_ssd_found ? 0 : 1; 675 } 676 677 task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM, 678 sizeof(struct reset_task), 679 64, SPDK_ENV_SOCKET_ID_ANY); 680 if (!task_pool) { 681 fprintf(stderr, "Cannot create task pool\n"); 682 return 1; 683 } 684 685 g_tsc_rate = spdk_get_ticks_hz(); 686 687 if (register_worker() != 0) { 688 return 1; 689 } 690 691 if (associate_workers_with_ns() != 0) { 692 rc = 1; 693 goto cleanup; 694 } 695 696 printf("Initialization complete. Launching workers.\n"); 697 698 for (i = 2; i >= 0; i--) { 699 rc = run_nvme_reset_cycle(); 700 if (rc != 0) { 701 goto cleanup; 702 } 703 } 704 705 cleanup: 706 unregister_worker(); 707 unregister_controllers(); 708 free_tasks(); 709 710 if (rc != 0) { 711 fprintf(stderr, "%s: errors occured\n", argv[0]); 712 } 713 714 return rc; 715 } 716