1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "spdk/barrier.h" 37 #include "spdk/fd.h" 38 #include "spdk/nvme.h" 39 #include "spdk/env.h" 40 #include "spdk/string.h" 41 #include "spdk/nvme_intel.h" 42 #include "spdk/histogram_data.h" 43 44 #if HAVE_LIBAIO 45 #include <libaio.h> 46 #endif 47 48 struct ctrlr_entry { 49 struct spdk_nvme_ctrlr *ctrlr; 50 struct ctrlr_entry *next; 51 char name[1024]; 52 }; 53 54 enum entry_type { 55 ENTRY_TYPE_NVME_NS, 56 ENTRY_TYPE_AIO_FILE, 57 }; 58 59 struct ns_entry { 60 enum entry_type type; 61 62 union { 63 struct { 64 struct spdk_nvme_ctrlr *ctrlr; 65 struct spdk_nvme_ns *ns; 66 struct spdk_nvme_qpair *qpair; 67 } nvme; 68 #if HAVE_LIBAIO 69 struct { 70 int fd; 71 struct io_event *events; 72 io_context_t ctx; 73 } aio; 74 #endif 75 } u; 76 77 uint32_t io_size_blocks; 78 uint64_t size_in_ios; 79 bool is_draining; 80 uint32_t current_queue_depth; 81 char name[1024]; 82 struct ns_entry *next; 83 84 struct spdk_histogram_data *submit_histogram; 85 struct spdk_histogram_data *complete_histogram; 86 }; 87 88 struct perf_task { 89 void *buf; 90 uint64_t submit_tsc; 91 #if HAVE_LIBAIO 92 struct iocb iocb; 93 #endif 94 }; 95 96 static bool g_enable_histogram = false; 97 98 static struct ctrlr_entry *g_ctrlr = NULL; 99 static struct ns_entry *g_ns = NULL; 100 101 static uint64_t g_tsc_rate; 102 103 static uint32_t g_io_size_bytes; 104 static int g_time_in_sec; 105 106 static int g_aio_optind; /* Index of first AIO filename in argv */ 107 108 struct perf_task *g_task; 109 uint64_t g_tsc_submit = 0; 110 uint64_t g_tsc_submit_min = UINT64_MAX; 111 uint64_t g_tsc_submit_max = 0; 112 uint64_t g_tsc_complete = 0; 113 uint64_t g_tsc_complete_min = UINT64_MAX; 114 uint64_t g_tsc_complete_max = 0; 115 uint64_t g_io_completed = 0; 116 117 static void 118 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) 119 { 120 struct ns_entry *entry; 121 const struct spdk_nvme_ctrlr_data *cdata; 122 123 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 124 125 if (!spdk_nvme_ns_is_active(ns)) { 126 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", 127 cdata->mn, cdata->sn, 128 spdk_nvme_ns_get_id(ns)); 129 return; 130 } 131 132 if (spdk_nvme_ns_get_size(ns) < g_io_size_bytes || 133 spdk_nvme_ns_get_sector_size(ns) > g_io_size_bytes) { 134 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " 135 "ns size %" PRIu64 " / block size %u for I/O size %u\n", 136 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), 137 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); 138 return; 139 } 140 141 entry = calloc(1, sizeof(struct ns_entry)); 142 if (entry == NULL) { 143 perror("ns_entry malloc"); 144 exit(1); 145 } 146 147 entry->type = ENTRY_TYPE_NVME_NS; 148 entry->u.nvme.ctrlr = ctrlr; 149 entry->u.nvme.ns = ns; 150 151 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / 152 g_io_size_bytes; 153 entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); 154 entry->submit_histogram = spdk_histogram_data_alloc(); 155 entry->complete_histogram = spdk_histogram_data_alloc(); 156 157 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 158 159 entry->next = g_ns; 160 g_ns = entry; 161 } 162 163 static void 164 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 165 { 166 int num_ns; 167 struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); 168 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); 169 170 if (entry == NULL) { 171 perror("ctrlr_entry malloc"); 172 exit(1); 173 } 174 175 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 176 177 entry->ctrlr = ctrlr; 178 179 entry->next = g_ctrlr; 180 g_ctrlr = entry; 181 182 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 183 /* Only register the first namespace. */ 184 if (num_ns < 1) { 185 fprintf(stderr, "controller found with no namespaces\n"); 186 exit(1); 187 } 188 189 register_ns(ctrlr, spdk_nvme_ctrlr_get_ns(ctrlr, 1)); 190 } 191 192 #if HAVE_LIBAIO 193 static int 194 register_aio_file(const char *path) 195 { 196 struct ns_entry *entry; 197 198 int fd; 199 uint64_t size; 200 uint32_t blklen; 201 202 fd = open(path, O_RDWR | O_DIRECT); 203 if (fd < 0) { 204 fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno)); 205 return -1; 206 } 207 208 size = spdk_fd_get_size(fd); 209 if (size == 0) { 210 fprintf(stderr, "Could not determine size of AIO device %s\n", path); 211 close(fd); 212 return -1; 213 } 214 215 blklen = spdk_fd_get_blocklen(fd); 216 if (blklen == 0) { 217 fprintf(stderr, "Could not determine block size of AIO device %s\n", path); 218 close(fd); 219 return -1; 220 } 221 222 entry = calloc(1, sizeof(struct ns_entry)); 223 if (entry == NULL) { 224 close(fd); 225 perror("aio ns_entry malloc"); 226 return -1; 227 } 228 229 entry->type = ENTRY_TYPE_AIO_FILE; 230 entry->u.aio.fd = fd; 231 entry->size_in_ios = size / g_io_size_bytes; 232 entry->io_size_blocks = g_io_size_bytes / blklen; 233 entry->submit_histogram = spdk_histogram_data_alloc(); 234 entry->complete_histogram = spdk_histogram_data_alloc(); 235 236 snprintf(entry->name, sizeof(entry->name), "%s", path); 237 238 g_ns = entry; 239 240 return 0; 241 } 242 243 static int 244 aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf, 245 unsigned long nbytes, uint64_t offset, void *cb_ctx) 246 { 247 iocb->aio_fildes = fd; 248 iocb->aio_reqprio = 0; 249 iocb->aio_lio_opcode = cmd; 250 iocb->u.c.buf = buf; 251 iocb->u.c.nbytes = nbytes; 252 iocb->u.c.offset = offset; 253 iocb->data = cb_ctx; 254 255 if (io_submit(aio_ctx, 1, &iocb) < 0) { 256 printf("io_submit"); 257 return -1; 258 } 259 260 return 0; 261 } 262 263 static void 264 aio_check_io(void) 265 { 266 int count, i; 267 struct timespec timeout; 268 269 timeout.tv_sec = 0; 270 timeout.tv_nsec = 0; 271 272 count = io_getevents(g_ns->u.aio.ctx, 1, 1, g_ns->u.aio.events, &timeout); 273 if (count < 0) { 274 fprintf(stderr, "io_getevents error\n"); 275 exit(1); 276 } 277 278 for (i = 0; i < count; i++) { 279 g_ns->current_queue_depth--; 280 } 281 } 282 #endif /* HAVE_LIBAIO */ 283 284 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 285 286 static __thread unsigned int seed = 0; 287 288 static void 289 submit_single_io(void) 290 { 291 uint64_t offset_in_ios; 292 uint64_t start; 293 int rc; 294 struct ns_entry *entry = g_ns; 295 uint64_t tsc_submit; 296 297 offset_in_ios = rand_r(&seed) % entry->size_in_ios; 298 299 start = spdk_get_ticks(); 300 spdk_rmb(); 301 #if HAVE_LIBAIO 302 if (entry->type == ENTRY_TYPE_AIO_FILE) { 303 rc = aio_submit(g_ns->u.aio.ctx, &g_task->iocb, entry->u.aio.fd, IO_CMD_PREAD, g_task->buf, 304 g_io_size_bytes, offset_in_ios * g_io_size_bytes, g_task); 305 } else 306 #endif 307 { 308 rc = spdk_nvme_ns_cmd_read(entry->u.nvme.ns, g_ns->u.nvme.qpair, g_task->buf, 309 offset_in_ios * entry->io_size_blocks, 310 entry->io_size_blocks, io_complete, g_task, 0); 311 } 312 313 spdk_rmb(); 314 tsc_submit = spdk_get_ticks() - start; 315 g_tsc_submit += tsc_submit; 316 if (tsc_submit < g_tsc_submit_min) { 317 g_tsc_submit_min = tsc_submit; 318 } 319 if (tsc_submit > g_tsc_submit_max) { 320 g_tsc_submit_max = tsc_submit; 321 } 322 if (g_enable_histogram) { 323 spdk_histogram_data_tally(entry->submit_histogram, tsc_submit); 324 } 325 326 if (rc != 0) { 327 fprintf(stderr, "starting I/O failed\n"); 328 } 329 330 g_ns->current_queue_depth++; 331 } 332 333 static void 334 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 335 { 336 g_ns->current_queue_depth--; 337 } 338 339 uint64_t g_complete_tsc_start; 340 341 static uint64_t 342 check_io(void) 343 { 344 uint64_t end, tsc_complete; 345 346 spdk_rmb(); 347 #if HAVE_LIBAIO 348 if (g_ns->type == ENTRY_TYPE_AIO_FILE) { 349 aio_check_io(); 350 } else 351 #endif 352 { 353 spdk_nvme_qpair_process_completions(g_ns->u.nvme.qpair, 0); 354 } 355 spdk_rmb(); 356 end = spdk_get_ticks(); 357 if (g_ns->current_queue_depth == 1) { 358 /* 359 * Account for race condition in AIO case where interrupt occurs 360 * after checking for queue depth. If the timestamp capture 361 * is too big compared to the last capture, assume that an 362 * interrupt fired, and do not bump the start tsc forward. This 363 * will ensure this extra time is accounted for next time through 364 * when we see current_queue_depth drop to 0. 365 */ 366 if (g_ns->type == ENTRY_TYPE_NVME_NS || (end - g_complete_tsc_start) < 500) { 367 g_complete_tsc_start = end; 368 } 369 } else { 370 tsc_complete = end - g_complete_tsc_start; 371 g_tsc_complete += tsc_complete; 372 if (tsc_complete < g_tsc_complete_min) { 373 g_tsc_complete_min = tsc_complete; 374 } 375 if (tsc_complete > g_tsc_complete_max) { 376 g_tsc_complete_max = tsc_complete; 377 } 378 if (g_enable_histogram) { 379 spdk_histogram_data_tally(g_ns->complete_histogram, tsc_complete); 380 } 381 g_io_completed++; 382 if (!g_ns->is_draining) { 383 submit_single_io(); 384 } 385 end = g_complete_tsc_start = spdk_get_ticks(); 386 } 387 388 return end; 389 } 390 391 static void 392 drain_io(void) 393 { 394 g_ns->is_draining = true; 395 while (g_ns->current_queue_depth > 0) { 396 check_io(); 397 } 398 } 399 400 static int 401 init_ns_worker_ctx(void) 402 { 403 if (g_ns->type == ENTRY_TYPE_AIO_FILE) { 404 #ifdef HAVE_LIBAIO 405 g_ns->u.aio.events = calloc(1, sizeof(struct io_event)); 406 if (!g_ns->u.aio.events) { 407 return -1; 408 } 409 g_ns->u.aio.ctx = 0; 410 if (io_setup(1, &g_ns->u.aio.ctx) < 0) { 411 free(g_ns->u.aio.events); 412 perror("io_setup"); 413 return -1; 414 } 415 #endif 416 } else { 417 /* 418 * TODO: If a controller has multiple namespaces, they could all use the same queue. 419 * For now, give each namespace/thread combination its own queue. 420 */ 421 g_ns->u.nvme.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_ns->u.nvme.ctrlr, NULL, 0); 422 if (!g_ns->u.nvme.qpair) { 423 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); 424 return -1; 425 } 426 } 427 428 return 0; 429 } 430 431 static void 432 cleanup_ns_worker_ctx(void) 433 { 434 if (g_ns->type == ENTRY_TYPE_AIO_FILE) { 435 #ifdef HAVE_LIBAIO 436 io_destroy(g_ns->u.aio.ctx); 437 free(g_ns->u.aio.events); 438 #endif 439 } else { 440 spdk_nvme_ctrlr_free_io_qpair(g_ns->u.nvme.qpair); 441 } 442 } 443 444 static int 445 work_fn(void) 446 { 447 uint64_t tsc_end, current; 448 449 /* Allocate a queue pair for each namespace. */ 450 if (init_ns_worker_ctx() != 0) { 451 printf("ERROR: init_ns_worker_ctx() failed\n"); 452 return 1; 453 } 454 455 tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 456 457 /* Submit initial I/O for each namespace. */ 458 submit_single_io(); 459 g_complete_tsc_start = spdk_get_ticks(); 460 461 while (1) { 462 /* 463 * Check for completed I/O for each controller. A new 464 * I/O will be submitted in the io_complete callback 465 * to replace each I/O that is completed. 466 */ 467 current = check_io(); 468 469 if (current > tsc_end) { 470 break; 471 } 472 } 473 474 drain_io(); 475 cleanup_ns_worker_ctx(); 476 477 return 0; 478 } 479 480 static void usage(char *program_name) 481 { 482 printf("%s options", program_name); 483 #if HAVE_LIBAIO 484 printf(" [AIO device(s)]..."); 485 #endif 486 printf("\n"); 487 printf("\t[-s io size in bytes]\n"); 488 printf("\t[-t time in seconds]\n"); 489 printf("\t\t(default: 1)]\n"); 490 printf("\t[-H enable histograms]\n"); 491 } 492 493 static void 494 print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, 495 uint64_t total, uint64_t so_far) 496 { 497 double so_far_pct; 498 499 if (count == 0) { 500 return; 501 } 502 503 so_far_pct = (double)so_far * 100 / total; 504 505 printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", 506 (double)start * 1000 * 1000 / g_tsc_rate, 507 (double)end * 1000 * 1000 / g_tsc_rate, 508 so_far_pct, count); 509 } 510 511 static void 512 print_stats(void) 513 { 514 double divisor = (double)g_tsc_rate / (1000 * 1000 * 1000); 515 516 printf("submit (in ns) avg, min, max = %8.1f, %8.1f, %8.1f\n", 517 (double)g_tsc_submit / g_io_completed / divisor, 518 (double)g_tsc_submit_min / divisor, 519 (double)g_tsc_submit_max / divisor); 520 printf("complete (in ns) avg, min, max = %8.1f, %8.1f, %8.1f\n", 521 (double)g_tsc_complete / g_io_completed / divisor, 522 (double)g_tsc_complete_min / divisor, 523 (double)g_tsc_complete_max / divisor); 524 525 if (!g_enable_histogram) { 526 return; 527 } 528 529 printf("\n"); 530 printf("Submit histogram\n"); 531 printf("================\n"); 532 printf(" Range in us Cumulative Count\n"); 533 spdk_histogram_data_iterate(g_ns->submit_histogram, print_bucket, NULL); 534 printf("\n"); 535 536 printf("Complete histogram\n"); 537 printf("==================\n"); 538 printf(" Range in us Cumulative Count\n"); 539 spdk_histogram_data_iterate(g_ns->complete_histogram, print_bucket, NULL); 540 printf("\n"); 541 542 } 543 544 static int 545 parse_args(int argc, char **argv) 546 { 547 int op; 548 long int val; 549 550 /* default value */ 551 g_io_size_bytes = 0; 552 g_time_in_sec = 0; 553 554 while ((op = getopt(argc, argv, "hs:t:H")) != -1) { 555 switch (op) { 556 case 'h': 557 usage(argv[0]); 558 exit(0); 559 break; 560 case 's': 561 val = spdk_strtol(optarg, 10); 562 if (val < 0) { 563 fprintf(stderr, "Invalid io size\n"); 564 return val; 565 } 566 g_io_size_bytes = (uint32_t)val; 567 break; 568 case 't': 569 g_time_in_sec = spdk_strtol(optarg, 10); 570 if (g_time_in_sec < 0) { 571 fprintf(stderr, "Invalid run time\n"); 572 return g_time_in_sec; 573 } 574 break; 575 case 'H': 576 g_enable_histogram = true; 577 break; 578 default: 579 usage(argv[0]); 580 return 1; 581 } 582 } 583 584 if (!g_io_size_bytes) { 585 usage(argv[0]); 586 return 1; 587 } 588 if (!g_time_in_sec) { 589 usage(argv[0]); 590 return 1; 591 } 592 593 g_aio_optind = optind; 594 595 return 0; 596 } 597 598 static bool 599 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 600 struct spdk_nvme_ctrlr_opts *opts) 601 { 602 static uint32_t ctrlr_found = 0; 603 604 if (ctrlr_found == 1) { 605 fprintf(stderr, "only attaching to one controller, so skipping\n"); 606 fprintf(stderr, " controller at PCI address %s\n", 607 trid->traddr); 608 return false; 609 } 610 ctrlr_found = 1; 611 612 printf("Attaching to %s\n", trid->traddr); 613 614 return true; 615 } 616 617 static void 618 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 619 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 620 { 621 printf("Attached to %s\n", trid->traddr); 622 623 register_ctrlr(ctrlr); 624 } 625 626 static int 627 register_controllers(void) 628 { 629 printf("Initializing NVMe Controllers\n"); 630 631 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { 632 fprintf(stderr, "spdk_nvme_probe() failed\n"); 633 return 1; 634 } 635 636 if (g_ns == NULL) { 637 fprintf(stderr, "no NVMe controller found - check that device is bound to uio/vfio\n"); 638 return 1; 639 } 640 641 return 0; 642 } 643 644 static void 645 cleanup(void) 646 { 647 struct ns_entry *ns_entry = g_ns; 648 struct ctrlr_entry *ctrlr_entry = g_ctrlr; 649 650 while (ns_entry) { 651 struct ns_entry *next = ns_entry->next; 652 653 spdk_histogram_data_free(ns_entry->submit_histogram); 654 spdk_histogram_data_free(ns_entry->complete_histogram); 655 free(ns_entry); 656 ns_entry = next; 657 } 658 659 while (ctrlr_entry) { 660 struct ctrlr_entry *next = ctrlr_entry->next; 661 662 spdk_nvme_detach(ctrlr_entry->ctrlr); 663 free(ctrlr_entry); 664 ctrlr_entry = next; 665 } 666 } 667 668 int main(int argc, char **argv) 669 { 670 int rc; 671 struct spdk_env_opts opts; 672 673 rc = parse_args(argc, argv); 674 if (rc != 0) { 675 return rc; 676 } 677 678 spdk_env_opts_init(&opts); 679 opts.name = "overhead"; 680 opts.core_mask = "0x1"; 681 opts.shm_id = 0; 682 if (spdk_env_init(&opts) < 0) { 683 fprintf(stderr, "Unable to initialize SPDK env\n"); 684 return 1; 685 } 686 687 g_task = spdk_dma_zmalloc(sizeof(struct perf_task), 0, NULL); 688 if (g_task == NULL) { 689 fprintf(stderr, "g_task alloc failed\n"); 690 exit(1); 691 } 692 693 g_task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x1000, NULL); 694 if (g_task->buf == NULL) { 695 fprintf(stderr, "g_task->buf spdk_dma_zmalloc failed\n"); 696 exit(1); 697 } 698 699 g_tsc_rate = spdk_get_ticks_hz(); 700 701 #if HAVE_LIBAIO 702 if (g_aio_optind < argc) { 703 printf("Measuring overhead for AIO device %s.\n", argv[g_aio_optind]); 704 if (register_aio_file(argv[g_aio_optind]) != 0) { 705 cleanup(); 706 return -1; 707 } 708 } else 709 #endif 710 { 711 if (register_controllers() != 0) { 712 cleanup(); 713 return -1; 714 } 715 } 716 717 printf("Initialization complete. Launching workers.\n"); 718 719 rc = work_fn(); 720 721 print_stats(); 722 723 cleanup(); 724 725 if (rc != 0) { 726 fprintf(stderr, "%s: errors occured\n", argv[0]); 727 } 728 729 return rc; 730 } 731