1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "spdk/log.h" 37 #include "spdk/nvme.h" 38 #include "spdk/env.h" 39 #include "spdk/string.h" 40 #include "spdk/nvme_intel.h" 41 #include "spdk/string.h" 42 43 struct ctrlr_entry { 44 struct spdk_nvme_ctrlr *ctrlr; 45 struct spdk_nvme_intel_rw_latency_page latency_page; 46 TAILQ_ENTRY(ctrlr_entry) link; 47 char name[1024]; 48 }; 49 50 struct ns_entry { 51 struct { 52 struct spdk_nvme_ctrlr *ctrlr; 53 struct spdk_nvme_ns *ns; 54 } nvme; 55 56 TAILQ_ENTRY(ns_entry) link; 57 uint32_t io_size_blocks; 58 uint64_t size_in_ios; 59 char name[1024]; 60 }; 61 62 struct ns_worker_ctx { 63 struct ns_entry *entry; 64 uint64_t io_completed; 65 uint64_t current_queue_depth; 66 uint64_t offset_in_ios; 67 bool is_draining; 68 struct spdk_nvme_qpair *qpair; 69 TAILQ_ENTRY(ns_worker_ctx) link; 70 }; 71 72 struct arb_task { 73 struct ns_worker_ctx *ns_ctx; 74 void *buf; 75 }; 76 77 struct worker_thread { 78 TAILQ_HEAD(, ns_worker_ctx) ns_ctx; 79 TAILQ_ENTRY(worker_thread) link; 80 unsigned lcore; 81 enum spdk_nvme_qprio qprio; 82 }; 83 84 struct arb_context { 85 int shm_id; 86 int outstanding_commands; 87 int num_namespaces; 88 int num_workers; 89 int rw_percentage; 90 int is_random; 91 int queue_depth; 92 int time_in_sec; 93 int io_count; 94 uint8_t latency_tracking_enable; 95 uint8_t arbitration_mechanism; 96 uint8_t arbitration_config; 97 uint32_t io_size_bytes; 98 uint32_t max_completions; 99 uint64_t tsc_rate; 100 const char *core_mask; 101 const char *workload_type; 102 }; 103 104 struct feature { 105 uint32_t result; 106 bool valid; 107 }; 108 109 static struct spdk_mempool *task_pool = NULL; 110 111 static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers); 112 static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces); 113 static TAILQ_HEAD(, worker_thread) g_workers = TAILQ_HEAD_INITIALIZER(g_workers); 114 115 static struct feature features[SPDK_NVME_FEAT_ARBITRATION + 1] = {}; 116 static struct spdk_nvme_transport_id g_trid = {}; 117 118 static struct arb_context g_arbitration = { 119 .shm_id = -1, 120 .outstanding_commands = 0, 121 .num_workers = 0, 122 .num_namespaces = 0, 123 .rw_percentage = 50, 124 .queue_depth = 64, 125 .time_in_sec = 60, 126 .io_count = 100000, 127 .latency_tracking_enable = 0, 128 .arbitration_mechanism = SPDK_NVME_CC_AMS_RR, 129 .arbitration_config = 0, 130 .io_size_bytes = 131072, 131 .max_completions = 0, 132 /* Default 4 cores for urgent/high/medium/low */ 133 .core_mask = "0xf", 134 .workload_type = "randrw", 135 }; 136 137 static int g_dpdk_mem = 0; 138 static bool g_dpdk_mem_single_seg = false; 139 140 /* 141 * For weighted round robin arbitration mechanism, the smaller value between 142 * weight and burst will be picked to execute the commands in one queue. 143 */ 144 #define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32 145 #define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16 146 #define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8 147 148 static void task_complete(struct arb_task *task); 149 150 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 151 152 static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 153 154 static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 155 156 static const char *print_qprio(enum spdk_nvme_qprio); 157 158 159 static void 160 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) 161 { 162 struct ns_entry *entry; 163 const struct spdk_nvme_ctrlr_data *cdata; 164 165 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 166 167 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || 168 spdk_nvme_ns_get_extended_sector_size(ns) > g_arbitration.io_size_bytes || 169 g_arbitration.io_size_bytes % spdk_nvme_ns_get_extended_sector_size(ns)) { 170 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " 171 "ns size %" PRIu64 " / block size %u for I/O size %u\n", 172 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), 173 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_extended_sector_size(ns), 174 g_arbitration.io_size_bytes); 175 return; 176 } 177 178 entry = malloc(sizeof(struct ns_entry)); 179 if (entry == NULL) { 180 perror("ns_entry malloc"); 181 exit(1); 182 } 183 184 entry->nvme.ctrlr = ctrlr; 185 entry->nvme.ns = ns; 186 187 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; 188 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); 189 190 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 191 192 g_arbitration.num_namespaces++; 193 TAILQ_INSERT_TAIL(&g_namespaces, entry, link); 194 } 195 196 static void 197 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) 198 { 199 if (spdk_nvme_cpl_is_error(cpl)) { 200 printf("enable_latency_tracking_complete failed\n"); 201 } 202 g_arbitration.outstanding_commands--; 203 } 204 205 static void 206 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) 207 { 208 int res; 209 union spdk_nvme_intel_feat_latency_tracking latency_tracking; 210 211 if (enable) { 212 latency_tracking.bits.enable = 0x01; 213 } else { 214 latency_tracking.bits.enable = 0x00; 215 } 216 217 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, 218 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); 219 if (res) { 220 printf("fail to allocate nvme request.\n"); 221 return; 222 } 223 g_arbitration.outstanding_commands++; 224 225 while (g_arbitration.outstanding_commands) { 226 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 227 } 228 } 229 230 static void 231 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 232 { 233 uint32_t nsid; 234 struct spdk_nvme_ns *ns; 235 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); 236 union spdk_nvme_cap_register cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 237 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); 238 239 if (entry == NULL) { 240 perror("ctrlr_entry malloc"); 241 exit(1); 242 } 243 244 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 245 246 entry->ctrlr = ctrlr; 247 TAILQ_INSERT_TAIL(&g_controllers, entry, link); 248 249 if ((g_arbitration.latency_tracking_enable != 0) && 250 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 251 set_latency_tracking_feature(ctrlr, true); 252 } 253 254 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; 255 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 256 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 257 if (ns == NULL) { 258 continue; 259 } 260 register_ns(ctrlr, ns); 261 } 262 263 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR && 264 (cap.bits.ams & SPDK_NVME_CAP_AMS_WRR)) { 265 get_arb_feature(ctrlr); 266 267 if (g_arbitration.arbitration_config != 0) { 268 set_arb_feature(ctrlr); 269 get_arb_feature(ctrlr); 270 } 271 } 272 } 273 274 static __thread unsigned int seed = 0; 275 276 static void 277 submit_single_io(struct ns_worker_ctx *ns_ctx) 278 { 279 struct arb_task *task = NULL; 280 uint64_t offset_in_ios; 281 int rc; 282 struct ns_entry *entry = ns_ctx->entry; 283 284 task = spdk_mempool_get(task_pool); 285 if (!task) { 286 fprintf(stderr, "Failed to get task from task_pool\n"); 287 exit(1); 288 } 289 290 task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); 291 if (!task->buf) { 292 spdk_mempool_put(task_pool, task); 293 fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); 294 exit(1); 295 } 296 297 task->ns_ctx = ns_ctx; 298 299 if (g_arbitration.is_random) { 300 offset_in_ios = rand_r(&seed) % entry->size_in_ios; 301 } else { 302 offset_in_ios = ns_ctx->offset_in_ios++; 303 if (ns_ctx->offset_in_ios == entry->size_in_ios) { 304 ns_ctx->offset_in_ios = 0; 305 } 306 } 307 308 if ((g_arbitration.rw_percentage == 100) || 309 (g_arbitration.rw_percentage != 0 && 310 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { 311 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, 312 offset_in_ios * entry->io_size_blocks, 313 entry->io_size_blocks, io_complete, task, 0); 314 } else { 315 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, 316 offset_in_ios * entry->io_size_blocks, 317 entry->io_size_blocks, io_complete, task, 0); 318 } 319 320 if (rc != 0) { 321 fprintf(stderr, "starting I/O failed\n"); 322 } else { 323 ns_ctx->current_queue_depth++; 324 } 325 } 326 327 static void 328 task_complete(struct arb_task *task) 329 { 330 struct ns_worker_ctx *ns_ctx; 331 332 ns_ctx = task->ns_ctx; 333 ns_ctx->current_queue_depth--; 334 ns_ctx->io_completed++; 335 336 spdk_dma_free(task->buf); 337 spdk_mempool_put(task_pool, task); 338 339 /* 340 * is_draining indicates when time has expired for the test run 341 * and we are just waiting for the previously submitted I/O 342 * to complete. In this case, do not submit a new I/O to replace 343 * the one just completed. 344 */ 345 if (!ns_ctx->is_draining) { 346 submit_single_io(ns_ctx); 347 } 348 } 349 350 static void 351 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 352 { 353 task_complete((struct arb_task *)ctx); 354 } 355 356 static void 357 check_io(struct ns_worker_ctx *ns_ctx) 358 { 359 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions); 360 } 361 362 static void 363 submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) 364 { 365 while (queue_depth-- > 0) { 366 submit_single_io(ns_ctx); 367 } 368 } 369 370 static void 371 drain_io(struct ns_worker_ctx *ns_ctx) 372 { 373 ns_ctx->is_draining = true; 374 while (ns_ctx->current_queue_depth > 0) { 375 check_io(ns_ctx); 376 } 377 } 378 379 static int 380 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio) 381 { 382 struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr; 383 struct spdk_nvme_io_qpair_opts opts; 384 385 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 386 opts.qprio = qprio; 387 388 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); 389 if (!ns_ctx->qpair) { 390 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); 391 return 1; 392 } 393 394 return 0; 395 } 396 397 static void 398 cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) 399 { 400 spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); 401 } 402 403 static void 404 cleanup(uint32_t task_count) 405 { 406 struct ns_entry *entry, *tmp_entry; 407 struct worker_thread *worker, *tmp_worker; 408 struct ns_worker_ctx *ns_ctx, *tmp_ns_ctx; 409 410 TAILQ_FOREACH_SAFE(entry, &g_namespaces, link, tmp_entry) { 411 TAILQ_REMOVE(&g_namespaces, entry, link); 412 free(entry); 413 }; 414 415 TAILQ_FOREACH_SAFE(worker, &g_workers, link, tmp_worker) { 416 TAILQ_REMOVE(&g_workers, worker, link); 417 418 /* ns_worker_ctx is a list in the worker */ 419 TAILQ_FOREACH_SAFE(ns_ctx, &worker->ns_ctx, link, tmp_ns_ctx) { 420 TAILQ_REMOVE(&worker->ns_ctx, ns_ctx, link); 421 free(ns_ctx); 422 } 423 424 free(worker); 425 }; 426 427 if (spdk_mempool_count(task_pool) != (size_t)task_count) { 428 fprintf(stderr, "task_pool count is %zu but should be %u\n", 429 spdk_mempool_count(task_pool), task_count); 430 } 431 spdk_mempool_free(task_pool); 432 } 433 434 static int 435 work_fn(void *arg) 436 { 437 uint64_t tsc_end; 438 struct worker_thread *worker = (struct worker_thread *)arg; 439 struct ns_worker_ctx *ns_ctx; 440 441 printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio)); 442 443 /* Allocate a queue pair for each namespace. */ 444 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 445 if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) { 446 printf("ERROR: init_ns_worker_ctx() failed\n"); 447 return 1; 448 } 449 } 450 451 tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate; 452 453 /* Submit initial I/O for each namespace. */ 454 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 455 submit_io(ns_ctx, g_arbitration.queue_depth); 456 } 457 458 while (1) { 459 /* 460 * Check for completed I/O for each controller. A new 461 * I/O will be submitted in the io_complete callback 462 * to replace each I/O that is completed. 463 */ 464 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 465 check_io(ns_ctx); 466 } 467 468 if (spdk_get_ticks() > tsc_end) { 469 break; 470 } 471 } 472 473 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 474 drain_io(ns_ctx); 475 cleanup_ns_worker_ctx(ns_ctx); 476 } 477 478 return 0; 479 } 480 481 static void 482 usage(char *program_name) 483 { 484 printf("%s options", program_name); 485 printf("\t\n"); 486 printf("\t[-d DPDK huge memory size in MB]\n"); 487 printf("\t[-q io depth]\n"); 488 printf("\t[-s io size in bytes]\n"); 489 printf("\t[-w io pattern type, must be one of\n"); 490 printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); 491 printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); 492 #ifdef DEBUG 493 printf("\t[-L enable debug logging]\n"); 494 #else 495 printf("\t[-L enable debug logging (flag disabled, must reconfigure with --enable-debug)\n"); 496 #endif 497 spdk_log_usage(stdout, "\t\t-L"); 498 printf("\t[-l enable latency tracking, default: disabled]\n"); 499 printf("\t\t(0 - disabled; 1 - enabled)\n"); 500 printf("\t[-t time in seconds]\n"); 501 printf("\t[-c core mask for I/O submission/completion.]\n"); 502 printf("\t\t(default: 0xf - 4 cores)]\n"); 503 printf("\t[-m max completions per poll]\n"); 504 printf("\t\t(default: 0 - unlimited)\n"); 505 printf("\t[-a arbitration mechanism, must be one of below]\n"); 506 printf("\t\t(0, 1, 2)]\n"); 507 printf("\t\t(0: default round robin mechanism)]\n"); 508 printf("\t\t(1: weighted round robin mechanism)]\n"); 509 printf("\t\t(2: vendor specific mechanism)]\n"); 510 printf("\t[-b enable arbitration user configuration, default: disabled]\n"); 511 printf("\t\t(0 - disabled; 1 - enabled)\n"); 512 printf("\t[-n subjected IOs for performance comparison]\n"); 513 printf("\t[-i shared memory group ID]\n"); 514 printf("\t[-r remote NVMe over Fabrics target address]\n"); 515 printf("\t[-g use single file descriptor for DPDK memory segments]\n"); 516 } 517 518 static const char * 519 print_qprio(enum spdk_nvme_qprio qprio) 520 { 521 switch (qprio) { 522 case SPDK_NVME_QPRIO_URGENT: 523 return "urgent priority queue"; 524 case SPDK_NVME_QPRIO_HIGH: 525 return "high priority queue"; 526 case SPDK_NVME_QPRIO_MEDIUM: 527 return "medium priority queue"; 528 case SPDK_NVME_QPRIO_LOW: 529 return "low priority queue"; 530 default: 531 return "invalid priority queue"; 532 } 533 } 534 535 536 static void 537 print_configuration(char *program_name) 538 { 539 printf("%s run with configuration:\n", program_name); 540 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n", 541 program_name, 542 g_arbitration.queue_depth, 543 g_arbitration.io_size_bytes, 544 g_arbitration.workload_type, 545 g_arbitration.rw_percentage, 546 g_arbitration.latency_tracking_enable, 547 g_arbitration.time_in_sec, 548 g_arbitration.core_mask, 549 g_arbitration.max_completions, 550 g_arbitration.arbitration_mechanism, 551 g_arbitration.arbitration_config, 552 g_arbitration.io_count, 553 g_arbitration.shm_id); 554 } 555 556 557 static void 558 print_performance(void) 559 { 560 float io_per_second, sent_all_io_in_secs; 561 struct worker_thread *worker; 562 struct ns_worker_ctx *ns_ctx; 563 564 TAILQ_FOREACH(worker, &g_workers, link) { 565 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 566 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec; 567 sent_all_io_in_secs = g_arbitration.io_count / io_per_second; 568 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n", 569 ns_ctx->entry->name, worker->lcore, 570 io_per_second, sent_all_io_in_secs, g_arbitration.io_count); 571 } 572 } 573 printf("========================================================\n"); 574 575 printf("\n"); 576 } 577 578 static void 579 print_latency_page(struct ctrlr_entry *entry) 580 { 581 int i; 582 583 printf("\n"); 584 printf("%s\n", entry->name); 585 printf("--------------------------------------------------------\n"); 586 587 for (i = 0; i < 32; i++) { 588 if (entry->latency_page.buckets_32us[i]) 589 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, 590 entry->latency_page.buckets_32us[i]); 591 } 592 for (i = 0; i < 31; i++) { 593 if (entry->latency_page.buckets_1ms[i]) 594 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, 595 entry->latency_page.buckets_1ms[i]); 596 } 597 for (i = 0; i < 31; i++) { 598 if (entry->latency_page.buckets_32ms[i]) 599 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, 600 entry->latency_page.buckets_32ms[i]); 601 } 602 } 603 604 static void 605 print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) 606 { 607 struct ctrlr_entry *ctrlr; 608 609 printf("%s Latency Statistics:\n", op_name); 610 printf("========================================================\n"); 611 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 612 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 613 if (spdk_nvme_ctrlr_cmd_get_log_page( 614 ctrlr->ctrlr, log_page, 615 SPDK_NVME_GLOBAL_NS_TAG, 616 &ctrlr->latency_page, 617 sizeof(struct spdk_nvme_intel_rw_latency_page), 618 0, 619 enable_latency_tracking_complete, 620 NULL)) { 621 printf("nvme_ctrlr_cmd_get_log_page() failed\n"); 622 exit(1); 623 } 624 625 g_arbitration.outstanding_commands++; 626 } else { 627 printf("Controller %s: %s latency statistics not supported\n", 628 ctrlr->name, op_name); 629 } 630 } 631 632 while (g_arbitration.outstanding_commands) { 633 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 634 spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); 635 } 636 } 637 638 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 639 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 640 print_latency_page(ctrlr); 641 } 642 } 643 printf("\n"); 644 } 645 646 static void 647 print_stats(void) 648 { 649 print_performance(); 650 if (g_arbitration.latency_tracking_enable) { 651 if (g_arbitration.rw_percentage != 0) { 652 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); 653 } 654 if (g_arbitration.rw_percentage != 100) { 655 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); 656 } 657 } 658 } 659 660 static int 661 parse_args(int argc, char **argv) 662 { 663 const char *workload_type = NULL; 664 int op = 0; 665 bool mix_specified = false; 666 int rc; 667 long int val; 668 669 spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE); 670 snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); 671 672 while ((op = getopt(argc, argv, "a:b:c:d:ghi:l:m:n:q:r:s:t:w:M:L:")) != -1) { 673 switch (op) { 674 case 'c': 675 g_arbitration.core_mask = optarg; 676 break; 677 case 'd': 678 g_dpdk_mem = spdk_strtol(optarg, 10); 679 if (g_dpdk_mem < 0) { 680 fprintf(stderr, "Invalid DPDK memory size\n"); 681 return g_dpdk_mem; 682 } 683 break; 684 case 'w': 685 g_arbitration.workload_type = optarg; 686 break; 687 case 'r': 688 if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { 689 fprintf(stderr, "Error parsing transport address\n"); 690 return 1; 691 } 692 break; 693 case 'g': 694 g_dpdk_mem_single_seg = true; 695 break; 696 case 'h': 697 case '?': 698 usage(argv[0]); 699 return 1; 700 case 'L': 701 rc = spdk_log_set_flag(optarg); 702 if (rc < 0) { 703 fprintf(stderr, "unknown flag\n"); 704 usage(argv[0]); 705 exit(EXIT_FAILURE); 706 } 707 #ifdef DEBUG 708 spdk_log_set_print_level(SPDK_LOG_DEBUG); 709 #endif 710 break; 711 default: 712 val = spdk_strtol(optarg, 10); 713 if (val < 0) { 714 fprintf(stderr, "Converting a string to integer failed\n"); 715 return val; 716 } 717 switch (op) { 718 case 'i': 719 g_arbitration.shm_id = val; 720 break; 721 case 'l': 722 g_arbitration.latency_tracking_enable = val; 723 break; 724 case 'm': 725 g_arbitration.max_completions = val; 726 break; 727 case 'q': 728 g_arbitration.queue_depth = val; 729 break; 730 case 's': 731 g_arbitration.io_size_bytes = val; 732 break; 733 case 't': 734 g_arbitration.time_in_sec = val; 735 break; 736 case 'M': 737 g_arbitration.rw_percentage = val; 738 mix_specified = true; 739 break; 740 case 'a': 741 g_arbitration.arbitration_mechanism = val; 742 break; 743 case 'b': 744 g_arbitration.arbitration_config = val; 745 break; 746 case 'n': 747 g_arbitration.io_count = val; 748 break; 749 default: 750 usage(argv[0]); 751 return -EINVAL; 752 } 753 } 754 } 755 756 workload_type = g_arbitration.workload_type; 757 758 if (strcmp(workload_type, "read") && 759 strcmp(workload_type, "write") && 760 strcmp(workload_type, "randread") && 761 strcmp(workload_type, "randwrite") && 762 strcmp(workload_type, "rw") && 763 strcmp(workload_type, "randrw")) { 764 fprintf(stderr, 765 "io pattern type must be one of\n" 766 "(read, write, randread, randwrite, rw, randrw)\n"); 767 return 1; 768 } 769 770 if (!strcmp(workload_type, "read") || 771 !strcmp(workload_type, "randread")) { 772 g_arbitration.rw_percentage = 100; 773 } 774 775 if (!strcmp(workload_type, "write") || 776 !strcmp(workload_type, "randwrite")) { 777 g_arbitration.rw_percentage = 0; 778 } 779 780 if (!strcmp(workload_type, "read") || 781 !strcmp(workload_type, "randread") || 782 !strcmp(workload_type, "write") || 783 !strcmp(workload_type, "randwrite")) { 784 if (mix_specified) { 785 fprintf(stderr, "Ignoring -M option... Please use -M option" 786 " only when using rw or randrw.\n"); 787 } 788 } 789 790 if (!strcmp(workload_type, "rw") || 791 !strcmp(workload_type, "randrw")) { 792 if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { 793 fprintf(stderr, 794 "-M must be specified to value from 0 to 100 " 795 "for rw or randrw.\n"); 796 return 1; 797 } 798 } 799 800 if (!strcmp(workload_type, "read") || 801 !strcmp(workload_type, "write") || 802 !strcmp(workload_type, "rw")) { 803 g_arbitration.is_random = 0; 804 } else { 805 g_arbitration.is_random = 1; 806 } 807 808 if (g_arbitration.latency_tracking_enable != 0 && 809 g_arbitration.latency_tracking_enable != 1) { 810 fprintf(stderr, 811 "-l must be specified to value 0 or 1.\n"); 812 return 1; 813 } 814 815 switch (g_arbitration.arbitration_mechanism) { 816 case SPDK_NVME_CC_AMS_RR: 817 case SPDK_NVME_CC_AMS_WRR: 818 case SPDK_NVME_CC_AMS_VS: 819 break; 820 default: 821 fprintf(stderr, 822 "-a must be specified to value 0, 1, or 7.\n"); 823 return 1; 824 } 825 826 if (g_arbitration.arbitration_config != 0 && 827 g_arbitration.arbitration_config != 1) { 828 fprintf(stderr, 829 "-b must be specified to value 0 or 1.\n"); 830 return 1; 831 } else if (g_arbitration.arbitration_config == 1 && 832 g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { 833 fprintf(stderr, 834 "-a must be specified to 1 (WRR) together.\n"); 835 return 1; 836 } 837 838 return 0; 839 } 840 841 static int 842 register_workers(void) 843 { 844 uint32_t i; 845 struct worker_thread *worker; 846 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; 847 848 SPDK_ENV_FOREACH_CORE(i) { 849 worker = calloc(1, sizeof(*worker)); 850 if (worker == NULL) { 851 fprintf(stderr, "Unable to allocate worker\n"); 852 return -1; 853 } 854 855 TAILQ_INIT(&worker->ns_ctx); 856 worker->lcore = i; 857 TAILQ_INSERT_TAIL(&g_workers, worker, link); 858 g_arbitration.num_workers++; 859 860 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { 861 qprio++; 862 } 863 864 worker->qprio = qprio & SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK; 865 } 866 867 return 0; 868 } 869 870 static bool 871 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 872 struct spdk_nvme_ctrlr_opts *opts) 873 { 874 /* Update with user specified arbitration configuration */ 875 opts->arb_mechanism = g_arbitration.arbitration_mechanism; 876 877 printf("Attaching to %s\n", trid->traddr); 878 879 return true; 880 } 881 882 static void 883 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 884 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 885 { 886 printf("Attached to %s\n", trid->traddr); 887 888 /* Update with actual arbitration configuration in use */ 889 g_arbitration.arbitration_mechanism = opts->arb_mechanism; 890 891 register_ctrlr(ctrlr); 892 } 893 894 static int 895 register_controllers(void) 896 { 897 printf("Initializing NVMe Controllers\n"); 898 899 if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { 900 fprintf(stderr, "spdk_nvme_probe() failed\n"); 901 return 1; 902 } 903 904 if (g_arbitration.num_namespaces == 0) { 905 fprintf(stderr, "No valid namespaces to continue IO testing\n"); 906 return 1; 907 } 908 909 return 0; 910 } 911 912 static void 913 unregister_controllers(void) 914 { 915 struct ctrlr_entry *entry, *tmp; 916 struct spdk_nvme_detach_ctx *detach_ctx = NULL; 917 918 TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) { 919 TAILQ_REMOVE(&g_controllers, entry, link); 920 if (g_arbitration.latency_tracking_enable && 921 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 922 set_latency_tracking_feature(entry->ctrlr, false); 923 } 924 spdk_nvme_detach_async(entry->ctrlr, &detach_ctx); 925 free(entry); 926 } 927 928 while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) { 929 ; 930 } 931 } 932 933 static int 934 associate_workers_with_ns(void) 935 { 936 struct ns_entry *entry = TAILQ_FIRST(&g_namespaces); 937 struct worker_thread *worker = TAILQ_FIRST(&g_workers); 938 struct ns_worker_ctx *ns_ctx; 939 int i, count; 940 941 count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 942 g_arbitration.num_namespaces : g_arbitration.num_workers; 943 944 for (i = 0; i < count; i++) { 945 if (entry == NULL) { 946 break; 947 } 948 949 ns_ctx = malloc(sizeof(struct ns_worker_ctx)); 950 if (!ns_ctx) { 951 return 1; 952 } 953 memset(ns_ctx, 0, sizeof(*ns_ctx)); 954 955 printf("Associating %s with lcore %d\n", entry->name, worker->lcore); 956 ns_ctx->entry = entry; 957 TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link); 958 959 worker = TAILQ_NEXT(worker, link); 960 if (worker == NULL) { 961 worker = TAILQ_FIRST(&g_workers); 962 } 963 964 entry = TAILQ_NEXT(entry, link); 965 if (entry == NULL) { 966 entry = TAILQ_FIRST(&g_namespaces); 967 } 968 969 } 970 971 return 0; 972 } 973 974 static void 975 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 976 { 977 struct feature *feature = cb_arg; 978 int fid = feature - features; 979 980 if (spdk_nvme_cpl_is_error(cpl)) { 981 printf("get_feature(0x%02X) failed\n", fid); 982 } else { 983 feature->result = cpl->cdw0; 984 feature->valid = true; 985 } 986 987 g_arbitration.outstanding_commands--; 988 } 989 990 static int 991 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) 992 { 993 struct spdk_nvme_cmd cmd = {}; 994 struct feature *feature = &features[fid]; 995 996 feature->valid = false; 997 998 cmd.opc = SPDK_NVME_OPC_GET_FEATURES; 999 cmd.cdw10_bits.get_features.fid = fid; 1000 1001 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, feature); 1002 } 1003 1004 static void 1005 get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 1006 { 1007 get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION); 1008 1009 g_arbitration.outstanding_commands++; 1010 1011 while (g_arbitration.outstanding_commands) { 1012 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1013 } 1014 1015 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1016 union spdk_nvme_cmd_cdw11 arb; 1017 arb.feat_arbitration.raw = features[SPDK_NVME_FEAT_ARBITRATION].result; 1018 1019 printf("Current Arbitration Configuration\n"); 1020 printf("===========\n"); 1021 printf("Arbitration Burst: "); 1022 if (arb.feat_arbitration.bits.ab == SPDK_NVME_ARBITRATION_BURST_UNLIMITED) { 1023 printf("no limit\n"); 1024 } else { 1025 printf("%u\n", 1u << arb.feat_arbitration.bits.ab); 1026 } 1027 1028 printf("Low Priority Weight: %u\n", arb.feat_arbitration.bits.lpw + 1); 1029 printf("Medium Priority Weight: %u\n", arb.feat_arbitration.bits.mpw + 1); 1030 printf("High Priority Weight: %u\n", arb.feat_arbitration.bits.hpw + 1); 1031 printf("\n"); 1032 } 1033 } 1034 1035 static void 1036 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 1037 { 1038 struct feature *feature = cb_arg; 1039 int fid = feature - features; 1040 1041 if (spdk_nvme_cpl_is_error(cpl)) { 1042 printf("set_feature(0x%02X) failed\n", fid); 1043 feature->valid = false; 1044 } else { 1045 printf("Set Arbitration Feature Successfully\n"); 1046 } 1047 1048 g_arbitration.outstanding_commands--; 1049 } 1050 1051 static int 1052 set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 1053 { 1054 int ret; 1055 struct spdk_nvme_cmd cmd = {}; 1056 1057 cmd.opc = SPDK_NVME_OPC_SET_FEATURES; 1058 cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_ARBITRATION; 1059 1060 g_arbitration.outstanding_commands = 0; 1061 1062 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1063 cmd.cdw11_bits.feat_arbitration.bits.ab = SPDK_NVME_ARBITRATION_BURST_UNLIMITED; 1064 cmd.cdw11_bits.feat_arbitration.bits.lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT; 1065 cmd.cdw11_bits.feat_arbitration.bits.mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT; 1066 cmd.cdw11_bits.feat_arbitration.bits.hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT; 1067 } 1068 1069 ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, 1070 set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]); 1071 if (ret) { 1072 printf("Set Arbitration Feature: Failed 0x%x\n", ret); 1073 return 1; 1074 } 1075 1076 g_arbitration.outstanding_commands++; 1077 1078 while (g_arbitration.outstanding_commands) { 1079 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1080 } 1081 1082 if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1083 printf("Set Arbitration Feature failed and use default configuration\n"); 1084 } 1085 1086 return 0; 1087 } 1088 1089 int 1090 main(int argc, char **argv) 1091 { 1092 int rc; 1093 struct worker_thread *worker, *main_worker; 1094 unsigned main_core; 1095 char task_pool_name[30]; 1096 uint32_t task_count = 0; 1097 struct spdk_env_opts opts; 1098 1099 rc = parse_args(argc, argv); 1100 if (rc != 0) { 1101 return rc; 1102 } 1103 1104 spdk_env_opts_init(&opts); 1105 opts.name = "arb"; 1106 opts.mem_size = g_dpdk_mem; 1107 opts.hugepage_single_segments = g_dpdk_mem_single_seg; 1108 opts.core_mask = g_arbitration.core_mask; 1109 opts.shm_id = g_arbitration.shm_id; 1110 if (spdk_env_init(&opts) < 0) { 1111 return 1; 1112 } 1113 1114 g_arbitration.tsc_rate = spdk_get_ticks_hz(); 1115 1116 if (register_workers() != 0) { 1117 rc = 1; 1118 goto exit; 1119 } 1120 1121 if (register_controllers() != 0) { 1122 rc = 1; 1123 goto exit; 1124 } 1125 1126 if (associate_workers_with_ns() != 0) { 1127 rc = 1; 1128 goto exit; 1129 } 1130 1131 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); 1132 1133 /* 1134 * The task_count will be dynamically calculated based on the 1135 * number of attached active namespaces, queue depth and number 1136 * of cores (workers) involved in the IO perations. 1137 */ 1138 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 1139 g_arbitration.num_namespaces : g_arbitration.num_workers; 1140 task_count *= g_arbitration.queue_depth; 1141 1142 task_pool = spdk_mempool_create(task_pool_name, task_count, 1143 sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY); 1144 if (task_pool == NULL) { 1145 fprintf(stderr, "could not initialize task pool\n"); 1146 rc = 1; 1147 goto exit; 1148 } 1149 1150 print_configuration(argv[0]); 1151 1152 printf("Initialization complete. Launching workers.\n"); 1153 1154 /* Launch all of the secondary workers */ 1155 main_core = spdk_env_get_current_core(); 1156 main_worker = NULL; 1157 TAILQ_FOREACH(worker, &g_workers, link) { 1158 if (worker->lcore != main_core) { 1159 spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); 1160 } else { 1161 assert(main_worker == NULL); 1162 main_worker = worker; 1163 } 1164 } 1165 1166 assert(main_worker != NULL); 1167 rc = work_fn(main_worker); 1168 1169 spdk_env_thread_wait_all(); 1170 1171 print_stats(); 1172 1173 exit: 1174 unregister_controllers(); 1175 cleanup(task_count); 1176 1177 spdk_env_fini(); 1178 1179 if (rc != 0) { 1180 fprintf(stderr, "%s: errors occurred\n", argv[0]); 1181 } 1182 1183 return rc; 1184 } 1185