/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation.
 * All rights reserved.
 */

/*
 * NVMe arbitration example: drives I/O to all attached active namespaces
 * from multiple cores, optionally exercising weighted round robin (WRR)
 * arbitration with per-priority submission queues.
 */

#include "spdk/stdinc.h"

#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/nvme_intel.h"

/* One attached controller, plus the buffer used to fetch its Intel
 * read/write latency log page. */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_intel_rw_latency_page latency_page;
	TAILQ_ENTRY(ctrlr_entry) link;
	char name[1024];
};

/* One active namespace selected for I/O testing. */
struct ns_entry {
	struct {
		struct spdk_nvme_ctrlr *ctrlr;
		struct spdk_nvme_ns *ns;
	} nvme;

	TAILQ_ENTRY(ns_entry) link;
	uint32_t io_size_blocks;	/* blocks per submitted I/O */
	uint64_t size_in_ios;		/* namespace capacity measured in I/O-sized units */
	char name[1024];
};

/* Per-worker state for driving I/O against a single namespace. */
struct ns_worker_ctx {
	struct ns_entry *entry;
	uint64_t io_completed;
	uint64_t current_queue_depth;
	uint64_t offset_in_ios;		/* next sequential offset, in I/O-sized units */
	bool is_draining;		/* run time expired: stop resubmitting on completion */
	struct spdk_nvme_qpair *qpair;
	TAILQ_ENTRY(ns_worker_ctx) link;
};

/* One outstanding I/O: its owning context plus its DMA-able data buffer. */
struct arb_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};

/* One polling thread pinned to an lcore; submits at a single queue priority. */
struct worker_thread {
	TAILQ_HEAD(, ns_worker_ctx) ns_ctx;
	TAILQ_ENTRY(worker_thread) link;
	unsigned lcore;
	enum spdk_nvme_qprio qprio;
};

/* Global run configuration, mostly filled in from the command line. */
struct arb_context {
	int shm_id;
	int outstanding_commands;	/* in-flight admin commands being polled for */
	int num_namespaces;
	int num_workers;
	int rw_percentage;		/* percent of I/O that are reads */
	int is_random;
	int queue_depth;
	int time_in_sec;
	int io_count;
	uint8_t latency_tracking_enable;
	uint8_t arbitration_mechanism;	/* CC.AMS value (SPDK_NVME_CC_AMS_*) */
	uint8_t arbitration_config;	/* 1 = apply user-specified WRR weights */
	uint32_t io_size_bytes;
	uint32_t max_completions;
	uint64_t tsc_rate;
	const char *core_mask;
	const char *workload_type;
};

/* Cached result of a Get Features admin command. */
struct feature {
	uint32_t result;
	bool valid;
};

static struct spdk_mempool *task_pool = NULL;

static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static TAILQ_HEAD(, worker_thread) g_workers = TAILQ_HEAD_INITIALIZER(g_workers);
static struct feature features[SPDK_NVME_FEAT_ARBITRATION + 1] = {}; 87 static struct spdk_nvme_transport_id g_trid = {}; 88 89 static struct arb_context g_arbitration = { 90 .shm_id = -1, 91 .outstanding_commands = 0, 92 .num_workers = 0, 93 .num_namespaces = 0, 94 .rw_percentage = 50, 95 .queue_depth = 64, 96 .time_in_sec = 60, 97 .io_count = 100000, 98 .latency_tracking_enable = 0, 99 .arbitration_mechanism = SPDK_NVME_CC_AMS_RR, 100 .arbitration_config = 0, 101 .io_size_bytes = 131072, 102 .max_completions = 0, 103 /* Default 4 cores for urgent/high/medium/low */ 104 .core_mask = "0xf", 105 .workload_type = "randrw", 106 }; 107 108 static int g_dpdk_mem = 0; 109 static bool g_dpdk_mem_single_seg = false; 110 111 /* 112 * For weighted round robin arbitration mechanism, the smaller value between 113 * weight and burst will be picked to execute the commands in one queue. 114 */ 115 #define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32 116 #define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16 117 #define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8 118 119 static void task_complete(struct arb_task *task); 120 121 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 122 123 static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 124 125 static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 126 127 static const char *print_qprio(enum spdk_nvme_qprio); 128 129 130 static void 131 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) 132 { 133 struct ns_entry *entry; 134 const struct spdk_nvme_ctrlr_data *cdata; 135 136 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 137 138 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || 139 spdk_nvme_ns_get_extended_sector_size(ns) > g_arbitration.io_size_bytes || 140 g_arbitration.io_size_bytes % spdk_nvme_ns_get_extended_sector_size(ns)) { 141 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " 142 "ns size %" PRIu64 " / block size %u for I/O size %u\n", 143 cdata->mn, cdata->sn, 
spdk_nvme_ns_get_id(ns), 144 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_extended_sector_size(ns), 145 g_arbitration.io_size_bytes); 146 return; 147 } 148 149 entry = malloc(sizeof(struct ns_entry)); 150 if (entry == NULL) { 151 perror("ns_entry malloc"); 152 exit(1); 153 } 154 155 entry->nvme.ctrlr = ctrlr; 156 entry->nvme.ns = ns; 157 158 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; 159 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); 160 161 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 162 163 g_arbitration.num_namespaces++; 164 TAILQ_INSERT_TAIL(&g_namespaces, entry, link); 165 } 166 167 static void 168 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) 169 { 170 if (spdk_nvme_cpl_is_error(cpl)) { 171 printf("enable_latency_tracking_complete failed\n"); 172 } 173 g_arbitration.outstanding_commands--; 174 } 175 176 static void 177 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) 178 { 179 int res; 180 union spdk_nvme_intel_feat_latency_tracking latency_tracking; 181 182 if (enable) { 183 latency_tracking.bits.enable = 0x01; 184 } else { 185 latency_tracking.bits.enable = 0x00; 186 } 187 188 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, 189 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); 190 if (res) { 191 printf("fail to allocate nvme request.\n"); 192 return; 193 } 194 g_arbitration.outstanding_commands++; 195 196 while (g_arbitration.outstanding_commands) { 197 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 198 } 199 } 200 201 static void 202 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 203 { 204 uint32_t nsid; 205 struct spdk_nvme_ns *ns; 206 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); 207 union spdk_nvme_cap_register cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); 208 const struct spdk_nvme_ctrlr_data *cdata = 
spdk_nvme_ctrlr_get_data(ctrlr); 209 210 if (entry == NULL) { 211 perror("ctrlr_entry malloc"); 212 exit(1); 213 } 214 215 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 216 217 entry->ctrlr = ctrlr; 218 TAILQ_INSERT_TAIL(&g_controllers, entry, link); 219 220 if ((g_arbitration.latency_tracking_enable != 0) && 221 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 222 set_latency_tracking_feature(ctrlr, true); 223 } 224 225 for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; 226 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { 227 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 228 if (ns == NULL) { 229 continue; 230 } 231 register_ns(ctrlr, ns); 232 } 233 234 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR && 235 (cap.bits.ams & SPDK_NVME_CAP_AMS_WRR)) { 236 get_arb_feature(ctrlr); 237 238 if (g_arbitration.arbitration_config != 0) { 239 set_arb_feature(ctrlr); 240 get_arb_feature(ctrlr); 241 } 242 } 243 } 244 245 static __thread unsigned int seed = 0; 246 247 static void 248 submit_single_io(struct ns_worker_ctx *ns_ctx) 249 { 250 struct arb_task *task = NULL; 251 uint64_t offset_in_ios; 252 int rc; 253 struct ns_entry *entry = ns_ctx->entry; 254 255 task = spdk_mempool_get(task_pool); 256 if (!task) { 257 fprintf(stderr, "Failed to get task from task_pool\n"); 258 exit(1); 259 } 260 261 task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); 262 if (!task->buf) { 263 spdk_mempool_put(task_pool, task); 264 fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); 265 exit(1); 266 } 267 268 task->ns_ctx = ns_ctx; 269 270 if (g_arbitration.is_random) { 271 offset_in_ios = rand_r(&seed) % entry->size_in_ios; 272 } else { 273 offset_in_ios = ns_ctx->offset_in_ios++; 274 if (ns_ctx->offset_in_ios == entry->size_in_ios) { 275 ns_ctx->offset_in_ios = 0; 276 } 277 } 278 279 if ((g_arbitration.rw_percentage == 100) || 280 
(g_arbitration.rw_percentage != 0 && 281 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { 282 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, 283 offset_in_ios * entry->io_size_blocks, 284 entry->io_size_blocks, io_complete, task, 0); 285 } else { 286 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, 287 offset_in_ios * entry->io_size_blocks, 288 entry->io_size_blocks, io_complete, task, 0); 289 } 290 291 if (rc != 0) { 292 fprintf(stderr, "starting I/O failed\n"); 293 } else { 294 ns_ctx->current_queue_depth++; 295 } 296 } 297 298 static void 299 task_complete(struct arb_task *task) 300 { 301 struct ns_worker_ctx *ns_ctx; 302 303 ns_ctx = task->ns_ctx; 304 ns_ctx->current_queue_depth--; 305 ns_ctx->io_completed++; 306 307 spdk_dma_free(task->buf); 308 spdk_mempool_put(task_pool, task); 309 310 /* 311 * is_draining indicates when time has expired for the test run 312 * and we are just waiting for the previously submitted I/O 313 * to complete. In this case, do not submit a new I/O to replace 314 * the one just completed. 
315 */ 316 if (!ns_ctx->is_draining) { 317 submit_single_io(ns_ctx); 318 } 319 } 320 321 static void 322 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 323 { 324 task_complete((struct arb_task *)ctx); 325 } 326 327 static void 328 check_io(struct ns_worker_ctx *ns_ctx) 329 { 330 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions); 331 } 332 333 static void 334 submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) 335 { 336 while (queue_depth-- > 0) { 337 submit_single_io(ns_ctx); 338 } 339 } 340 341 static void 342 drain_io(struct ns_worker_ctx *ns_ctx) 343 { 344 ns_ctx->is_draining = true; 345 while (ns_ctx->current_queue_depth > 0) { 346 check_io(ns_ctx); 347 } 348 } 349 350 static int 351 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio) 352 { 353 struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr; 354 struct spdk_nvme_io_qpair_opts opts; 355 356 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 357 opts.qprio = qprio; 358 359 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); 360 if (!ns_ctx->qpair) { 361 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); 362 return 1; 363 } 364 365 return 0; 366 } 367 368 static void 369 cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) 370 { 371 spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); 372 } 373 374 static void 375 cleanup(uint32_t task_count) 376 { 377 struct ns_entry *entry, *tmp_entry; 378 struct worker_thread *worker, *tmp_worker; 379 struct ns_worker_ctx *ns_ctx, *tmp_ns_ctx; 380 381 TAILQ_FOREACH_SAFE(entry, &g_namespaces, link, tmp_entry) { 382 TAILQ_REMOVE(&g_namespaces, entry, link); 383 free(entry); 384 }; 385 386 TAILQ_FOREACH_SAFE(worker, &g_workers, link, tmp_worker) { 387 TAILQ_REMOVE(&g_workers, worker, link); 388 389 /* ns_worker_ctx is a list in the worker */ 390 TAILQ_FOREACH_SAFE(ns_ctx, &worker->ns_ctx, link, tmp_ns_ctx) { 391 TAILQ_REMOVE(&worker->ns_ctx, ns_ctx, 
link); 392 free(ns_ctx); 393 } 394 395 free(worker); 396 }; 397 398 if (spdk_mempool_count(task_pool) != (size_t)task_count) { 399 fprintf(stderr, "task_pool count is %zu but should be %u\n", 400 spdk_mempool_count(task_pool), task_count); 401 } 402 spdk_mempool_free(task_pool); 403 } 404 405 static int 406 work_fn(void *arg) 407 { 408 uint64_t tsc_end; 409 struct worker_thread *worker = (struct worker_thread *)arg; 410 struct ns_worker_ctx *ns_ctx; 411 412 printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio)); 413 414 /* Allocate a queue pair for each namespace. */ 415 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 416 if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) { 417 printf("ERROR: init_ns_worker_ctx() failed\n"); 418 return 1; 419 } 420 } 421 422 tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate; 423 424 /* Submit initial I/O for each namespace. */ 425 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 426 submit_io(ns_ctx, g_arbitration.queue_depth); 427 } 428 429 while (1) { 430 /* 431 * Check for completed I/O for each controller. A new 432 * I/O will be submitted in the io_complete callback 433 * to replace each I/O that is completed. 
434 */ 435 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 436 check_io(ns_ctx); 437 } 438 439 if (spdk_get_ticks() > tsc_end) { 440 break; 441 } 442 } 443 444 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 445 drain_io(ns_ctx); 446 cleanup_ns_worker_ctx(ns_ctx); 447 } 448 449 return 0; 450 } 451 452 static void 453 usage(char *program_name) 454 { 455 printf("%s options", program_name); 456 printf("\t\n"); 457 printf("\t[-d DPDK huge memory size in MB]\n"); 458 printf("\t[-q io depth]\n"); 459 printf("\t[-o io size in bytes]\n"); 460 printf("\t[-w io pattern type, must be one of\n"); 461 printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); 462 printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); 463 #ifdef DEBUG 464 printf("\t[-L enable debug logging]\n"); 465 #else 466 printf("\t[-L enable debug logging (flag disabled, must reconfigure with --enable-debug)]\n"); 467 #endif 468 spdk_log_usage(stdout, "\t\t-L"); 469 printf("\t[-l enable latency tracking, default: disabled]\n"); 470 printf("\t\t(0 - disabled; 1 - enabled)\n"); 471 printf("\t[-t time in seconds]\n"); 472 printf("\t[-c core mask for I/O submission/completion.]\n"); 473 printf("\t\t(default: 0xf - 4 cores)]\n"); 474 printf("\t[-m max completions per poll]\n"); 475 printf("\t\t(default: 0 - unlimited)\n"); 476 printf("\t[-a arbitration mechanism, must be one of below]\n"); 477 printf("\t\t(0, 1, 2)]\n"); 478 printf("\t\t(0: default round robin mechanism)]\n"); 479 printf("\t\t(1: weighted round robin mechanism)]\n"); 480 printf("\t\t(2: vendor specific mechanism)]\n"); 481 printf("\t[-b enable arbitration user configuration, default: disabled]\n"); 482 printf("\t\t(0 - disabled; 1 - enabled)\n"); 483 printf("\t[-n subjected IOs for performance comparison]\n"); 484 printf("\t[-i shared memory group ID]\n"); 485 printf("\t[-r remote NVMe over Fabrics target address]\n"); 486 printf("\t[-g use single file descriptor for DPDK memory segments]\n"); 487 } 488 489 static const char * 490 
print_qprio(enum spdk_nvme_qprio qprio) 491 { 492 switch (qprio) { 493 case SPDK_NVME_QPRIO_URGENT: 494 return "urgent priority queue"; 495 case SPDK_NVME_QPRIO_HIGH: 496 return "high priority queue"; 497 case SPDK_NVME_QPRIO_MEDIUM: 498 return "medium priority queue"; 499 case SPDK_NVME_QPRIO_LOW: 500 return "low priority queue"; 501 default: 502 return "invalid priority queue"; 503 } 504 } 505 506 507 static void 508 print_configuration(char *program_name) 509 { 510 printf("%s run with configuration:\n", program_name); 511 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n", 512 program_name, 513 g_arbitration.queue_depth, 514 g_arbitration.io_size_bytes, 515 g_arbitration.workload_type, 516 g_arbitration.rw_percentage, 517 g_arbitration.latency_tracking_enable, 518 g_arbitration.time_in_sec, 519 g_arbitration.core_mask, 520 g_arbitration.max_completions, 521 g_arbitration.arbitration_mechanism, 522 g_arbitration.arbitration_config, 523 g_arbitration.io_count, 524 g_arbitration.shm_id); 525 } 526 527 528 static void 529 print_performance(void) 530 { 531 float io_per_second, sent_all_io_in_secs; 532 struct worker_thread *worker; 533 struct ns_worker_ctx *ns_ctx; 534 535 TAILQ_FOREACH(worker, &g_workers, link) { 536 TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) { 537 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec; 538 sent_all_io_in_secs = g_arbitration.io_count / io_per_second; 539 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n", 540 ns_ctx->entry->name, worker->lcore, 541 io_per_second, sent_all_io_in_secs, g_arbitration.io_count); 542 } 543 } 544 printf("========================================================\n"); 545 546 printf("\n"); 547 } 548 549 static void 550 print_latency_page(struct ctrlr_entry *entry) 551 { 552 int i; 553 554 printf("\n"); 555 printf("%s\n", entry->name); 556 printf("--------------------------------------------------------\n"); 557 558 for (i = 0; i < 32; i++) { 559 if 
(entry->latency_page.buckets_32us[i]) 560 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, 561 entry->latency_page.buckets_32us[i]); 562 } 563 for (i = 0; i < 31; i++) { 564 if (entry->latency_page.buckets_1ms[i]) 565 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, 566 entry->latency_page.buckets_1ms[i]); 567 } 568 for (i = 0; i < 31; i++) { 569 if (entry->latency_page.buckets_32ms[i]) 570 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, 571 entry->latency_page.buckets_32ms[i]); 572 } 573 } 574 575 static void 576 print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) 577 { 578 struct ctrlr_entry *ctrlr; 579 580 printf("%s Latency Statistics:\n", op_name); 581 printf("========================================================\n"); 582 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 583 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 584 if (spdk_nvme_ctrlr_cmd_get_log_page( 585 ctrlr->ctrlr, log_page, 586 SPDK_NVME_GLOBAL_NS_TAG, 587 &ctrlr->latency_page, 588 sizeof(struct spdk_nvme_intel_rw_latency_page), 589 0, 590 enable_latency_tracking_complete, 591 NULL)) { 592 printf("nvme_ctrlr_cmd_get_log_page() failed\n"); 593 exit(1); 594 } 595 596 g_arbitration.outstanding_commands++; 597 } else { 598 printf("Controller %s: %s latency statistics not supported\n", 599 ctrlr->name, op_name); 600 } 601 } 602 603 while (g_arbitration.outstanding_commands) { 604 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 605 spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); 606 } 607 } 608 609 TAILQ_FOREACH(ctrlr, &g_controllers, link) { 610 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 611 print_latency_page(ctrlr); 612 } 613 } 614 printf("\n"); 615 } 616 617 static void 618 print_stats(void) 619 { 620 print_performance(); 621 if (g_arbitration.latency_tracking_enable) { 622 if (g_arbitration.rw_percentage != 0) { 623 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); 
624 } 625 if (g_arbitration.rw_percentage != 100) { 626 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); 627 } 628 } 629 } 630 631 static int 632 parse_args(int argc, char **argv) 633 { 634 const char *workload_type = NULL; 635 int op = 0; 636 bool mix_specified = false; 637 int rc; 638 long int val; 639 640 spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE); 641 snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); 642 643 while ((op = getopt(argc, argv, "a:b:c:d:ghi:l:m:n:o:q:r:t:w:M:L:")) != -1) { 644 switch (op) { 645 case 'c': 646 g_arbitration.core_mask = optarg; 647 break; 648 case 'd': 649 g_dpdk_mem = spdk_strtol(optarg, 10); 650 if (g_dpdk_mem < 0) { 651 fprintf(stderr, "Invalid DPDK memory size\n"); 652 return g_dpdk_mem; 653 } 654 break; 655 case 'w': 656 g_arbitration.workload_type = optarg; 657 break; 658 case 'r': 659 if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { 660 fprintf(stderr, "Error parsing transport address\n"); 661 return 1; 662 } 663 break; 664 case 'g': 665 g_dpdk_mem_single_seg = true; 666 break; 667 case 'h': 668 case '?': 669 usage(argv[0]); 670 return 1; 671 case 'L': 672 rc = spdk_log_set_flag(optarg); 673 if (rc < 0) { 674 fprintf(stderr, "unknown flag\n"); 675 usage(argv[0]); 676 exit(EXIT_FAILURE); 677 } 678 #ifdef DEBUG 679 spdk_log_set_print_level(SPDK_LOG_DEBUG); 680 #endif 681 break; 682 default: 683 val = spdk_strtol(optarg, 10); 684 if (val < 0) { 685 fprintf(stderr, "Converting a string to integer failed\n"); 686 return val; 687 } 688 switch (op) { 689 case 'i': 690 g_arbitration.shm_id = val; 691 break; 692 case 'l': 693 g_arbitration.latency_tracking_enable = val; 694 break; 695 case 'm': 696 g_arbitration.max_completions = val; 697 break; 698 case 'q': 699 g_arbitration.queue_depth = val; 700 break; 701 case 'o': 702 g_arbitration.io_size_bytes = val; 703 break; 704 case 't': 705 g_arbitration.time_in_sec = val; 706 break; 707 case 'M': 708 
g_arbitration.rw_percentage = val; 709 mix_specified = true; 710 break; 711 case 'a': 712 g_arbitration.arbitration_mechanism = val; 713 break; 714 case 'b': 715 g_arbitration.arbitration_config = val; 716 break; 717 case 'n': 718 g_arbitration.io_count = val; 719 break; 720 default: 721 usage(argv[0]); 722 return -EINVAL; 723 } 724 } 725 } 726 727 workload_type = g_arbitration.workload_type; 728 729 if (strcmp(workload_type, "read") && 730 strcmp(workload_type, "write") && 731 strcmp(workload_type, "randread") && 732 strcmp(workload_type, "randwrite") && 733 strcmp(workload_type, "rw") && 734 strcmp(workload_type, "randrw")) { 735 fprintf(stderr, 736 "io pattern type must be one of\n" 737 "(read, write, randread, randwrite, rw, randrw)\n"); 738 return 1; 739 } 740 741 if (!strcmp(workload_type, "read") || 742 !strcmp(workload_type, "randread")) { 743 g_arbitration.rw_percentage = 100; 744 } 745 746 if (!strcmp(workload_type, "write") || 747 !strcmp(workload_type, "randwrite")) { 748 g_arbitration.rw_percentage = 0; 749 } 750 751 if (!strcmp(workload_type, "read") || 752 !strcmp(workload_type, "randread") || 753 !strcmp(workload_type, "write") || 754 !strcmp(workload_type, "randwrite")) { 755 if (mix_specified) { 756 fprintf(stderr, "Ignoring -M option... 
Please use -M option" 757 " only when using rw or randrw.\n"); 758 } 759 } 760 761 if (!strcmp(workload_type, "rw") || 762 !strcmp(workload_type, "randrw")) { 763 if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { 764 fprintf(stderr, 765 "-M must be specified to value from 0 to 100 " 766 "for rw or randrw.\n"); 767 return 1; 768 } 769 } 770 771 if (!strcmp(workload_type, "read") || 772 !strcmp(workload_type, "write") || 773 !strcmp(workload_type, "rw")) { 774 g_arbitration.is_random = 0; 775 } else { 776 g_arbitration.is_random = 1; 777 } 778 779 if (g_arbitration.latency_tracking_enable != 0 && 780 g_arbitration.latency_tracking_enable != 1) { 781 fprintf(stderr, 782 "-l must be specified to value 0 or 1.\n"); 783 return 1; 784 } 785 786 switch (g_arbitration.arbitration_mechanism) { 787 case SPDK_NVME_CC_AMS_RR: 788 case SPDK_NVME_CC_AMS_WRR: 789 case SPDK_NVME_CC_AMS_VS: 790 break; 791 default: 792 fprintf(stderr, 793 "-a must be specified to value 0, 1, or 7.\n"); 794 return 1; 795 } 796 797 if (g_arbitration.arbitration_config != 0 && 798 g_arbitration.arbitration_config != 1) { 799 fprintf(stderr, 800 "-b must be specified to value 0 or 1.\n"); 801 return 1; 802 } else if (g_arbitration.arbitration_config == 1 && 803 g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { 804 fprintf(stderr, 805 "-a must be specified to 1 (WRR) together.\n"); 806 return 1; 807 } 808 809 return 0; 810 } 811 812 static int 813 register_workers(void) 814 { 815 uint32_t i; 816 struct worker_thread *worker; 817 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; 818 819 SPDK_ENV_FOREACH_CORE(i) { 820 worker = calloc(1, sizeof(*worker)); 821 if (worker == NULL) { 822 fprintf(stderr, "Unable to allocate worker\n"); 823 return -1; 824 } 825 826 TAILQ_INIT(&worker->ns_ctx); 827 worker->lcore = i; 828 TAILQ_INSERT_TAIL(&g_workers, worker, link); 829 g_arbitration.num_workers++; 830 831 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { 
832 qprio++; 833 } 834 835 worker->qprio = qprio & SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK; 836 } 837 838 return 0; 839 } 840 841 static bool 842 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 843 struct spdk_nvme_ctrlr_opts *opts) 844 { 845 /* Update with user specified arbitration configuration */ 846 opts->arb_mechanism = g_arbitration.arbitration_mechanism; 847 848 printf("Attaching to %s\n", trid->traddr); 849 850 return true; 851 } 852 853 static void 854 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 855 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 856 { 857 printf("Attached to %s\n", trid->traddr); 858 859 /* Update with actual arbitration configuration in use */ 860 g_arbitration.arbitration_mechanism = opts->arb_mechanism; 861 862 register_ctrlr(ctrlr); 863 } 864 865 static int 866 register_controllers(void) 867 { 868 printf("Initializing NVMe Controllers\n"); 869 870 if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { 871 fprintf(stderr, "spdk_nvme_probe() failed\n"); 872 return 1; 873 } 874 875 if (g_arbitration.num_namespaces == 0) { 876 fprintf(stderr, "No valid namespaces to continue IO testing\n"); 877 return 1; 878 } 879 880 return 0; 881 } 882 883 static void 884 unregister_controllers(void) 885 { 886 struct ctrlr_entry *entry, *tmp; 887 struct spdk_nvme_detach_ctx *detach_ctx = NULL; 888 889 TAILQ_FOREACH_SAFE(entry, &g_controllers, link, tmp) { 890 TAILQ_REMOVE(&g_controllers, entry, link); 891 if (g_arbitration.latency_tracking_enable && 892 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 893 set_latency_tracking_feature(entry->ctrlr, false); 894 } 895 spdk_nvme_detach_async(entry->ctrlr, &detach_ctx); 896 free(entry); 897 } 898 899 while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) { 900 ; 901 } 902 } 903 904 static int 905 associate_workers_with_ns(void) 906 { 907 struct ns_entry *entry = 
TAILQ_FIRST(&g_namespaces); 908 struct worker_thread *worker = TAILQ_FIRST(&g_workers); 909 struct ns_worker_ctx *ns_ctx; 910 int i, count; 911 912 count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 913 g_arbitration.num_namespaces : g_arbitration.num_workers; 914 915 for (i = 0; i < count; i++) { 916 if (entry == NULL) { 917 break; 918 } 919 920 ns_ctx = malloc(sizeof(struct ns_worker_ctx)); 921 if (!ns_ctx) { 922 return 1; 923 } 924 memset(ns_ctx, 0, sizeof(*ns_ctx)); 925 926 printf("Associating %s with lcore %d\n", entry->name, worker->lcore); 927 ns_ctx->entry = entry; 928 TAILQ_INSERT_TAIL(&worker->ns_ctx, ns_ctx, link); 929 930 worker = TAILQ_NEXT(worker, link); 931 if (worker == NULL) { 932 worker = TAILQ_FIRST(&g_workers); 933 } 934 935 entry = TAILQ_NEXT(entry, link); 936 if (entry == NULL) { 937 entry = TAILQ_FIRST(&g_namespaces); 938 } 939 940 } 941 942 return 0; 943 } 944 945 static void 946 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 947 { 948 struct feature *feature = cb_arg; 949 int fid = feature - features; 950 951 if (spdk_nvme_cpl_is_error(cpl)) { 952 printf("get_feature(0x%02X) failed\n", fid); 953 } else { 954 feature->result = cpl->cdw0; 955 feature->valid = true; 956 } 957 958 g_arbitration.outstanding_commands--; 959 } 960 961 static int 962 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) 963 { 964 struct spdk_nvme_cmd cmd = {}; 965 struct feature *feature = &features[fid]; 966 967 feature->valid = false; 968 969 cmd.opc = SPDK_NVME_OPC_GET_FEATURES; 970 cmd.cdw10_bits.get_features.fid = fid; 971 972 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, feature); 973 } 974 975 static void 976 get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 977 { 978 get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION); 979 980 g_arbitration.outstanding_commands++; 981 982 while (g_arbitration.outstanding_commands) { 983 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 984 } 985 986 if 
(features[SPDK_NVME_FEAT_ARBITRATION].valid) { 987 union spdk_nvme_cmd_cdw11 arb; 988 arb.feat_arbitration.raw = features[SPDK_NVME_FEAT_ARBITRATION].result; 989 990 printf("Current Arbitration Configuration\n"); 991 printf("===========\n"); 992 printf("Arbitration Burst: "); 993 if (arb.feat_arbitration.bits.ab == SPDK_NVME_ARBITRATION_BURST_UNLIMITED) { 994 printf("no limit\n"); 995 } else { 996 printf("%u\n", 1u << arb.feat_arbitration.bits.ab); 997 } 998 999 printf("Low Priority Weight: %u\n", arb.feat_arbitration.bits.lpw + 1); 1000 printf("Medium Priority Weight: %u\n", arb.feat_arbitration.bits.mpw + 1); 1001 printf("High Priority Weight: %u\n", arb.feat_arbitration.bits.hpw + 1); 1002 printf("\n"); 1003 } 1004 } 1005 1006 static void 1007 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 1008 { 1009 struct feature *feature = cb_arg; 1010 int fid = feature - features; 1011 1012 if (spdk_nvme_cpl_is_error(cpl)) { 1013 printf("set_feature(0x%02X) failed\n", fid); 1014 feature->valid = false; 1015 } else { 1016 printf("Set Arbitration Feature Successfully\n"); 1017 } 1018 1019 g_arbitration.outstanding_commands--; 1020 } 1021 1022 static int 1023 set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 1024 { 1025 int ret; 1026 struct spdk_nvme_cmd cmd = {}; 1027 1028 cmd.opc = SPDK_NVME_OPC_SET_FEATURES; 1029 cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_ARBITRATION; 1030 1031 g_arbitration.outstanding_commands = 0; 1032 1033 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1034 cmd.cdw11_bits.feat_arbitration.bits.ab = SPDK_NVME_ARBITRATION_BURST_UNLIMITED; 1035 cmd.cdw11_bits.feat_arbitration.bits.lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT; 1036 cmd.cdw11_bits.feat_arbitration.bits.mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT; 1037 cmd.cdw11_bits.feat_arbitration.bits.hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT; 1038 } 1039 1040 ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, 1041 set_feature_completion, 
&features[SPDK_NVME_FEAT_ARBITRATION]); 1042 if (ret) { 1043 printf("Set Arbitration Feature: Failed 0x%x\n", ret); 1044 return 1; 1045 } 1046 1047 g_arbitration.outstanding_commands++; 1048 1049 while (g_arbitration.outstanding_commands) { 1050 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1051 } 1052 1053 if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1054 printf("Set Arbitration Feature failed and use default configuration\n"); 1055 } 1056 1057 return 0; 1058 } 1059 1060 int 1061 main(int argc, char **argv) 1062 { 1063 int rc; 1064 struct worker_thread *worker, *main_worker; 1065 unsigned main_core; 1066 char task_pool_name[30]; 1067 uint32_t task_count = 0; 1068 struct spdk_env_opts opts; 1069 1070 rc = parse_args(argc, argv); 1071 if (rc != 0) { 1072 return rc; 1073 } 1074 1075 opts.opts_size = sizeof(opts); 1076 spdk_env_opts_init(&opts); 1077 opts.name = "arb"; 1078 opts.mem_size = g_dpdk_mem; 1079 opts.hugepage_single_segments = g_dpdk_mem_single_seg; 1080 opts.core_mask = g_arbitration.core_mask; 1081 opts.shm_id = g_arbitration.shm_id; 1082 if (spdk_env_init(&opts) < 0) { 1083 return 1; 1084 } 1085 1086 g_arbitration.tsc_rate = spdk_get_ticks_hz(); 1087 1088 if (register_workers() != 0) { 1089 rc = 1; 1090 goto exit; 1091 } 1092 1093 if (register_controllers() != 0) { 1094 rc = 1; 1095 goto exit; 1096 } 1097 1098 if (associate_workers_with_ns() != 0) { 1099 rc = 1; 1100 goto exit; 1101 } 1102 1103 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); 1104 1105 /* 1106 * The task_count will be dynamically calculated based on the 1107 * number of attached active namespaces, queue depth and number 1108 * of cores (workers) involved in the IO perations. 1109 */ 1110 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 
1111 g_arbitration.num_namespaces : g_arbitration.num_workers; 1112 task_count *= g_arbitration.queue_depth; 1113 1114 task_pool = spdk_mempool_create(task_pool_name, task_count, 1115 sizeof(struct arb_task), 0, SPDK_ENV_NUMA_ID_ANY); 1116 if (task_pool == NULL) { 1117 fprintf(stderr, "could not initialize task pool\n"); 1118 rc = 1; 1119 goto exit; 1120 } 1121 1122 print_configuration(argv[0]); 1123 1124 printf("Initialization complete. Launching workers.\n"); 1125 1126 /* Launch all of the secondary workers */ 1127 main_core = spdk_env_get_current_core(); 1128 main_worker = NULL; 1129 TAILQ_FOREACH(worker, &g_workers, link) { 1130 if (worker->lcore != main_core) { 1131 spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); 1132 } else { 1133 assert(main_worker == NULL); 1134 main_worker = worker; 1135 } 1136 } 1137 1138 assert(main_worker != NULL); 1139 rc = work_fn(main_worker); 1140 1141 spdk_env_thread_wait_all(); 1142 1143 print_stats(); 1144 1145 exit: 1146 unregister_controllers(); 1147 cleanup(task_count); 1148 1149 spdk_env_fini(); 1150 1151 if (rc != 0) { 1152 fprintf(stderr, "%s: errors occurred\n", argv[0]); 1153 } 1154 1155 return rc; 1156 } 1157