1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "spdk/nvme.h" 37 #include "spdk/env.h" 38 #include "spdk/string.h" 39 #include "spdk/nvme_intel.h" 40 41 struct ctrlr_entry { 42 struct spdk_nvme_ctrlr *ctrlr; 43 struct spdk_nvme_intel_rw_latency_page latency_page; 44 struct ctrlr_entry *next; 45 char name[1024]; 46 }; 47 48 struct ns_entry { 49 struct { 50 struct spdk_nvme_ctrlr *ctrlr; 51 struct spdk_nvme_ns *ns; 52 } nvme; 53 54 struct ns_entry *next; 55 uint32_t io_size_blocks; 56 uint64_t size_in_ios; 57 char name[1024]; 58 }; 59 60 struct ns_worker_ctx { 61 struct ns_entry *entry; 62 uint64_t io_completed; 63 uint64_t current_queue_depth; 64 uint64_t offset_in_ios; 65 bool is_draining; 66 struct spdk_nvme_qpair *qpair; 67 struct ns_worker_ctx *next; 68 }; 69 70 struct arb_task { 71 struct ns_worker_ctx *ns_ctx; 72 void *buf; 73 }; 74 75 struct worker_thread { 76 struct ns_worker_ctx *ns_ctx; 77 struct worker_thread *next; 78 unsigned lcore; 79 enum spdk_nvme_qprio qprio; 80 }; 81 82 struct arb_context { 83 int shm_id; 84 int outstanding_commands; 85 int num_namespaces; 86 int num_workers; 87 int rw_percentage; 88 int is_random; 89 int queue_depth; 90 int time_in_sec; 91 int io_count; 92 uint8_t latency_tracking_enable; 93 uint8_t arbitration_mechanism; 94 uint8_t arbitration_config; 95 uint32_t io_size_bytes; 96 uint32_t max_completions; 97 uint64_t tsc_rate; 98 const char *core_mask; 99 const char *workload_type; 100 }; 101 102 struct feature { 103 uint32_t result; 104 bool valid; 105 }; 106 107 static struct spdk_mempool *task_pool = NULL; 108 109 static struct ctrlr_entry *g_controllers = NULL; 110 static struct ns_entry *g_namespaces = NULL; 111 static struct worker_thread *g_workers = NULL; 112 113 static struct feature features[256]; 114 115 static struct arb_context g_arbitration = { 116 .shm_id = -1, 117 .outstanding_commands = 0, 118 .num_workers = 0, 119 .num_namespaces = 0, 120 .rw_percentage = 50, 121 .queue_depth = 64, 122 .time_in_sec = 60, 123 .io_count = 100000, 124 .latency_tracking_enable = 0, 125 .arbitration_mechanism = SPDK_NVME_CC_AMS_RR, 126 .arbitration_config = 0, 127 .io_size_bytes = 131072, 128 .max_completions = 0, 129 /* Default 4 cores for urgent/high/medium/low */ 130 .core_mask = "0xf", 131 .workload_type = "randrw", 132 }; 133 134 /* 135 * For weighted round robin arbitration mechanism, the smaller value between 136 * weight and burst will be picked to execute the commands in one queue. 137 */ 138 #define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32 139 #define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16 140 #define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8 141 #define USER_SPECIFIED_ARBITRATION_BURST 7 /* No limit */ 142 143 /* 144 * Description of dword for priority weight and arbitration burst 145 * ------------------------------------------------------------------------------ 146 * 31 : 24 | 23 : 16 | 15 : 08 | 07 : 03 | 02 : 00 147 * ------------------------------------------------------------------------------ 148 * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst 149 * ------------------------------------------------------------------------------ 150 * 151 * The priority weights are zero based value. 152 */ 153 #define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT 24 154 #define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT 16 155 #define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT 8 156 #define SPDK_NVME_PRIO_WEIGHT_MASK 0xFF 157 #define SPDK_NVME_ARB_BURST_MASK 0x7 158 159 #define SPDK_NVME_QPRIO_MAX (SPDK_NVME_QPRIO_LOW + 1) 160 161 static void task_complete(struct arb_task *task); 162 163 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 164 165 static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 166 167 static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); 168 169 static const char *print_qprio(enum spdk_nvme_qprio); 170 171 172 static void 173 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) 174 { 175 struct ns_entry *entry; 176 const struct spdk_nvme_ctrlr_data *cdata; 177 178 cdata = spdk_nvme_ctrlr_get_data(ctrlr); 179 180 if (!spdk_nvme_ns_is_active(ns)) { 181 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", 182 cdata->mn, cdata->sn, 183 spdk_nvme_ns_get_id(ns)); 184 return; 185 } 186 187 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || 188 spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) { 189 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " 190 "ns size %" PRIu64 " / block size %u for I/O size %u\n", 191 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), 192 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), 193 g_arbitration.io_size_bytes); 194 return; 195 } 196 197 entry = malloc(sizeof(struct ns_entry)); 198 if (entry == NULL) { 199 perror("ns_entry malloc"); 200 exit(1); 201 } 202 203 entry->nvme.ctrlr = ctrlr; 204 entry->nvme.ns = ns; 205 206 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; 207 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); 208 209 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 210 211 g_arbitration.num_namespaces++; 212 entry->next = g_namespaces; 213 g_namespaces = entry; 214 } 215 216 static void 217 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) 218 { 219 if (spdk_nvme_cpl_is_error(cpl)) { 220 printf("enable_latency_tracking_complete failed\n"); 221 } 222 g_arbitration.outstanding_commands--; 223 } 224 225 static void 226 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) 227 { 228 int res; 229 union spdk_nvme_intel_feat_latency_tracking latency_tracking; 230 231 if (enable) { 232 latency_tracking.bits.enable = 0x01; 233 } else { 234 latency_tracking.bits.enable = 0x00; 235 } 236 237 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, 238 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); 239 if (res) { 240 printf("fail to allocate nvme request.\n"); 241 return; 242 } 243 g_arbitration.outstanding_commands++; 244 245 while (g_arbitration.outstanding_commands) { 246 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 247 } 248 } 249 250 static void 251 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) 252 { 253 int nsid, num_ns; 254 struct spdk_nvme_ns *ns; 255 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); 256 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); 257 258 if (entry == NULL) { 259 perror("ctrlr_entry malloc"); 260 exit(1); 261 } 262 263 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 264 265 entry->ctrlr = ctrlr; 266 entry->next = g_controllers; 267 g_controllers = entry; 268 269 if ((g_arbitration.latency_tracking_enable != 0) && 270 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 271 set_latency_tracking_feature(ctrlr, true); 272 } 273 274 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); 275 for (nsid = 1; nsid <= num_ns; nsid++) { 276 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); 277 if (ns == NULL) { 278 continue; 279 } 280 register_ns(ctrlr, ns); 281 } 282 283 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { 284 get_arb_feature(ctrlr); 285 286 if (g_arbitration.arbitration_config != 0) { 287 set_arb_feature(ctrlr); 288 get_arb_feature(ctrlr); 289 } 290 } 291 } 292 293 static __thread unsigned int seed = 0; 294 295 static void 296 submit_single_io(struct ns_worker_ctx *ns_ctx) 297 { 298 struct arb_task *task = NULL; 299 uint64_t offset_in_ios; 300 int rc; 301 struct ns_entry *entry = ns_ctx->entry; 302 303 task = spdk_mempool_get(task_pool); 304 if (!task) { 305 fprintf(stderr, "Failed to get task from task_pool\n"); 306 exit(1); 307 } 308 309 task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); 310 if (!task->buf) { 311 spdk_mempool_put(task_pool, task); 312 fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); 313 exit(1); 314 } 315 316 task->ns_ctx = ns_ctx; 317 318 if (g_arbitration.is_random) { 319 offset_in_ios = rand_r(&seed) % entry->size_in_ios; 320 } else { 321 offset_in_ios = ns_ctx->offset_in_ios++; 322 if (ns_ctx->offset_in_ios == entry->size_in_ios) { 323 ns_ctx->offset_in_ios = 0; 324 } 325 } 326 327 if ((g_arbitration.rw_percentage == 100) || 328 (g_arbitration.rw_percentage != 0 && 329 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { 330 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, 331 offset_in_ios * entry->io_size_blocks, 332 entry->io_size_blocks, io_complete, task, 0); 333 } else { 334 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, 335 offset_in_ios * entry->io_size_blocks, 336 entry->io_size_blocks, io_complete, task, 0); 337 } 338 339 if (rc != 0) { 340 fprintf(stderr, "starting I/O failed\n"); 341 } 342 343 ns_ctx->current_queue_depth++; 344 } 345 346 static void 347 task_complete(struct arb_task *task) 348 { 349 struct ns_worker_ctx *ns_ctx; 350 351 ns_ctx = task->ns_ctx; 352 ns_ctx->current_queue_depth--; 353 ns_ctx->io_completed++; 354 355 spdk_dma_free(task->buf); 356 spdk_mempool_put(task_pool, task); 357 358 /* 359 * is_draining indicates when time has expired for the test run 360 * and we are just waiting for the previously submitted I/O 361 * to complete. In this case, do not submit a new I/O to replace 362 * the one just completed. 363 */ 364 if (!ns_ctx->is_draining) { 365 submit_single_io(ns_ctx); 366 } 367 } 368 369 static void 370 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 371 { 372 task_complete((struct arb_task *)ctx); 373 } 374 375 static void 376 check_io(struct ns_worker_ctx *ns_ctx) 377 { 378 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions); 379 } 380 381 static void 382 submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) 383 { 384 while (queue_depth-- > 0) { 385 submit_single_io(ns_ctx); 386 } 387 } 388 389 static void 390 drain_io(struct ns_worker_ctx *ns_ctx) 391 { 392 ns_ctx->is_draining = true; 393 while (ns_ctx->current_queue_depth > 0) { 394 check_io(ns_ctx); 395 } 396 } 397 398 static int 399 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio) 400 { 401 struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr; 402 struct spdk_nvme_io_qpair_opts opts; 403 404 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); 405 opts.qprio = qprio; 406 407 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); 408 if (!ns_ctx->qpair) { 409 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); 410 return 1; 411 } 412 413 return 0; 414 } 415 416 static void 417 cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) 418 { 419 spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); 420 } 421 422 static void 423 cleanup(uint32_t task_count) 424 { 425 struct ns_entry *entry = g_namespaces; 426 struct ns_entry *next_entry = NULL; 427 struct worker_thread *worker = g_workers; 428 struct worker_thread *next_worker = NULL; 429 430 while (entry) { 431 next_entry = entry->next; 432 free(entry); 433 entry = next_entry; 434 }; 435 436 while (worker) { 437 next_worker = worker->next; 438 free(worker->ns_ctx); 439 free(worker); 440 worker = next_worker; 441 }; 442 443 if (spdk_mempool_count(task_pool) != (size_t)task_count) { 444 fprintf(stderr, "task_pool count is %zu but should be %u\n", 445 spdk_mempool_count(task_pool), task_count); 446 } 447 spdk_mempool_free(task_pool); 448 } 449 450 static int 451 work_fn(void *arg) 452 { 453 uint64_t tsc_end; 454 struct worker_thread *worker = (struct worker_thread *)arg; 455 struct ns_worker_ctx *ns_ctx = NULL; 456 457 printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio)); 458 459 /* Allocate a queue pair for each namespace. */ 460 ns_ctx = worker->ns_ctx; 461 while (ns_ctx != NULL) { 462 if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) { 463 printf("ERROR: init_ns_worker_ctx() failed\n"); 464 return 1; 465 } 466 ns_ctx = ns_ctx->next; 467 } 468 469 tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate; 470 471 /* Submit initial I/O for each namespace. */ 472 ns_ctx = worker->ns_ctx; 473 474 while (ns_ctx != NULL) { 475 submit_io(ns_ctx, g_arbitration.queue_depth); 476 ns_ctx = ns_ctx->next; 477 } 478 479 while (1) { 480 /* 481 * Check for completed I/O for each controller. A new 482 * I/O will be submitted in the io_complete callback 483 * to replace each I/O that is completed. 484 */ 485 ns_ctx = worker->ns_ctx; 486 while (ns_ctx != NULL) { 487 check_io(ns_ctx); 488 ns_ctx = ns_ctx->next; 489 } 490 491 if (spdk_get_ticks() > tsc_end) { 492 break; 493 } 494 } 495 496 ns_ctx = worker->ns_ctx; 497 while (ns_ctx != NULL) { 498 drain_io(ns_ctx); 499 cleanup_ns_worker_ctx(ns_ctx); 500 ns_ctx = ns_ctx->next; 501 } 502 503 return 0; 504 } 505 506 static void 507 usage(char *program_name) 508 { 509 printf("%s options", program_name); 510 printf("\n"); 511 printf("\t[-q io depth]\n"); 512 printf("\t[-s io size in bytes]\n"); 513 printf("\t[-w io pattern type, must be one of\n"); 514 printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); 515 printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); 516 printf("\t[-l enable latency tracking, default: disabled]\n"); 517 printf("\t\t(0 - disabled; 1 - enabled)\n"); 518 printf("\t[-t time in seconds]\n"); 519 printf("\t[-c core mask for I/O submission/completion.]\n"); 520 printf("\t\t(default: 0xf - 4 cores)]\n"); 521 printf("\t[-m max completions per poll]\n"); 522 printf("\t\t(default: 0 - unlimited)\n"); 523 printf("\t[-a arbitration mechanism, must be one of below]\n"); 524 printf("\t\t(0, 1, 2)]\n"); 525 printf("\t\t(0: default round robin mechanism)]\n"); 526 printf("\t\t(1: weighted round robin mechanism)]\n"); 527 printf("\t\t(2: vendor specific mechanism)]\n"); 528 printf("\t[-b enable arbitration user configuration, default: disabled]\n"); 529 printf("\t\t(0 - disabled; 1 - enabled)\n"); 530 printf("\t[-n subjected IOs for performance comparison]\n"); 531 printf("\t[-i shared memory group ID]\n"); 532 } 533 534 static const char * 535 print_qprio(enum spdk_nvme_qprio qprio) 536 { 537 switch (qprio) { 538 case SPDK_NVME_QPRIO_URGENT: 539 return "urgent priority queue"; 540 case SPDK_NVME_QPRIO_HIGH: 541 return "high priority queue"; 542 case SPDK_NVME_QPRIO_MEDIUM: 543 return "medium priority queue"; 544 case SPDK_NVME_QPRIO_LOW: 545 return "low priority queue"; 546 default: 547 return "invalid priority queue"; 548 } 549 } 550 551 552 static void 553 print_configuration(char *program_name) 554 { 555 printf("%s run with configuration:\n", program_name); 556 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n", 557 program_name, 558 g_arbitration.queue_depth, 559 g_arbitration.io_size_bytes, 560 g_arbitration.workload_type, 561 g_arbitration.rw_percentage, 562 g_arbitration.latency_tracking_enable, 563 g_arbitration.time_in_sec, 564 g_arbitration.core_mask, 565 g_arbitration.max_completions, 566 g_arbitration.arbitration_mechanism, 567 g_arbitration.arbitration_config, 568 g_arbitration.io_count, 569 g_arbitration.shm_id); 570 } 571 572 573 static void 574 print_performance(void) 575 { 576 float io_per_second, sent_all_io_in_secs; 577 struct worker_thread *worker; 578 struct ns_worker_ctx *ns_ctx; 579 580 worker = g_workers; 581 while (worker) { 582 ns_ctx = worker->ns_ctx; 583 while (ns_ctx) { 584 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec; 585 sent_all_io_in_secs = g_arbitration.io_count / io_per_second; 586 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n", 587 ns_ctx->entry->name, worker->lcore, 588 io_per_second, sent_all_io_in_secs, g_arbitration.io_count); 589 ns_ctx = ns_ctx->next; 590 } 591 worker = worker->next; 592 } 593 printf("========================================================\n"); 594 595 printf("\n"); 596 } 597 598 static void 599 print_latency_page(struct ctrlr_entry *entry) 600 { 601 int i; 602 603 printf("\n"); 604 printf("%s\n", entry->name); 605 printf("--------------------------------------------------------\n"); 606 607 for (i = 0; i < 32; i++) { 608 if (entry->latency_page.buckets_32us[i]) 609 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, 610 entry->latency_page.buckets_32us[i]); 611 } 612 for (i = 0; i < 31; i++) { 613 if (entry->latency_page.buckets_1ms[i]) 614 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, 615 entry->latency_page.buckets_1ms[i]); 616 } 617 for (i = 0; i < 31; i++) { 618 if (entry->latency_page.buckets_32ms[i]) 619 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, 620 entry->latency_page.buckets_32ms[i]); 621 } 622 } 623 624 static void 625 print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) 626 { 627 struct ctrlr_entry *ctrlr; 628 629 printf("%s Latency Statistics:\n", op_name); 630 printf("========================================================\n"); 631 ctrlr = g_controllers; 632 while (ctrlr) { 633 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 634 if (spdk_nvme_ctrlr_cmd_get_log_page( 635 ctrlr->ctrlr, log_page, 636 SPDK_NVME_GLOBAL_NS_TAG, 637 &ctrlr->latency_page, 638 sizeof(struct spdk_nvme_intel_rw_latency_page), 639 0, 640 enable_latency_tracking_complete, 641 NULL)) { 642 printf("nvme_ctrlr_cmd_get_log_page() failed\n"); 643 exit(1); 644 } 645 646 g_arbitration.outstanding_commands++; 647 } else { 648 printf("Controller %s: %s latency statistics not supported\n", 649 ctrlr->name, op_name); 650 } 651 ctrlr = ctrlr->next; 652 } 653 654 while (g_arbitration.outstanding_commands) { 655 ctrlr = g_controllers; 656 while (ctrlr) { 657 spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); 658 ctrlr = ctrlr->next; 659 } 660 } 661 662 ctrlr = g_controllers; 663 while (ctrlr) { 664 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { 665 print_latency_page(ctrlr); 666 } 667 ctrlr = ctrlr->next; 668 } 669 printf("\n"); 670 } 671 672 static void 673 print_stats(void) 674 { 675 print_performance(); 676 if (g_arbitration.latency_tracking_enable) { 677 if (g_arbitration.rw_percentage != 0) { 678 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); 679 } 680 if (g_arbitration.rw_percentage != 100) { 681 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); 682 } 683 } 684 } 685 686 static int 687 parse_args(int argc, char **argv) 688 { 689 const char *workload_type = NULL; 690 int op = 0; 691 bool mix_specified = false; 692 long int val; 693 694 while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) { 695 switch (op) { 696 case 'c': 697 g_arbitration.core_mask = optarg; 698 break; 699 case 'w': 700 g_arbitration.workload_type = optarg; 701 break; 702 case 'h': 703 case '?': 704 usage(argv[0]); 705 return 1; 706 default: 707 val = spdk_strtol(optarg, 10); 708 if (val < 0) { 709 fprintf(stderr, "Converting a string to integer failed\n"); 710 return val; 711 } 712 switch (op) { 713 case 'i': 714 g_arbitration.shm_id = val; 715 break; 716 case 'l': 717 g_arbitration.latency_tracking_enable = val; 718 break; 719 case 'm': 720 g_arbitration.max_completions = val; 721 break; 722 case 'q': 723 g_arbitration.queue_depth = val; 724 break; 725 case 's': 726 g_arbitration.io_size_bytes = val; 727 break; 728 case 't': 729 g_arbitration.time_in_sec = val; 730 break; 731 case 'M': 732 g_arbitration.rw_percentage = val; 733 mix_specified = true; 734 break; 735 case 'a': 736 g_arbitration.arbitration_mechanism = val; 737 break; 738 case 'b': 739 g_arbitration.arbitration_config = val; 740 break; 741 case 'n': 742 g_arbitration.io_count = val; 743 break; 744 default: 745 usage(argv[0]); 746 return -EINVAL; 747 } 748 } 749 } 750 751 workload_type = g_arbitration.workload_type; 752 753 if (strcmp(workload_type, "read") && 754 strcmp(workload_type, "write") && 755 strcmp(workload_type, "randread") && 756 strcmp(workload_type, "randwrite") && 757 strcmp(workload_type, "rw") && 758 strcmp(workload_type, "randrw")) { 759 fprintf(stderr, 760 "io pattern type must be one of\n" 761 "(read, write, randread, randwrite, rw, randrw)\n"); 762 return 1; 763 } 764 765 if (!strcmp(workload_type, "read") || 766 !strcmp(workload_type, "randread")) { 767 g_arbitration.rw_percentage = 100; 768 } 769 770 if (!strcmp(workload_type, "write") || 771 !strcmp(workload_type, "randwrite")) { 772 g_arbitration.rw_percentage = 0; 773 } 774 775 if (!strcmp(workload_type, "read") || 776 !strcmp(workload_type, "randread") || 777 !strcmp(workload_type, "write") || 778 !strcmp(workload_type, "randwrite")) { 779 if (mix_specified) { 780 fprintf(stderr, "Ignoring -M option... Please use -M option" 781 " only when using rw or randrw.\n"); 782 } 783 } 784 785 if (!strcmp(workload_type, "rw") || 786 !strcmp(workload_type, "randrw")) { 787 if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { 788 fprintf(stderr, 789 "-M must be specified to value from 0 to 100 " 790 "for rw or randrw.\n"); 791 return 1; 792 } 793 } 794 795 if (!strcmp(workload_type, "read") || 796 !strcmp(workload_type, "write") || 797 !strcmp(workload_type, "rw")) { 798 g_arbitration.is_random = 0; 799 } else { 800 g_arbitration.is_random = 1; 801 } 802 803 if (g_arbitration.latency_tracking_enable != 0 && 804 g_arbitration.latency_tracking_enable != 1) { 805 fprintf(stderr, 806 "-l must be specified to value 0 or 1.\n"); 807 return 1; 808 } 809 810 switch (g_arbitration.arbitration_mechanism) { 811 case SPDK_NVME_CC_AMS_RR: 812 case SPDK_NVME_CC_AMS_WRR: 813 case SPDK_NVME_CC_AMS_VS: 814 break; 815 default: 816 fprintf(stderr, 817 "-a must be specified to value 0, 1, or 7.\n"); 818 return 1; 819 } 820 821 if (g_arbitration.arbitration_config != 0 && 822 g_arbitration.arbitration_config != 1) { 823 fprintf(stderr, 824 "-b must be specified to value 0 or 1.\n"); 825 return 1; 826 } else if (g_arbitration.arbitration_config == 1 && 827 g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { 828 fprintf(stderr, 829 "-a must be specified to 1 (WRR) together.\n"); 830 return 1; 831 } 832 833 return 0; 834 } 835 836 static int 837 register_workers(void) 838 { 839 uint32_t i; 840 struct worker_thread *worker; 841 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; 842 843 g_workers = NULL; 844 g_arbitration.num_workers = 0; 845 846 SPDK_ENV_FOREACH_CORE(i) { 847 worker = calloc(1, sizeof(*worker)); 848 if (worker == NULL) { 849 fprintf(stderr, "Unable to allocate worker\n"); 850 return -1; 851 } 852 853 worker->lcore = i; 854 worker->next = g_workers; 855 g_workers = worker; 856 g_arbitration.num_workers++; 857 858 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { 859 qprio++; 860 } 861 862 worker->qprio = qprio % SPDK_NVME_QPRIO_MAX; 863 } 864 865 return 0; 866 } 867 868 static bool 869 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 870 struct spdk_nvme_ctrlr_opts *opts) 871 { 872 /* Update with user specified arbitration configuration */ 873 opts->arb_mechanism = g_arbitration.arbitration_mechanism; 874 875 printf("Attaching to %s\n", trid->traddr); 876 877 return true; 878 } 879 880 static void 881 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 882 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 883 { 884 printf("Attached to %s\n", trid->traddr); 885 886 /* Update with actual arbitration configuration in use */ 887 g_arbitration.arbitration_mechanism = opts->arb_mechanism; 888 889 register_ctrlr(ctrlr); 890 } 891 892 static int 893 register_controllers(void) 894 { 895 printf("Initializing NVMe Controllers\n"); 896 897 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { 898 fprintf(stderr, "spdk_nvme_probe() failed\n"); 899 return 1; 900 } 901 902 if (g_arbitration.num_namespaces == 0) { 903 fprintf(stderr, "No valid namespaces to continue IO testing\n"); 904 return 1; 905 } 906 907 return 0; 908 } 909 910 static void 911 unregister_controllers(void) 912 { 913 struct ctrlr_entry *entry = g_controllers; 914 915 while (entry) { 916 struct ctrlr_entry *next = entry->next; 917 if (g_arbitration.latency_tracking_enable && 918 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { 919 set_latency_tracking_feature(entry->ctrlr, false); 920 } 921 spdk_nvme_detach(entry->ctrlr); 922 free(entry); 923 entry = next; 924 } 925 } 926 927 static int 928 associate_workers_with_ns(void) 929 { 930 struct ns_entry *entry = g_namespaces; 931 struct worker_thread *worker = g_workers; 932 struct ns_worker_ctx *ns_ctx; 933 int i, count; 934 935 count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 936 g_arbitration.num_namespaces : g_arbitration.num_workers; 937 938 for (i = 0; i < count; i++) { 939 if (entry == NULL) { 940 break; 941 } 942 943 ns_ctx = malloc(sizeof(struct ns_worker_ctx)); 944 if (!ns_ctx) { 945 return 1; 946 } 947 memset(ns_ctx, 0, sizeof(*ns_ctx)); 948 949 printf("Associating %s with lcore %d\n", entry->name, worker->lcore); 950 ns_ctx->entry = entry; 951 ns_ctx->next = worker->ns_ctx; 952 worker->ns_ctx = ns_ctx; 953 954 worker = worker->next; 955 if (worker == NULL) { 956 worker = g_workers; 957 } 958 959 entry = entry->next; 960 if (entry == NULL) { 961 entry = g_namespaces; 962 } 963 964 } 965 966 return 0; 967 } 968 969 static void 970 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 971 { 972 struct feature *feature = cb_arg; 973 int fid = feature - features; 974 975 if (spdk_nvme_cpl_is_error(cpl)) { 976 printf("get_feature(0x%02X) failed\n", fid); 977 } else { 978 feature->result = cpl->cdw0; 979 feature->valid = true; 980 } 981 982 g_arbitration.outstanding_commands--; 983 } 984 985 static int 986 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) 987 { 988 struct spdk_nvme_cmd cmd = {}; 989 990 cmd.opc = SPDK_NVME_OPC_GET_FEATURES; 991 cmd.cdw10 = fid; 992 993 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]); 994 } 995 996 static void 997 get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 998 { 999 get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION); 1000 1001 g_arbitration.outstanding_commands++; 1002 1003 while (g_arbitration.outstanding_commands) { 1004 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1005 } 1006 1007 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1008 uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; 1009 unsigned ab, lpw, mpw, hpw; 1010 1011 ab = arb & SPDK_NVME_ARB_BURST_MASK; 1012 lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; 1013 mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; 1014 hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; 1015 1016 printf("Current Arbitration Configuration\n"); 1017 printf("===========\n"); 1018 printf("Arbitration Burst: "); 1019 if (ab == SPDK_NVME_ARB_BURST_MASK) { 1020 printf("no limit\n"); 1021 } else { 1022 printf("%u\n", 1u << ab); 1023 } 1024 1025 printf("Low Priority Weight: %u\n", lpw); 1026 printf("Medium Priority Weight: %u\n", mpw); 1027 printf("High Priority Weight: %u\n", hpw); 1028 printf("\n"); 1029 } 1030 } 1031 1032 static void 1033 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) 1034 { 1035 struct feature *feature = cb_arg; 1036 int fid = feature - features; 1037 1038 if (spdk_nvme_cpl_is_error(cpl)) { 1039 printf("set_feature(0x%02X) failed\n", fid); 1040 feature->valid = false; 1041 } else { 1042 printf("Set Arbitration Feature Successfully\n"); 1043 } 1044 1045 g_arbitration.outstanding_commands--; 1046 } 1047 1048 static int 1049 set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) 1050 { 1051 int ret; 1052 struct spdk_nvme_cmd cmd = {}; 1053 uint32_t arb = 0; 1054 unsigned ab, lpw, mpw, hpw; 1055 1056 cmd.opc = SPDK_NVME_OPC_SET_FEATURES; 1057 cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION; 1058 1059 g_arbitration.outstanding_commands = 0; 1060 1061 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1062 ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK; 1063 hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT; 1064 mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT; 1065 lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT; 1066 arb = hpw | mpw | lpw | ab; 1067 cmd.cdw11 = arb; 1068 } 1069 1070 ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, 1071 set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]); 1072 if (ret) { 1073 printf("Set Arbitration Feature: Failed 0x%x\n", ret); 1074 return 1; 1075 } 1076 1077 g_arbitration.outstanding_commands++; 1078 1079 while (g_arbitration.outstanding_commands) { 1080 spdk_nvme_ctrlr_process_admin_completions(ctrlr); 1081 } 1082 1083 if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { 1084 printf("Set Arbitration Feature failed and use default configuration\n"); 1085 } 1086 1087 return 0; 1088 } 1089 1090 int 1091 main(int argc, char **argv) 1092 { 1093 int rc; 1094 struct worker_thread *worker, *master_worker; 1095 unsigned master_core; 1096 char task_pool_name[30]; 1097 uint32_t task_count; 1098 struct spdk_env_opts opts; 1099 1100 rc = parse_args(argc, argv); 1101 if (rc != 0) { 1102 return rc; 1103 } 1104 1105 spdk_env_opts_init(&opts); 1106 opts.name = "arb"; 1107 opts.core_mask = g_arbitration.core_mask; 1108 opts.shm_id = g_arbitration.shm_id; 1109 if (spdk_env_init(&opts) < 0) { 1110 return 1; 1111 } 1112 1113 g_arbitration.tsc_rate = spdk_get_ticks_hz(); 1114 1115 if (register_workers() != 0) { 1116 return 1; 1117 } 1118 1119 if (register_controllers() != 0) { 1120 return 1; 1121 } 1122 1123 if (associate_workers_with_ns() != 0) { 1124 return 1; 1125 } 1126 1127 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); 1128 1129 /* 1130 * The task_count will be dynamically calculated based on the 1131 * number of attached active namespaces, queue depth and number 1132 * of cores (workers) involved in the IO perations. 1133 */ 1134 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 1135 g_arbitration.num_namespaces : g_arbitration.num_workers; 1136 task_count *= g_arbitration.queue_depth; 1137 1138 task_pool = spdk_mempool_create(task_pool_name, task_count, 1139 sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY); 1140 if (task_pool == NULL) { 1141 fprintf(stderr, "could not initialize task pool\n"); 1142 return 1; 1143 } 1144 1145 print_configuration(argv[0]); 1146 1147 printf("Initialization complete. Launching workers.\n"); 1148 1149 /* Launch all of the slave workers */ 1150 master_core = spdk_env_get_current_core(); 1151 master_worker = NULL; 1152 worker = g_workers; 1153 while (worker != NULL) { 1154 if (worker->lcore != master_core) { 1155 spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); 1156 } else { 1157 assert(master_worker == NULL); 1158 master_worker = worker; 1159 } 1160 worker = worker->next; 1161 } 1162 1163 assert(master_worker != NULL); 1164 rc = work_fn(master_worker); 1165 1166 spdk_env_thread_wait_all(); 1167 1168 print_stats(); 1169 1170 unregister_controllers(); 1171 1172 cleanup(task_count); 1173 1174 if (rc != 0) { 1175 fprintf(stderr, "%s: errors occured\n", argv[0]); 1176 } 1177 1178 return rc; 1179 } 1180