/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2021 Intel Corporation.
 * All rights reserved.
 */

#include "spdk/idxd.h"
#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/crc32.h"
#include "spdk/util.h"

enum idxd_capability {
	IDXD_COPY = 1,
	IDXD_FILL,
	IDXD_DUALCAST,
	IDXD_COMPARE,
	IDXD_CRC32C,
	IDXD_DIF,
	IDXD_COPY_CRC32C,
};

#define DATA_PATTERN 0x5a
#define ALIGN_4K 0x1000

static int g_xfer_size_bytes = 4096;
static int g_queue_depth = 32;
static int g_idxd_max_per_core = 1;
static char *g_core_mask = "0x1";
static bool g_idxd_kernel_mode = false;
/* g_allocate_depth indicates how many tasks we allocate per work_chan. It will
 * be at least as much as the queue depth.
 */
static int g_allocate_depth = 0;
static int g_time_in_sec = 5;
static uint32_t g_crc32c_seed = 0;
static uint32_t g_crc32c_chained_count = 1;
static int g_fail_percent_goal = 0;
static uint8_t g_fill_pattern = 255;
static bool g_verify = false;
static const char *g_workload_type = NULL;
static enum idxd_capability g_workload_selection;
static struct worker_thread *g_workers = NULL;
static int g_num_workers = 0;

struct worker_thread;
struct idxd_chan_entry;
static void idxd_done(void *ref, int status);

struct idxd_device {
	struct spdk_idxd_device *idxd;
	TAILQ_ENTRY(idxd_device) tailq;
};
static uint32_t g_num_devices = 0;

static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices);
static struct idxd_device *g_next_device;

struct idxd_task {
	void *src;
	struct iovec *iovs;
	uint32_t iov_cnt;
	void *dst;
	void *dst2;
	uint32_t crc_dst;
	struct idxd_chan_entry *worker_chan;
	int status;
	int expected_status; /* used for the compare operation */
	TAILQ_ENTRY(idxd_task) link;
};

struct idxd_chan_entry {
	int idxd_chan_id;
	struct spdk_idxd_io_channel *ch;
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t injected_miscompares;
	uint64_t current_queue_depth;
	TAILQ_HEAD(, idxd_task) tasks_pool_head;
	TAILQ_HEAD(, idxd_task) resubmits;
	unsigned core;
	bool is_draining;
	void *task_base;
	struct idxd_chan_entry *next;
};

struct worker_thread {
	struct idxd_chan_entry *ctx;
	struct worker_thread *next;
	int chan_num;
	unsigned core;
};

static void
dump_user_config(void)
{
	printf("SPDK Configuration:\n");
	printf("Core mask:       %s\n\n", g_core_mask);
	printf("Idxd Perf Configuration:\n");
	printf("Workload Type:   %s\n", g_workload_type);
	if (g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) {
		printf("CRC-32C seed:    %u\n", g_crc32c_seed);
		printf("Vector count:    %u\n", g_crc32c_chained_count);
	} else if (g_workload_selection == IDXD_FILL) {
		printf("Fill pattern:    0x%x\n", g_fill_pattern);
	} else if ((g_workload_selection == IDXD_COMPARE) && g_fail_percent_goal > 0) {
		printf("Failure inject:  %u percent\n", g_fail_percent_goal);
	}
	if (g_workload_selection == IDXD_COPY_CRC32C) {
		printf("Vector size:     %u bytes\n", g_xfer_size_bytes);
		printf("Transfer size:   %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count);
	} else {
		printf("Transfer size:   %u bytes\n", g_xfer_size_bytes);
	}
	printf("Queue depth:     %u\n", g_queue_depth);
	printf("Allocated depth: %u\n", g_allocate_depth);
	printf("Run time:        %u seconds\n", g_time_in_sec);
	printf("Verify:          %s\n\n", g_verify ? "Yes" : "No");
}
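/*
 * Callback invoked by spdk_idxd_probe() for each device we claim: record the
 * device on the global list so channels can be handed out to workers later.
 */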
static void
attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
{
	struct idxd_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		fprintf(stderr, "Failed to allocate device struct\n");
		return;
	}

	dev->idxd = idxd;

	TAILQ_INSERT_TAIL(&g_idxd_devices, dev, tailq);
	g_num_devices++;
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
{
	/* this tool will gladly claim all types of IDXD devices. */
	return true;
}

static int
idxd_init(void)
{
	spdk_idxd_set_config(g_idxd_kernel_mode);

	if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
		fprintf(stderr, "spdk_idxd_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
idxd_exit(void)
{
	struct idxd_device *dev;

	while (!TAILQ_EMPTY(&g_idxd_devices)) {
		dev = TAILQ_FIRST(&g_idxd_devices);
		TAILQ_REMOVE(&g_idxd_devices, dev, tailq);
		if (dev->idxd) {
			spdk_idxd_detach(dev->idxd);
		}
		free(dev);
	}
}

static void
usage(void)
{
	printf("idxd_perf options:\n");
	printf("\t[-h help message]\n");
	printf("\t[-a tasks to allocate per core (default: same value as -q)]\n");
	printf("\t\tCan be used to spread operations across a wider range of memory.\n");
	printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)]\n");
	printf("\t[-f for fill workload, use this BYTE value (default 255)]\n");
	printf("\t[-k use kernel idxd driver]\n");
	printf("\t[-m core mask for distributing I/O submission/completion work]\n");
	printf("\t[-o transfer size in bytes]\n");
	printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)]\n");
	printf("\t[-q queue depth per core]\n");
	printf("\t[-r max idxd devices per core (default 1)]\n");
	printf("\t[-s for crc32c workload, use this seed value (default 0)]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast]\n");
	printf("\t[-y verify result if this switch is on]\n");
}

static int
parse_args(int argc, char **argv)
{
	int argval = 0;
	int op;

	while ((op = getopt(argc, argv, "a:C:f:hkm:o:P:q:r:s:t:yw:")) != -1) {
		switch (op) {
		case 'a':
		case 'C':
		case 'f':
		case 'o':
		case 'P':
		case 'q':
		case 'r':
		case 's':
		case 't':
			argval = spdk_strtol(optarg, 10);
			if (argval < 0) {
				fprintf(stderr, "-%c option must be non-negative.\n", op);
				usage();
				return 1;
			}
			break;
		default:
			break;
		}

		switch (op) {
		case 'a':
			g_allocate_depth = argval;
			break;
		case 'C':
			g_crc32c_chained_count = argval;
			break;
		case 'f':
			g_fill_pattern = (uint8_t)argval;
			break;
		case 'k':
			g_idxd_kernel_mode = true;
			break;
		case 'm':
			g_core_mask = optarg;
			break;
		case 'o':
			g_xfer_size_bytes = argval;
			break;
		case 'P':
			g_fail_percent_goal = argval;
			break;
		case 'q':
			g_queue_depth = argval;
			break;
		case 'r':
			g_idxd_max_per_core = argval;
			break;
		case 's':
			g_crc32c_seed = argval;
			break;
		case 't':
			g_time_in_sec = argval;
			break;
		case 'y':
			g_verify = true;
			break;
		case 'w':
			g_workload_type = optarg;
			if (!strcmp(g_workload_type, "copy")) {
				g_workload_selection = IDXD_COPY;
			} else if (!strcmp(g_workload_type, "fill")) {
				g_workload_selection = IDXD_FILL;
			} else if (!strcmp(g_workload_type, "crc32c")) {
				g_workload_selection = IDXD_CRC32C;
			} else if (!strcmp(g_workload_type, "copy_crc32c")) {
				g_workload_selection = IDXD_COPY_CRC32C;
			} else if (!strcmp(g_workload_type, "compare")) {
				g_workload_selection = IDXD_COMPARE;
			} else if (!strcmp(g_workload_type, "dualcast")) {
				g_workload_selection = IDXD_DUALCAST;
			}
			break;
		case 'h':
			usage();
			exit(0);
		default:
			usage();
			return 1;
		}
	}

	return 0;
}
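/*
 * Create one worker_thread per core in g_core_mask; workers are linked into
 * the singly-linked list headed by g_workers.
 */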
static int
register_workers(void)
{
	uint32_t i;
	struct worker_thread *worker;

	g_workers = NULL;
	g_num_workers = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		worker = calloc(1, sizeof(*worker));
		if (worker == NULL) {
			fprintf(stderr, "Unable to allocate worker\n");
			return 1;
		}

		worker->core = i;
		worker->next = g_workers;
		g_workers = worker;
		g_num_workers++;
	}

	return 0;
}

static void
_free_task_buffers(struct idxd_task *task)
{
	uint32_t i;

	/* crc32c and copy_crc32c both use an iovec chain for the source. */
	if (g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) {
		if (task->iovs) {
			for (i = 0; i < task->iov_cnt; i++) {
				if (task->iovs[i].iov_base) {
					spdk_dma_free(task->iovs[i].iov_base);
				}
			}
			free(task->iovs);
		}
	} else {
		spdk_dma_free(task->src);
	}

	spdk_dma_free(task->dst);
	if (g_workload_selection == IDXD_DUALCAST) {
		spdk_dma_free(task->dst2);
	}
}

static inline void
_free_task_buffers_in_pool(struct idxd_chan_entry *t)
{
	struct idxd_task *task;

	assert(t);
	while ((task = TAILQ_FIRST(&t->tasks_pool_head))) {
		TAILQ_REMOVE(&t->tasks_pool_head, task, link);
		_free_task_buffers(task);
	}
}

static void
free_idxd_chan_entry_resource(struct idxd_chan_entry *entry)
{
	assert(entry != NULL);

	if (entry->ch) {
		spdk_idxd_put_channel(entry->ch);
	}

	_free_task_buffers_in_pool(entry);
	free(entry->task_base);
	free(entry);
}

static void
unregister_workers(void)
{
	struct worker_thread *worker = g_workers, *next_worker;
	struct idxd_chan_entry *entry, *entry1;

	/* Free worker thread */
	while (worker) {
		next_worker = worker->next;

		entry = worker->ctx;
		while (entry) {
			entry1 = entry->next;
			free_idxd_chan_entry_resource(entry);
			entry = entry1;
		}

		free(worker);
		worker = next_worker;
		g_num_workers--;
	}

	assert(g_num_workers == 0);
}
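/*
 * Allocate and pre-fill the DMA-able source/destination buffers for one task.
 * The layout depends on the selected workload: the crc32c workloads use an
 * iovec chain for the source, and dualcast needs a second aligned destination.
 */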
static int
_get_task_data_bufs(struct idxd_task *task)
{
	uint32_t align = 0;
	uint32_t i = 0;
	int dst_buff_len = g_xfer_size_bytes;

	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
	 * we do this for all engines to keep it simple.
	 */
	if (g_workload_selection == IDXD_DUALCAST) {
		align = ALIGN_4K;
	}

	if (g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) {
		assert(g_crc32c_chained_count > 0);
		task->iov_cnt = g_crc32c_chained_count;
		task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
		if (!task->iovs) {
			fprintf(stderr, "cannot allocate task->iovs for task=%p\n", task);
			return -ENOMEM;
		}

		if (g_workload_selection == IDXD_COPY_CRC32C) {
			dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count;
		}

		for (i = 0; i < task->iov_cnt; i++) {
			task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
			if (task->iovs[i].iov_base == NULL) {
				return -ENOMEM;
			}
			memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes);
			task->iovs[i].iov_len = g_xfer_size_bytes;
		}
	} else {
		task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
		if (task->src == NULL) {
			fprintf(stderr, "Unable to alloc src buffer\n");
			return -ENOMEM;
		}

		/* For fill, set the entire src buffer so it can be checked when verify is enabled. */
		if (g_workload_selection == IDXD_FILL) {
			memset(task->src, g_fill_pattern, g_xfer_size_bytes);
		} else {
			memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
		}
	}

	if (g_workload_selection != IDXD_CRC32C) {
		task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL);
		if (task->dst == NULL) {
			fprintf(stderr, "Unable to alloc dst buffer\n");
			return -ENOMEM;
		}

		/* For compare we want the buffers to match, otherwise not. */
		if (g_workload_selection == IDXD_COMPARE) {
			memset(task->dst, DATA_PATTERN, dst_buff_len);
		} else {
			memset(task->dst, ~DATA_PATTERN, dst_buff_len);
		}
	}

	if (g_workload_selection == IDXD_DUALCAST) {
		task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
		if (task->dst2 == NULL) {
			fprintf(stderr, "Unable to alloc dst2 buffer\n");
			return -ENOMEM;
		}
		memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes);
	}

	return 0;
}

static inline struct idxd_task *
_get_task(struct idxd_chan_entry *t)
{
	struct idxd_task *task;

	if (!TAILQ_EMPTY(&t->tasks_pool_head)) {
		task = TAILQ_FIRST(&t->tasks_pool_head);
		TAILQ_REMOVE(&t->tasks_pool_head, task, link);
	} else {
		fprintf(stderr, "Unable to get idxd_task\n");
		return NULL;
	}

	return task;
}

static int idxd_chan_poll(struct idxd_chan_entry *chan);

static void
drain_io(struct idxd_chan_entry *t)
{
	while (t->current_queue_depth > 0) {
		idxd_chan_poll(t);
	}
}
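/*
 * Ordering note: if an earlier submission bounced with -EBUSY/-EAGAIN it waits
 * on t->resubmits until idxd_chan_poll() replays it, and any new work queues
 * behind those resubmits rather than being issued out of order.
 */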
/* Submit one operation using the same idxd task that just completed. */
static void
_submit_single(struct idxd_chan_entry *t, struct idxd_task *task)
{
	int random_num;
	int rc = 0;
	struct iovec siov = {};
	struct iovec diov = {};
	int flags = 0;

	assert(t);

	t->current_queue_depth++;

	if (!TAILQ_EMPTY(&t->resubmits)) {
		rc = -EBUSY;
		goto queue;
	}

	switch (g_workload_selection) {
	case IDXD_COPY:
		siov.iov_base = task->src;
		siov.iov_len = g_xfer_size_bytes;
		diov.iov_base = task->dst;
		diov.iov_len = g_xfer_size_bytes;
		rc = spdk_idxd_submit_copy(t->ch, &diov, 1, &siov, 1, flags,
					   idxd_done, task);
		break;
	case IDXD_FILL:
		/* For fill, the first byte of task->src supplies the fill pattern. */
		diov.iov_base = task->dst;
		diov.iov_len = g_xfer_size_bytes;
		rc = spdk_idxd_submit_fill(t->ch, &diov, 1, *(uint8_t *)task->src,
					   flags, idxd_done, task);
		break;
	case IDXD_CRC32C:
		assert(task->iovs != NULL);
		assert(task->iov_cnt > 0);
		rc = spdk_idxd_submit_crc32c(t->ch, task->iovs, task->iov_cnt,
					     g_crc32c_seed, &task->crc_dst,
					     flags, idxd_done, task);
		break;
	case IDXD_COMPARE:
		random_num = rand() % 100;
		assert(task->dst != NULL);
		if (random_num < g_fail_percent_goal) {
			task->expected_status = -EILSEQ;
			*(uint8_t *)task->dst = ~DATA_PATTERN;
		} else {
			task->expected_status = 0;
			*(uint8_t *)task->dst = DATA_PATTERN;
		}
		siov.iov_base = task->src;
		siov.iov_len = g_xfer_size_bytes;
		diov.iov_base = task->dst;
		diov.iov_len = g_xfer_size_bytes;
		rc = spdk_idxd_submit_compare(t->ch, &siov, 1, &diov, 1, flags, idxd_done, task);
		break;
	case IDXD_DUALCAST:
		rc = spdk_idxd_submit_dualcast(t->ch, task->dst, task->dst2,
					       task->src, g_xfer_size_bytes, flags, idxd_done, task);
		break;
	case IDXD_COPY_CRC32C:
		diov.iov_base = task->dst;
		/* The destination spans the entire chained source. */
		diov.iov_len = g_xfer_size_bytes * g_crc32c_chained_count;
		rc = spdk_idxd_submit_copy_crc32c(t->ch, &diov, 1, task->iovs, task->iov_cnt, g_crc32c_seed,
						  &task->crc_dst,
						  flags, idxd_done, task);
		break;
	default:
		assert(false);
		break;
	}

queue:
	if (rc) {
		/* Queue the task to be resubmitted on the next poll. */
		if (rc != -EBUSY && rc != -EAGAIN) {
			t->xfer_failed++;
		}

		TAILQ_INSERT_TAIL(&t->resubmits, task, link);
	}
}
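/*
 * Compare a flat buffer against the concatenation of an iovec chain; returns
 * 0 on a full match, -1 on any mismatch or length inconsistency.
 */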
static int
_vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt)
{
	uint32_t i;
	uint32_t ttl_len = 0;
	uint8_t *dst = (uint8_t *)_dst;

	for (i = 0; i < iovcnt; i++) {
		if (memcmp(dst, src_iovs[i].iov_base, src_iovs[i].iov_len)) {
			return -1;
		}
		dst += src_iovs[i].iov_len;
		ttl_len += src_iovs[i].iov_len;
	}

	if (ttl_len != iovcnt * g_xfer_size_bytes) {
		return -1;
	}

	return 0;
}

static void
idxd_done(void *arg1, int status)
{
	struct idxd_task *task = arg1;
	struct idxd_chan_entry *chan = task->worker_chan;
	uint32_t sw_crc32c;

	assert(chan);
	assert(chan->current_queue_depth > 0);

	if (g_verify && status == 0) {
		switch (g_workload_selection) {
		case IDXD_COPY_CRC32C:
			sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (task->crc_dst != sw_crc32c) {
				SPDK_NOTICELOG("CRC-32C miscompare\n");
				chan->xfer_failed++;
			}
			if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) {
				SPDK_NOTICELOG("Data miscompare\n");
				chan->xfer_failed++;
			}
			break;
		case IDXD_CRC32C:
			sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (task->crc_dst != sw_crc32c) {
				SPDK_NOTICELOG("CRC-32C miscompare\n");
				chan->xfer_failed++;
			}
			break;
		case IDXD_COPY:
			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare\n");
				chan->xfer_failed++;
			}
			break;
		case IDXD_DUALCAST:
			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare, first destination\n");
				chan->xfer_failed++;
			}
			if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare, second destination\n");
				chan->xfer_failed++;
			}
			break;
		case IDXD_FILL:
			if (memcmp(task->dst, task->src, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare\n");
				chan->xfer_failed++;
			}
			break;
		case IDXD_COMPARE:
			break;
		default:
			assert(false);
			break;
		}
	}

	if (task->expected_status == -EILSEQ) {
		assert(status != 0);
		chan->injected_miscompares++;
	} else if (status) {
		/* Expected to pass but the idxd module reported an error (ex: COMPARE operation). */
		chan->xfer_failed++;
	}

	chan->xfer_completed++;
	chan->current_queue_depth--;

	if (!chan->is_draining) {
		_submit_single(chan, task);
	} else {
		TAILQ_INSERT_TAIL(&chan->tasks_pool_head, task, link);
	}
}
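/*
 * Print per-channel statistics and the aggregate totals. Bandwidth is derived
 * from g_xfer_size_bytes per completed operation, so for copy_crc32c with -C
 * greater than 1 the MiB/s figure understates the bytes actually moved.
 */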
static int
dump_result(void)
{
	uint64_t total_completed = 0;
	uint64_t total_failed = 0;
	uint64_t total_miscompared = 0;
	uint64_t total_xfer_per_sec, total_bw_in_MiBps;
	struct worker_thread *worker = g_workers;
	struct idxd_chan_entry *t;

	printf("\nIDXD_ChanID Core      Transfers      Bandwidth     Failed Miscompares\n");
	printf("------------------------------------------------------------------------\n");
	while (worker != NULL) {
		t = worker->ctx;
		while (t) {
			uint64_t xfer_per_sec = t->xfer_completed / g_time_in_sec;
			uint64_t bw_in_MiBps = (t->xfer_completed * g_xfer_size_bytes) /
					       (g_time_in_sec * 1024 * 1024);

			total_completed += t->xfer_completed;
			total_failed += t->xfer_failed;
			total_miscompared += t->injected_miscompares;

			if (xfer_per_sec) {
				printf("%10d%5u%15" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n",
				       t->idxd_chan_id, worker->core, xfer_per_sec, bw_in_MiBps, t->xfer_failed,
				       t->injected_miscompares);
			}
			t = t->next;
		}

		worker = worker->next;
	}

	total_xfer_per_sec = total_completed / g_time_in_sec;
	total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) /
			    (g_time_in_sec * 1024 * 1024);

	printf("=========================================================================\n");
	printf("Total:%25" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64 "\n\n",
	       total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared);

	return total_failed ? 1 : 0;
}
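/*
 * Prime a channel with g_queue_depth outstanding operations. After this,
 * idxd_done() keeps the pipeline full by resubmitting each task as it
 * completes, until the channel starts draining.
 */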
static int
submit_all(struct idxd_chan_entry *t)
{
	int i;
	int remaining = g_queue_depth;
	struct idxd_task *task;

	for (i = 0; i < remaining; i++) {
		task = _get_task(t);
		if (task == NULL) {
			_free_task_buffers_in_pool(t);
			return -1;
		}

		/* Submit as single task */
		_submit_single(t, task);
	}

	return 0;
}

static int
idxd_chan_poll(struct idxd_chan_entry *chan)
{
	int rc;
	struct idxd_task *task, *tmp;
	TAILQ_HEAD(, idxd_task) swap;

	rc = spdk_idxd_process_events(chan->ch);
	if (rc < 0) {
		return rc;
	}

	if (!TAILQ_EMPTY(&chan->resubmits)) {
		TAILQ_INIT(&swap);
		TAILQ_SWAP(&swap, &chan->resubmits, idxd_task, link);
		TAILQ_FOREACH_SAFE(task, &swap, link, tmp) {
			TAILQ_REMOVE(&swap, task, link);
			chan->current_queue_depth--;
			if (!chan->is_draining) {
				_submit_single(chan, task);
			} else {
				TAILQ_INSERT_TAIL(&chan->tasks_pool_head, task, link);
			}
		}
	}

	return rc;
}

static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct idxd_chan_entry *t = NULL;

	printf("Starting thread on core %u\n", worker->core);

	tsc_end = spdk_get_ticks() + g_time_in_sec * spdk_get_ticks_hz();

	t = worker->ctx;
	while (t != NULL) {
		if (submit_all(t) != 0) {
			return -1;
		}
		t = t->next;
	}

	while (1) {
		t = worker->ctx;
		while (t != NULL) {
			idxd_chan_poll(t);
			t = t->next;
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	t = worker->ctx;
	while (t != NULL) {
		/* begin to drain io */
		t->is_draining = true;
		drain_io(t);
		t = t->next;
	}

	return 0;
}

static int
init_env(void)
{
	struct spdk_env_opts opts;

	spdk_env_opts_init(&opts);
	opts.name = "idxd_perf";
	opts.core_mask = g_core_mask;
	if (spdk_env_init(&opts) < 0) {
		return 1;
	}

	return 0;
}

static struct spdk_idxd_device *
get_next_idxd(void)
{
	struct spdk_idxd_device *idxd;

	if (g_next_device == NULL) {
		return NULL;
	}

	idxd = g_next_device->idxd;

	g_next_device = TAILQ_NEXT(g_next_device, tailq);

	return idxd;
}

static int
init_idxd_chan_entry(struct idxd_chan_entry *t, struct spdk_idxd_device *idxd)
{
	int num_tasks = g_allocate_depth;
	struct idxd_task *task;
	int i;

	assert(t != NULL);

	TAILQ_INIT(&t->tasks_pool_head);
	TAILQ_INIT(&t->resubmits);
	t->ch = spdk_idxd_get_channel(idxd);
	if (t->ch == NULL) {
		fprintf(stderr, "Failed to get channel\n");
		goto err;
	}

	t->task_base = calloc(num_tasks, sizeof(struct idxd_task));
	if (t->task_base == NULL) {
		fprintf(stderr, "Could not allocate task base.\n");
		goto err;
	}

	task = t->task_base;
	for (i = 0; i < num_tasks; i++) {
		TAILQ_INSERT_TAIL(&t->tasks_pool_head, task, link);
		task->worker_chan = t;
		if (_get_task_data_bufs(task)) {
			fprintf(stderr, "Unable to get data bufs\n");
			goto err;
		}
		task++;
	}

	return 0;

err:
	free_idxd_chan_entry_resource(t);
	return -1;
}
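/*
 * Distribute the discovered idxd devices across the workers round-robin; each
 * assignment becomes one idxd_chan_entry on the worker's ctx list, capped at
 * g_idxd_max_per_core entries per worker.
 */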
static int
associate_workers_with_idxd_device(void)
{
	struct spdk_idxd_device *idxd = get_next_idxd();
	struct worker_thread *worker = g_workers;
	int i = 0;
	struct idxd_chan_entry *t;

	while (idxd != NULL) {
		if (worker->chan_num >= g_idxd_max_per_core) {
			fprintf(stdout, "Notice: a single worker cannot be assigned more than %d idxd\n"
				"devices; use the -r option at startup to change this limit.\n",
				g_idxd_max_per_core);
			break;
		}

		t = calloc(1, sizeof(struct idxd_chan_entry));
		if (!t) {
			return -1;
		}

		t->idxd_chan_id = i;

		if (init_idxd_chan_entry(t, idxd)) {
			fprintf(stderr, "Failed to initialize idxd_chan_entry for device=%p\n", idxd);
			return -1;
		}
		fprintf(stdout, "idxd device=%p is bound on core=%u\n", idxd, worker->core);

		t->next = worker->ctx;
		worker->ctx = t;
		worker->chan_num++;

		worker = worker->next;
		if (worker == NULL) {
			worker = g_workers;
		}

		idxd = get_next_idxd();
		i++;
	}

	return 0;
}

int
main(int argc, char **argv)
{
	int rc;
	struct worker_thread *worker, *main_worker;
	unsigned main_core;

	if (parse_args(argc, argv) != 0) {
		return -1;
	}

	if (init_env() != 0) {
		return -1;
	}

	if (register_workers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (idxd_init() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (g_num_devices == 0) {
		printf("No idxd device found\n");
		rc = -1;
		goto cleanup;
	}

	if ((g_workload_selection != IDXD_COPY) &&
	    (g_workload_selection != IDXD_FILL) &&
	    (g_workload_selection != IDXD_CRC32C) &&
	    (g_workload_selection != IDXD_COPY_CRC32C) &&
	    (g_workload_selection != IDXD_COMPARE) &&
	    (g_workload_selection != IDXD_DUALCAST)) {
		usage();
		rc = -1;
		goto cleanup;
	}

	if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) {
		fprintf(stdout, "allocate depth must be at least as big as queue depth\n");
		usage();
		rc = -1;
		goto cleanup;
	}

	if (g_allocate_depth == 0) {
		g_allocate_depth = g_queue_depth;
	}

	if ((g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) &&
	    g_crc32c_chained_count == 0) {
		usage();
		rc = -1;
		goto cleanup;
	}

	g_next_device = TAILQ_FIRST(&g_idxd_devices);
	if (associate_workers_with_idxd_device() != 0) {
		rc = -1;
		goto cleanup;
	}

	dump_user_config();
	/* Launch all of the secondary workers */
	main_core = spdk_env_get_current_core();
	main_worker = NULL;
	worker = g_workers;
	while (worker != NULL) {
		if (worker->core != main_core) {
			spdk_env_thread_launch_pinned(worker->core, work_fn, worker);
		} else {
			assert(main_worker == NULL);
			main_worker = worker;
		}
		worker = worker->next;
	}

	assert(main_worker != NULL);
	rc = work_fn(main_worker);
	if (rc != 0) {
		goto cleanup;
	}

	spdk_env_thread_wait_all();

	rc = dump_result();
cleanup:
	unregister_workers();
	idxd_exit();

	spdk_env_fini();
	return rc;
}
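/*
 * Example invocation (hypothetical values): run a 10 second copy workload at
 * queue depth 64 across four cores, verifying each completion:
 *
 *   ./idxd_perf -w copy -o 4096 -q 64 -t 10 -m 0xf -y
 */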