1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/idxd.h" 35 #include "spdk/stdinc.h" 36 #include "spdk/env.h" 37 #include "spdk/event.h" 38 #include "spdk/log.h" 39 #include "spdk/string.h" 40 #include "spdk/crc32.h" 41 #include "spdk/util.h" 42 43 enum idxd_capability { 44 IDXD_COPY = 1, 45 IDXD_FILL, 46 IDXD_DUALCAST, 47 IDXD_COMPARE, 48 IDXD_CRC32C, 49 IDXD_DIF, 50 IDXD_COPY_CRC32C, 51 }; 52 53 #define DATA_PATTERN 0x5a 54 #define ALIGN_4K 0x1000 55 56 static int g_xfer_size_bytes = 4096; 57 58 /* g_allocate_depth indicates how many tasks we allocate per work_chan. It will 59 * be at least as much as the queue depth. 60 */ 61 static int g_queue_depth = 32; 62 static int g_idxd_max_per_core = 1; 63 static char *g_core_mask = "0x1"; 64 static bool g_idxd_kernel_mode = false; 65 static int g_allocate_depth = 0; 66 static int g_time_in_sec = 5; 67 static uint32_t g_crc32c_seed = 0; 68 static uint32_t g_crc32c_chained_count = 1; 69 static int g_fail_percent_goal = 0; 70 static uint8_t g_fill_pattern = 255; 71 static bool g_verify = false; 72 static const char *g_workload_type = NULL; 73 static enum idxd_capability g_workload_selection; 74 static struct worker_thread *g_workers = NULL; 75 static int g_num_workers = 0; 76 77 struct worker_thread; 78 struct idxd_chan_entry; 79 static void idxd_done(void *ref, int status); 80 81 struct idxd_device { 82 struct spdk_idxd_device *idxd; 83 TAILQ_ENTRY(idxd_device) tailq; 84 }; 85 static uint32_t g_num_devices = 0; 86 87 static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices); 88 static struct idxd_device *g_next_device; 89 90 struct idxd_task { 91 void *src; 92 struct iovec *iovs; 93 uint32_t iov_cnt; 94 void *dst; 95 void *dst2; 96 uint32_t crc_dst; 97 struct idxd_chan_entry *worker_chan; 98 int status; 99 int expected_status; /* used for the compare operation */ 100 TAILQ_ENTRY(idxd_task) link; 101 }; 102 103 struct idxd_chan_entry { 104 int idxd_chan_id; 105 struct spdk_idxd_io_channel *ch; 106 uint64_t xfer_completed; 107 uint64_t xfer_failed; 108 uint64_t injected_miscompares; 109 uint64_t current_queue_depth; 110 TAILQ_HEAD(, idxd_task) tasks_pool_head; 111 TAILQ_HEAD(, idxd_task) resubmits; 112 unsigned core; 113 bool is_draining; 114 void *task_base; 115 struct idxd_chan_entry *next; 116 }; 117 118 struct worker_thread { 119 struct idxd_chan_entry *ctx; 120 struct worker_thread *next; 121 int chan_num; 122 unsigned core; 123 }; 124 125 static void 126 dump_user_config(void) 127 { 128 printf("SPDK Configuration:\n"); 129 printf("Core mask: %s\n\n", g_core_mask); 130 printf("Idxd Perf Configuration:\n"); 131 printf("Workload Type: %s\n", g_workload_type); 132 if (g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) { 133 printf("CRC-32C seed: %u\n", g_crc32c_seed); 134 printf("vector count %u\n", g_crc32c_chained_count); 135 } else if (g_workload_selection == IDXD_FILL) { 136 printf("Fill pattern: 0x%x\n", g_fill_pattern); 137 } else if ((g_workload_selection == IDXD_COMPARE) && g_fail_percent_goal > 0) { 138 printf("Failure inject: %u percent\n", g_fail_percent_goal); 139 } 140 if (g_workload_selection == IDXD_COPY_CRC32C) { 141 printf("Vector size: %u bytes\n", g_xfer_size_bytes); 142 printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count); 143 } else { 144 printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 145 } 146 printf("Queue depth: %u\n", g_queue_depth); 147 printf("Allocated depth: %u\n", g_allocate_depth); 148 printf("Run time: %u seconds\n", g_time_in_sec); 149 printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 150 } 151 152 static void 153 attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd) 154 { 155 struct idxd_device *dev; 156 157 dev = calloc(1, sizeof(*dev)); 158 if (dev == NULL) { 159 fprintf(stderr, "Failed to allocate device struct\n"); 160 return; 161 } 162 163 dev->idxd = idxd; 164 165 TAILQ_INSERT_TAIL(&g_idxd_devices, dev, tailq); 166 g_num_devices++; 167 } 168 169 static int 170 idxd_init(void) 171 { 172 spdk_idxd_set_config(g_idxd_kernel_mode); 173 174 if (spdk_idxd_probe(NULL, attach_cb) != 0) { 175 fprintf(stderr, "idxd_probe() failed\n"); 176 return 1; 177 } 178 179 return 0; 180 } 181 182 static void 183 idxd_exit(void) 184 { 185 struct idxd_device *dev; 186 187 while (!TAILQ_EMPTY(&g_idxd_devices)) { 188 dev = TAILQ_FIRST(&g_idxd_devices); 189 TAILQ_REMOVE(&g_idxd_devices, dev, tailq); 190 if (dev->idxd) { 191 spdk_idxd_detach(dev->idxd); 192 } 193 free(dev); 194 } 195 } 196 197 static void 198 usage(void) 199 { 200 printf("idxd_perf options:\n"); 201 printf("\t[-h help message]\n"); 202 printf("\t[-a tasks to allocate per core (default: same value as -q)]\n"); 203 printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n"); 204 printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 205 printf("\t[-k use kernel idxd driver]\n"); 206 printf("\t[-m core mask for distributing I/O submission/completion work]\n"); 207 printf("\t[-o transfer size in bytes]\n"); 208 printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 209 printf("\t[-q queue depth per core]\n"); 210 printf("\t[-r max idxd devices per core can drive (default 1)]\n"); 211 printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 212 printf("\t[-t time in seconds]\n"); 213 printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n"); 214 printf("\t[-y verify result if this switch is on]\n"); 215 printf("\t\tCan be used to spread operations across a wider range of memory.\n"); 216 } 217 218 static int 219 parse_args(int argc, char **argv) 220 { 221 int argval = 0; 222 int op; 223 224 while ((op = getopt(argc, argv, "a:C:f:hkm:o:P:q:r:t:yw:")) != -1) { 225 switch (op) { 226 case 'a': 227 case 'C': 228 case 'f': 229 case 'o': 230 case 'P': 231 case 'q': 232 case 'r': 233 case 's': 234 case 't': 235 argval = spdk_strtol(optarg, 10); 236 if (argval < 0) { 237 fprintf(stderr, "-%c option must be non-negative.\n", argc); 238 usage(); 239 return 1; 240 } 241 break; 242 default: 243 break; 244 }; 245 246 switch (op) { 247 case 'a': 248 g_allocate_depth = argval; 249 break; 250 case 'C': 251 g_crc32c_chained_count = argval; 252 break; 253 case 'f': 254 g_fill_pattern = (uint8_t)argval; 255 break; 256 case 'k': 257 g_idxd_kernel_mode = true; 258 break; 259 case 'm': 260 g_core_mask = optarg; 261 break; 262 case 'o': 263 g_xfer_size_bytes = argval; 264 break; 265 case 'P': 266 g_fail_percent_goal = argval; 267 break; 268 case 'q': 269 g_queue_depth = argval; 270 break; 271 case 'r': 272 g_idxd_max_per_core = argval; 273 break; 274 case 's': 275 g_crc32c_seed = argval; 276 break; 277 case 't': 278 g_time_in_sec = argval; 279 break; 280 case 'y': 281 g_verify = true; 282 break; 283 case 'w': 284 g_workload_type = optarg; 285 if (!strcmp(g_workload_type, "copy")) { 286 g_workload_selection = IDXD_COPY; 287 } else if (!strcmp(g_workload_type, "fill")) { 288 g_workload_selection = IDXD_FILL; 289 } else if (!strcmp(g_workload_type, "crc32c")) { 290 g_workload_selection = IDXD_CRC32C; 291 } else if (!strcmp(g_workload_type, "copy_crc32c")) { 292 g_workload_selection = IDXD_COPY_CRC32C; 293 } else if (!strcmp(g_workload_type, "compare")) { 294 g_workload_selection = IDXD_COMPARE; 295 } else if (!strcmp(g_workload_type, "dualcast")) { 296 g_workload_selection = IDXD_DUALCAST; 297 } 298 break; 299 case 'h': 300 usage(); 301 exit(0); 302 default: 303 usage(); 304 return 1; 305 } 306 } 307 308 return 0; 309 } 310 311 static int 312 register_workers(void) 313 { 314 uint32_t i; 315 struct worker_thread *worker; 316 317 g_workers = NULL; 318 g_num_workers = 0; 319 320 SPDK_ENV_FOREACH_CORE(i) { 321 worker = calloc(1, sizeof(*worker)); 322 if (worker == NULL) { 323 fprintf(stderr, "Unable to allocate worker\n"); 324 return 1; 325 } 326 327 worker->core = i; 328 worker->next = g_workers; 329 g_workers = worker; 330 g_num_workers++; 331 } 332 333 return 0; 334 } 335 336 static void 337 _free_task_buffers(struct idxd_task *task) 338 { 339 uint32_t i; 340 341 if (g_workload_selection == IDXD_CRC32C) { 342 if (task->iovs) { 343 for (i = 0; i < task->iov_cnt; i++) { 344 if (task->iovs[i].iov_base) { 345 spdk_dma_free(task->iovs[i].iov_base); 346 } 347 } 348 free(task->iovs); 349 } 350 } else { 351 spdk_dma_free(task->src); 352 } 353 354 spdk_dma_free(task->dst); 355 if (g_workload_selection == IDXD_DUALCAST) { 356 spdk_dma_free(task->dst2); 357 } 358 } 359 360 static inline void 361 _free_task_buffers_in_pool(struct idxd_chan_entry *t) 362 { 363 struct idxd_task *task; 364 365 assert(t); 366 while ((task = TAILQ_FIRST(&t->tasks_pool_head))) { 367 TAILQ_REMOVE(&t->tasks_pool_head, task, link); 368 _free_task_buffers(task); 369 } 370 } 371 372 static void 373 free_idxd_chan_entry_resource(struct idxd_chan_entry *entry) 374 { 375 assert(entry != NULL); 376 377 if (entry->ch) { 378 spdk_idxd_put_channel(entry->ch); 379 } 380 381 _free_task_buffers_in_pool(entry); 382 free(entry->task_base); 383 free(entry); 384 } 385 386 static void 387 unregister_workers(void) 388 { 389 struct worker_thread *worker = g_workers, *next_worker; 390 struct idxd_chan_entry *entry, *entry1; 391 392 /* Free worker thread */ 393 while (worker) { 394 next_worker = worker->next; 395 396 entry = worker->ctx; 397 while (entry) { 398 entry1 = entry->next; 399 free_idxd_chan_entry_resource(entry); 400 entry = entry1; 401 } 402 403 free(worker); 404 worker = next_worker; 405 g_num_workers--; 406 } 407 408 assert(g_num_workers == 0); 409 } 410 411 static int 412 _get_task_data_bufs(struct idxd_task *task) 413 { 414 uint32_t align = 0; 415 uint32_t i = 0; 416 int dst_buff_len = g_xfer_size_bytes; 417 418 /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 419 * we do this for all engines to keep it simple. 420 */ 421 if (g_workload_selection == IDXD_DUALCAST) { 422 align = ALIGN_4K; 423 } 424 425 if (g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) { 426 assert(g_crc32c_chained_count > 0); 427 task->iov_cnt = g_crc32c_chained_count; 428 task->iovs = calloc(task->iov_cnt, sizeof(struct iovec)); 429 if (!task->iovs) { 430 fprintf(stderr, "cannot allocated task->iovs fot task=%p\n", task); 431 return -ENOMEM; 432 } 433 434 if (g_workload_selection == IDXD_COPY_CRC32C) { 435 dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count; 436 } 437 438 for (i = 0; i < task->iov_cnt; i++) { 439 task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 440 if (task->iovs[i].iov_base == NULL) { 441 return -ENOMEM; 442 } 443 memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes); 444 task->iovs[i].iov_len = g_xfer_size_bytes; 445 } 446 447 } else { 448 task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 449 if (task->src == NULL) { 450 fprintf(stderr, "Unable to alloc src buffer\n"); 451 return -ENOMEM; 452 } 453 454 /* For fill, set the entire src buffer so we can check if verify is enabled. */ 455 if (g_workload_selection == IDXD_FILL) { 456 memset(task->src, g_fill_pattern, g_xfer_size_bytes); 457 } else { 458 memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 459 } 460 } 461 462 if (g_workload_selection != IDXD_CRC32C) { 463 task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 464 if (task->dst == NULL) { 465 fprintf(stderr, "Unable to alloc dst buffer\n"); 466 return -ENOMEM; 467 } 468 469 /* For compare we want the buffers to match, otherwise not. */ 470 if (g_workload_selection == IDXD_COMPARE) { 471 memset(task->dst, DATA_PATTERN, dst_buff_len); 472 } else { 473 memset(task->dst, ~DATA_PATTERN, dst_buff_len); 474 } 475 } 476 477 if (g_workload_selection == IDXD_DUALCAST) { 478 task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 479 if (task->dst2 == NULL) { 480 fprintf(stderr, "Unable to alloc dst buffer\n"); 481 return -ENOMEM; 482 } 483 memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 484 } 485 486 return 0; 487 } 488 489 inline static struct idxd_task * 490 _get_task(struct idxd_chan_entry *t) 491 { 492 struct idxd_task *task; 493 494 if (!TAILQ_EMPTY(&t->tasks_pool_head)) { 495 task = TAILQ_FIRST(&t->tasks_pool_head); 496 TAILQ_REMOVE(&t->tasks_pool_head, task, link); 497 } else { 498 fprintf(stderr, "Unable to get idxd_task\n"); 499 return NULL; 500 } 501 502 return task; 503 } 504 505 static int idxd_chan_poll(struct idxd_chan_entry *chan); 506 507 static void 508 drain_io(struct idxd_chan_entry *t) 509 { 510 while (t->current_queue_depth > 0) { 511 idxd_chan_poll(t); 512 } 513 } 514 515 /* Submit one operation using the same idxd task that just completed. */ 516 static void 517 _submit_single(struct idxd_chan_entry *t, struct idxd_task *task) 518 { 519 int random_num; 520 int rc = 0; 521 struct iovec siov = {}; 522 struct iovec diov = {}; 523 int flags = 0; 524 525 assert(t); 526 527 t->current_queue_depth++; 528 529 if (!TAILQ_EMPTY(&t->resubmits)) { 530 rc = -EBUSY; 531 goto queue; 532 } 533 534 switch (g_workload_selection) { 535 case IDXD_COPY: 536 siov.iov_base = task->src; 537 siov.iov_len = g_xfer_size_bytes; 538 diov.iov_base = task->dst; 539 diov.iov_len = g_xfer_size_bytes; 540 rc = spdk_idxd_submit_copy(t->ch, &diov, 1, &siov, 1, flags, 541 idxd_done, task); 542 break; 543 case IDXD_FILL: 544 /* For fill use the first byte of the task->dst buffer */ 545 diov.iov_base = task->dst; 546 diov.iov_len = g_xfer_size_bytes; 547 rc = spdk_idxd_submit_fill(t->ch, &diov, 1, *(uint8_t *)task->src, 548 flags, idxd_done, task); 549 break; 550 case IDXD_CRC32C: 551 assert(task->iovs != NULL); 552 assert(task->iov_cnt > 0); 553 rc = spdk_idxd_submit_crc32c(t->ch, task->iovs, task->iov_cnt, 554 g_crc32c_seed, &task->crc_dst, 555 flags, idxd_done, task); 556 break; 557 case IDXD_COMPARE: 558 random_num = rand() % 100; 559 assert(task->dst != NULL); 560 if (random_num < g_fail_percent_goal) { 561 task->expected_status = -EILSEQ; 562 *(uint8_t *)task->dst = ~DATA_PATTERN; 563 } else { 564 task->expected_status = 0; 565 *(uint8_t *)task->dst = DATA_PATTERN; 566 } 567 siov.iov_base = task->src; 568 siov.iov_len = g_xfer_size_bytes; 569 diov.iov_base = task->dst; 570 diov.iov_len = g_xfer_size_bytes; 571 rc = spdk_idxd_submit_compare(t->ch, &siov, 1, &diov, 1, flags, idxd_done, task); 572 break; 573 case IDXD_DUALCAST: 574 rc = spdk_idxd_submit_dualcast(t->ch, task->dst, task->dst2, 575 task->src, g_xfer_size_bytes, flags, idxd_done, task); 576 break; 577 case IDXD_COPY_CRC32C: 578 diov.iov_base = task->dst; 579 diov.iov_len = g_xfer_size_bytes; 580 rc = spdk_idxd_submit_copy_crc32c(t->ch, &diov, 1, task->iovs, task->iov_cnt, g_crc32c_seed, 581 &task->crc_dst, 582 flags, idxd_done, task); 583 break; 584 default: 585 assert(false); 586 break; 587 } 588 589 queue: 590 if (rc) { 591 /* Queue the task to be resubmitted on the next poll. */ 592 if (rc != -EBUSY && rc != -EAGAIN) { 593 t->xfer_failed++; 594 } 595 596 TAILQ_INSERT_TAIL(&t->resubmits, task, link); 597 } 598 } 599 600 static int 601 _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) 602 { 603 uint32_t i; 604 uint32_t ttl_len = 0; 605 uint8_t *dst = (uint8_t *)_dst; 606 607 for (i = 0; i < iovcnt; i++) { 608 if (memcmp(dst, src_iovs[i].iov_base, src_iovs[i].iov_len)) { 609 return -1; 610 } 611 dst += src_iovs[i].iov_len; 612 ttl_len += src_iovs[i].iov_len; 613 } 614 615 if (ttl_len != iovcnt * g_xfer_size_bytes) { 616 return -1; 617 } 618 619 return 0; 620 } 621 622 static void 623 idxd_done(void *arg1, int status) 624 { 625 struct idxd_task *task = arg1; 626 struct idxd_chan_entry *chan = task->worker_chan; 627 uint32_t sw_crc32c; 628 629 assert(chan); 630 assert(chan->current_queue_depth > 0); 631 632 if (g_verify && status == 0) { 633 switch (g_workload_selection) { 634 case IDXD_COPY_CRC32C: 635 sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 636 if (task->crc_dst != sw_crc32c) { 637 SPDK_NOTICELOG("CRC-32C miscompare\n"); 638 chan->xfer_failed++; 639 } 640 if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) { 641 SPDK_NOTICELOG("Data miscompare\n"); 642 chan->xfer_failed++; 643 } 644 break; 645 case IDXD_CRC32C: 646 sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 647 if (task->crc_dst != sw_crc32c) { 648 SPDK_NOTICELOG("CRC-32C miscompare\n"); 649 chan->xfer_failed++; 650 } 651 break; 652 case IDXD_COPY: 653 if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 654 SPDK_NOTICELOG("Data miscompare\n"); 655 chan->xfer_failed++; 656 } 657 break; 658 case IDXD_DUALCAST: 659 if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 660 SPDK_NOTICELOG("Data miscompare, first destination\n"); 661 chan->xfer_failed++; 662 } 663 if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 664 SPDK_NOTICELOG("Data miscompare, second destination\n"); 665 chan->xfer_failed++; 666 } 667 break; 668 case IDXD_FILL: 669 if (memcmp(task->dst, task->src, g_xfer_size_bytes)) { 670 SPDK_NOTICELOG("Data miscompare\n"); 671 chan->xfer_failed++; 672 } 673 break; 674 case IDXD_COMPARE: 675 break; 676 default: 677 assert(false); 678 break; 679 } 680 } 681 682 if (task->expected_status == -EILSEQ) { 683 assert(status != 0); 684 chan->injected_miscompares++; 685 } else if (status) { 686 /* Expected to pass but the idxd module reported an error (ex: COMPARE operation). */ 687 chan->xfer_failed++; 688 } 689 690 chan->xfer_completed++; 691 chan->current_queue_depth--; 692 693 if (!chan->is_draining) { 694 _submit_single(chan, task); 695 } else { 696 TAILQ_INSERT_TAIL(&chan->tasks_pool_head, task, link); 697 } 698 } 699 700 static int 701 dump_result(void) 702 { 703 uint64_t total_completed = 0; 704 uint64_t total_failed = 0; 705 uint64_t total_miscompared = 0; 706 uint64_t total_xfer_per_sec, total_bw_in_MiBps; 707 struct worker_thread *worker = g_workers; 708 struct idxd_chan_entry *t; 709 710 printf("\nIDXD_ChanID Core Transfers Bandwidth Failed Miscompares\n"); 711 printf("------------------------------------------------------------------------\n"); 712 while (worker != NULL) { 713 t = worker->ctx; 714 while (t) { 715 uint64_t xfer_per_sec = t->xfer_completed / g_time_in_sec; 716 uint64_t bw_in_MiBps = (t->xfer_completed * g_xfer_size_bytes) / 717 (g_time_in_sec * 1024 * 1024); 718 719 total_completed += t->xfer_completed; 720 total_failed += t->xfer_failed; 721 total_miscompared += t->injected_miscompares; 722 723 if (xfer_per_sec) { 724 printf("%10d%5u%15" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n", 725 t->idxd_chan_id, worker->core, xfer_per_sec, bw_in_MiBps, t->xfer_failed, 726 t->injected_miscompares); 727 } 728 t = t->next; 729 } 730 731 worker = worker->next; 732 } 733 734 total_xfer_per_sec = total_completed / g_time_in_sec; 735 total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 736 (g_time_in_sec * 1024 * 1024); 737 738 printf("=========================================================================\n"); 739 printf("Total:%25" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n", 740 total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 741 742 return total_failed ? 1 : 0; 743 } 744 745 static int 746 submit_all(struct idxd_chan_entry *t) 747 { 748 int i; 749 int remaining = g_queue_depth; 750 struct idxd_task *task; 751 752 for (i = 0; i < remaining; i++) { 753 task = _get_task(t); 754 if (task == NULL) { 755 _free_task_buffers_in_pool(t); 756 return -1; 757 } 758 759 /* Submit as single task */ 760 _submit_single(t, task); 761 } 762 763 return 0; 764 } 765 766 static int 767 idxd_chan_poll(struct idxd_chan_entry *chan) 768 { 769 int rc; 770 struct idxd_task *task, *tmp; 771 TAILQ_HEAD(, idxd_task) swap; 772 773 rc = spdk_idxd_process_events(chan->ch); 774 if (rc < 0) { 775 return rc; 776 } 777 778 if (!TAILQ_EMPTY(&chan->resubmits)) { 779 TAILQ_INIT(&swap); 780 TAILQ_SWAP(&swap, &chan->resubmits, idxd_task, link); 781 TAILQ_FOREACH_SAFE(task, &swap, link, tmp) { 782 TAILQ_REMOVE(&swap, task, link); 783 chan->current_queue_depth--; 784 if (!chan->is_draining) { 785 _submit_single(chan, task); 786 } else { 787 TAILQ_INSERT_TAIL(&chan->tasks_pool_head, task, link); 788 } 789 } 790 } 791 792 return rc; 793 } 794 795 static int 796 work_fn(void *arg) 797 { 798 uint64_t tsc_end; 799 struct worker_thread *worker = (struct worker_thread *)arg; 800 struct idxd_chan_entry *t = NULL; 801 802 printf("Starting thread on core %u\n", worker->core); 803 804 tsc_end = spdk_get_ticks() + g_time_in_sec * spdk_get_ticks_hz(); 805 806 t = worker->ctx; 807 while (t != NULL) { 808 if (submit_all(t) != 0) { 809 return -1; 810 } 811 t = t->next; 812 } 813 814 while (1) { 815 t = worker->ctx; 816 while (t != NULL) { 817 idxd_chan_poll(t); 818 t = t->next; 819 } 820 821 if (spdk_get_ticks() > tsc_end) { 822 break; 823 } 824 } 825 826 t = worker->ctx; 827 while (t != NULL) { 828 /* begin to drain io */ 829 t->is_draining = true; 830 drain_io(t); 831 t = t->next; 832 } 833 834 return 0; 835 } 836 837 static int 838 init_env(void) 839 { 840 struct spdk_env_opts opts; 841 842 spdk_env_opts_init(&opts); 843 opts.name = "idxd_perf"; 844 opts.core_mask = g_core_mask; 845 if (spdk_env_init(&opts) < 0) { 846 return 1; 847 } 848 849 return 0; 850 } 851 852 static struct spdk_idxd_device * 853 get_next_idxd(void) 854 { 855 struct spdk_idxd_device *idxd; 856 857 if (g_next_device == NULL) { 858 return NULL; 859 } 860 861 idxd = g_next_device->idxd; 862 863 g_next_device = TAILQ_NEXT(g_next_device, tailq); 864 865 return idxd; 866 } 867 868 static int 869 init_idxd_chan_entry(struct idxd_chan_entry *t, struct spdk_idxd_device *idxd) 870 { 871 int num_tasks = g_allocate_depth; 872 struct idxd_task *task; 873 int i; 874 875 assert(t != NULL); 876 877 TAILQ_INIT(&t->tasks_pool_head); 878 TAILQ_INIT(&t->resubmits); 879 t->ch = spdk_idxd_get_channel(idxd); 880 if (t->ch == NULL) { 881 fprintf(stderr, "Failed to get channel\n"); 882 goto err; 883 } 884 885 t->task_base = calloc(g_allocate_depth, sizeof(struct idxd_task)); 886 if (t->task_base == NULL) { 887 fprintf(stderr, "Could not allocate task base.\n"); 888 goto err; 889 } 890 891 task = t->task_base; 892 for (i = 0; i < num_tasks; i++) { 893 TAILQ_INSERT_TAIL(&t->tasks_pool_head, task, link); 894 task->worker_chan = t; 895 if (_get_task_data_bufs(task)) { 896 fprintf(stderr, "Unable to get data bufs\n"); 897 goto err; 898 } 899 task++; 900 } 901 902 return 0; 903 904 err: 905 free_idxd_chan_entry_resource(t); 906 return -1; 907 } 908 909 static int 910 associate_workers_with_idxd_device(void) 911 { 912 struct spdk_idxd_device *idxd = get_next_idxd(); 913 struct worker_thread *worker = g_workers; 914 int i = 0; 915 struct idxd_chan_entry *t; 916 917 while (idxd != NULL) { 918 if (worker->chan_num >= g_idxd_max_per_core) { 919 fprintf(stdout, "Notice: we cannot let single worker assign idxd devices\n" 920 "more than %d, you need use -r while starting app to change this value\n", 921 g_idxd_max_per_core); 922 break; 923 } 924 925 t = calloc(1, sizeof(struct idxd_chan_entry)); 926 if (!t) { 927 return -1; 928 } 929 930 t->idxd_chan_id = i; 931 932 if (init_idxd_chan_entry(t, idxd)) { 933 fprintf(stdout, "idxd device=%p is bound on core=%d\n", idxd, worker->core); 934 return -1; 935 936 } 937 fprintf(stdout, "idxd device=%p is bound on core=%d\n", idxd, worker->core); 938 939 t->next = worker->ctx; 940 worker->ctx = t; 941 worker->chan_num++; 942 943 worker = worker->next; 944 if (worker == NULL) { 945 worker = g_workers; 946 } 947 948 idxd = get_next_idxd(); 949 i++; 950 } 951 952 return 0; 953 } 954 955 int 956 main(int argc, char **argv) 957 { 958 int rc; 959 struct worker_thread *worker, *main_worker; 960 unsigned main_core; 961 962 if (parse_args(argc, argv) != 0) { 963 return -1; 964 } 965 966 if (init_env() != 0) { 967 return -1; 968 } 969 970 if (register_workers() != 0) { 971 rc = -1; 972 goto cleanup; 973 } 974 975 if (idxd_init() != 0) { 976 rc = -1; 977 goto cleanup; 978 } 979 980 if (g_num_devices == 0) { 981 printf("No idxd device found\n"); 982 rc = -1; 983 goto cleanup; 984 } 985 986 if ((g_workload_selection != IDXD_COPY) && 987 (g_workload_selection != IDXD_FILL) && 988 (g_workload_selection != IDXD_CRC32C) && 989 (g_workload_selection != IDXD_COPY_CRC32C) && 990 (g_workload_selection != IDXD_COMPARE) && 991 (g_workload_selection != IDXD_DUALCAST)) { 992 usage(); 993 rc = -1; 994 goto cleanup; 995 } 996 997 if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) { 998 fprintf(stdout, "allocate depth must be at least as big as queue depth\n"); 999 usage(); 1000 rc = -1; 1001 goto cleanup; 1002 } 1003 1004 if (g_allocate_depth == 0) { 1005 g_allocate_depth = g_queue_depth; 1006 } 1007 1008 if ((g_workload_selection == IDXD_CRC32C || g_workload_selection == IDXD_COPY_CRC32C) && 1009 g_crc32c_chained_count == 0) { 1010 usage(); 1011 rc = -1; 1012 goto cleanup; 1013 } 1014 1015 g_next_device = TAILQ_FIRST(&g_idxd_devices); 1016 if (associate_workers_with_idxd_device() != 0) { 1017 rc = -1; 1018 goto cleanup; 1019 } 1020 1021 dump_user_config(); 1022 /* Launch all of the secondary workers */ 1023 main_core = spdk_env_get_current_core(); 1024 main_worker = NULL; 1025 worker = g_workers; 1026 while (worker != NULL) { 1027 if (worker->core != main_core) { 1028 spdk_env_thread_launch_pinned(worker->core, work_fn, worker); 1029 } else { 1030 assert(main_worker == NULL); 1031 main_worker = worker; 1032 } 1033 worker = worker->next; 1034 } 1035 1036 assert(main_worker != NULL); 1037 rc = work_fn(main_worker); 1038 if (rc != 0) { 1039 goto cleanup; 1040 } 1041 1042 spdk_env_thread_wait_all(); 1043 1044 rc = dump_result(); 1045 cleanup: 1046 unregister_workers(); 1047 idxd_exit(); 1048 1049 spdk_env_fini(); 1050 return rc; 1051 } 1052