1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 #include "spdk/thread.h" 36 #include "spdk/env.h" 37 #include "spdk/event.h" 38 #include "spdk/log.h" 39 #include "spdk/string.h" 40 #include "spdk/accel_engine.h" 41 #include "spdk/crc32.h" 42 43 #define DATA_PATTERN 0x5a 44 #define ALIGN_4K 0x1000 45 46 static uint64_t g_tsc_rate; 47 static uint64_t g_tsc_us_rate; 48 static uint64_t g_tsc_end; 49 static int g_xfer_size_bytes = 4096; 50 static int g_queue_depth = 32; 51 static int g_time_in_sec = 5; 52 static uint32_t g_crc32c_seed = 0; 53 static int g_fail_percent_goal = 0; 54 static uint8_t g_fill_pattern = 255; 55 static bool g_verify = false; 56 static const char *g_workload_type = NULL; 57 static enum accel_capability g_workload_selection; 58 static struct worker_thread *g_workers = NULL; 59 static int g_num_workers = 0; 60 static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; 61 62 struct worker_thread { 63 struct spdk_io_channel *ch; 64 uint64_t xfer_completed; 65 uint64_t xfer_failed; 66 uint64_t injected_miscompares; 67 uint64_t current_queue_depth; 68 struct spdk_mempool *task_pool; 69 struct worker_thread *next; 70 unsigned core; 71 struct spdk_thread *thread; 72 bool is_draining; 73 struct spdk_poller *is_draining_poller; 74 struct spdk_poller *stop_poller; 75 }; 76 77 struct ap_task { 78 void *src; 79 void *dst; 80 void *dst2; 81 struct worker_thread *worker; 82 int status; 83 int expected_status; /* used for compare */ 84 }; 85 86 inline static struct ap_task * 87 __ap_task_from_accel_task(struct spdk_accel_task *at) 88 { 89 return (struct ap_task *)((uintptr_t)at - sizeof(struct ap_task)); 90 } 91 92 inline static struct spdk_accel_task * 93 __accel_task_from_ap_task(struct ap_task *ap) 94 { 95 return (struct spdk_accel_task *)((uintptr_t)ap + sizeof(struct ap_task)); 96 } 97 98 static void 99 dump_user_config(struct spdk_app_opts *opts) 100 { 101 printf("SPDK Configuration:\n"); 102 printf("Core mask: %s\n\n", opts->reactor_mask); 103 printf("Accel Perf Configuration:\n"); 104 printf("Workload Type: %s\n", g_workload_type); 105 if (g_workload_selection == ACCEL_CRC32C) { 106 printf("CRC-32C seed: %u\n", g_crc32c_seed); 107 } else if (g_workload_selection == ACCEL_FILL) { 108 printf("Fill pattern: 0x%x\n", g_fill_pattern); 109 } else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) { 110 printf("Failure inject: %u percent\n", g_fail_percent_goal); 111 } 112 printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 113 printf("Queue depth: %u\n", g_queue_depth); 114 printf("Run time: %u seconds\n", g_time_in_sec); 115 printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 116 } 117 118 static void 119 usage(void) 120 { 121 printf("accel_perf options:\n"); 122 printf("\t[-h help message]\n"); 123 printf("\t[-q queue depth]\n"); 124 printf("\t[-n number of channels]\n"); 125 printf("\t[-o transfer size in bytes]\n"); 126 printf("\t[-t time in seconds]\n"); 127 printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n"); 128 printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 129 printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 130 printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 131 printf("\t[-y verify result if this switch is on]\n"); 132 } 133 134 static int 135 parse_args(int argc, char *argv) 136 { 137 switch (argc) { 138 case 'f': 139 g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10); 140 break; 141 case 'o': 142 g_xfer_size_bytes = spdk_strtol(optarg, 10); 143 break; 144 case 'P': 145 g_fail_percent_goal = spdk_strtol(optarg, 10); 146 break; 147 case 'q': 148 g_queue_depth = spdk_strtol(optarg, 10); 149 break; 150 case 's': 151 g_crc32c_seed = spdk_strtol(optarg, 10); 152 break; 153 case 't': 154 g_time_in_sec = spdk_strtol(optarg, 10); 155 break; 156 case 'y': 157 g_verify = true; 158 break; 159 case 'w': 160 g_workload_type = optarg; 161 if (!strcmp(g_workload_type, "copy")) { 162 g_workload_selection = ACCEL_COPY; 163 } else if (!strcmp(g_workload_type, "fill")) { 164 g_workload_selection = ACCEL_FILL; 165 } else if (!strcmp(g_workload_type, "crc32c")) { 166 g_workload_selection = ACCEL_CRC32C; 167 } else if (!strcmp(g_workload_type, "compare")) { 168 g_workload_selection = ACCEL_COMPARE; 169 } else if (!strcmp(g_workload_type, "dualcast")) { 170 g_workload_selection = ACCEL_DUALCAST; 171 } 172 break; 173 default: 174 usage(); 175 return 1; 176 } 177 return 0; 178 } 179 180 static void 181 unregister_worker(void *arg1) 182 { 183 struct worker_thread *worker = arg1; 184 185 spdk_mempool_free(worker->task_pool); 186 spdk_put_io_channel(worker->ch); 187 pthread_mutex_lock(&g_workers_lock); 188 assert(g_num_workers >= 1); 189 if (--g_num_workers == 0) { 190 pthread_mutex_unlock(&g_workers_lock); 191 spdk_app_stop(0); 192 } 193 pthread_mutex_unlock(&g_workers_lock); 194 } 195 196 static void accel_done(void *ref, int status); 197 198 static void 199 _submit_single(void *arg1, void *arg2) 200 { 201 struct worker_thread *worker = arg1; 202 struct ap_task *task = arg2; 203 int random_num; 204 int rc = 0; 205 206 assert(worker); 207 208 task->worker = worker; 209 task->worker->current_queue_depth++; 210 switch (g_workload_selection) { 211 case ACCEL_COPY: 212 rc = spdk_accel_submit_copy(__accel_task_from_ap_task(task), 213 worker->ch, task->dst, 214 task->src, g_xfer_size_bytes, accel_done); 215 break; 216 case ACCEL_FILL: 217 /* For fill use the first byte of the task->dst buffer */ 218 rc = spdk_accel_submit_fill(__accel_task_from_ap_task(task), 219 worker->ch, task->dst, *(uint8_t *)task->src, 220 g_xfer_size_bytes, accel_done); 221 break; 222 case ACCEL_CRC32C: 223 rc = spdk_accel_submit_crc32c(__accel_task_from_ap_task(task), 224 worker->ch, (uint32_t *)task->dst, task->src, g_crc32c_seed, 225 g_xfer_size_bytes, accel_done); 226 break; 227 case ACCEL_COMPARE: 228 random_num = rand() % 100; 229 if (random_num < g_fail_percent_goal) { 230 task->expected_status = -EILSEQ; 231 *(uint8_t *)task->dst = ~DATA_PATTERN; 232 } else { 233 task->expected_status = 0; 234 *(uint8_t *)task->dst = DATA_PATTERN; 235 } 236 rc = spdk_accel_submit_compare(__accel_task_from_ap_task(task), 237 worker->ch, task->dst, task->src, 238 g_xfer_size_bytes, accel_done); 239 break; 240 case ACCEL_DUALCAST: 241 rc = spdk_accel_submit_dualcast(__accel_task_from_ap_task(task), 242 worker->ch, task->dst, task->dst2, 243 task->src, g_xfer_size_bytes, accel_done); 244 break; 245 default: 246 assert(false); 247 break; 248 249 } 250 251 if (rc) { 252 accel_done(__accel_task_from_ap_task(task), rc); 253 } 254 } 255 256 static void 257 _accel_done(void *arg1) 258 { 259 struct ap_task *task = arg1; 260 struct worker_thread *worker = task->worker; 261 uint32_t sw_crc32c; 262 263 assert(worker); 264 assert(worker->current_queue_depth > 0); 265 266 if (g_verify && task->status == 0) { 267 switch (g_workload_selection) { 268 case ACCEL_CRC32C: 269 /* calculate sw CRC-32C and compare to sw aceel result. */ 270 sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed); 271 if (*(uint32_t *)task->dst != sw_crc32c) { 272 SPDK_NOTICELOG("CRC-32C miscompare\n"); 273 worker->xfer_failed++; 274 } 275 break; 276 case ACCEL_COPY: 277 if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 278 SPDK_NOTICELOG("Data miscompare\n"); 279 worker->xfer_failed++; 280 } 281 break; 282 case ACCEL_DUALCAST: 283 if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 284 SPDK_NOTICELOG("Data miscompare, first destination\n"); 285 worker->xfer_failed++; 286 } 287 if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 288 SPDK_NOTICELOG("Data miscompare, second destination\n"); 289 worker->xfer_failed++; 290 } 291 break; 292 default: 293 assert(false); 294 break; 295 } 296 } 297 298 if (task->expected_status == -EILSEQ) { 299 assert(task->status != 0); 300 worker->injected_miscompares++; 301 } else if (task->status) { 302 /* Expected to pass but API reported error. */ 303 worker->xfer_failed++; 304 } 305 306 worker->xfer_completed++; 307 worker->current_queue_depth--; 308 309 if (!worker->is_draining) { 310 _submit_single(worker, task); 311 } else { 312 spdk_free(task->src); 313 spdk_free(task->dst); 314 if (g_workload_selection == ACCEL_DUALCAST) { 315 spdk_free(task->dst2); 316 } 317 spdk_mempool_put(worker->task_pool, task); 318 } 319 } 320 321 static int 322 dump_result(void) 323 { 324 uint64_t total_completed = 0; 325 uint64_t total_failed = 0; 326 uint64_t total_miscompared = 0; 327 uint64_t total_xfer_per_sec, total_bw_in_MiBps; 328 struct worker_thread *worker = g_workers; 329 330 printf("\nCore Transfers Bandwidth Failed Miscompares\n"); 331 printf("-----------------------------------------------------------------\n"); 332 while (worker != NULL) { 333 334 uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; 335 uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / 336 (g_time_in_sec * 1024 * 1024); 337 338 total_completed += worker->xfer_completed; 339 total_failed += worker->xfer_failed; 340 total_miscompared += worker->injected_miscompares; 341 342 if (xfer_per_sec) { 343 printf("%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64 "\n", 344 worker->core, xfer_per_sec, 345 bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); 346 } 347 348 worker = worker->next; 349 } 350 351 total_xfer_per_sec = total_completed / g_time_in_sec; 352 total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 353 (g_time_in_sec * 1024 * 1024); 354 355 printf("==================================================================\n"); 356 printf("Total:%16" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64"\n\n", 357 total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 358 359 return total_failed ? 1 : 0; 360 } 361 362 static int 363 _check_draining(void *arg) 364 { 365 struct worker_thread *worker = arg; 366 367 assert(worker); 368 369 if (worker->current_queue_depth == 0) { 370 spdk_poller_unregister(&worker->is_draining_poller); 371 unregister_worker(worker); 372 } 373 374 return -1; 375 } 376 377 static int 378 _worker_stop(void *arg) 379 { 380 struct worker_thread *worker = arg; 381 382 assert(worker); 383 384 spdk_poller_unregister(&worker->stop_poller); 385 386 /* now let the worker drain and check it's outstanding IO with a poller */ 387 worker->is_draining = true; 388 worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); 389 390 return 0; 391 } 392 393 static void 394 _init_thread_done(void *ctx) 395 { 396 } 397 398 static void 399 _init_thread(void *arg1) 400 { 401 struct worker_thread *worker; 402 char task_pool_name[30]; 403 struct ap_task *task; 404 int i; 405 uint32_t align = 0; 406 407 worker = calloc(1, sizeof(*worker)); 408 if (worker == NULL) { 409 fprintf(stderr, "Unable to allocate worker\n"); 410 return; 411 } 412 413 /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 414 * we do this for all engines to keep it simple. 415 */ 416 if (g_workload_selection == ACCEL_DUALCAST) { 417 align = ALIGN_4K; 418 } 419 420 worker->core = spdk_env_get_current_core(); 421 worker->thread = spdk_get_thread(); 422 worker->next = g_workers; 423 worker->ch = spdk_accel_engine_get_io_channel(); 424 425 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", g_num_workers); 426 worker->task_pool = spdk_mempool_create(task_pool_name, 427 g_queue_depth, 428 spdk_accel_task_size() + sizeof(struct ap_task), 429 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, 430 SPDK_ENV_SOCKET_ID_ANY); 431 if (!worker->task_pool) { 432 fprintf(stderr, "Could not allocate buffer pool.\n"); 433 free(worker); 434 return; 435 } 436 437 /* Register a poller that will stop the worker at time elapsed */ 438 worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, 439 g_time_in_sec * 1000000ULL); 440 441 g_workers = worker; 442 pthread_mutex_lock(&g_workers_lock); 443 g_num_workers++; 444 pthread_mutex_unlock(&g_workers_lock); 445 446 for (i = 0; i < g_queue_depth; i++) { 447 task = spdk_mempool_get(worker->task_pool); 448 if (!task) { 449 fprintf(stderr, "Unable to get accel_task\n"); 450 return; 451 } 452 453 task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 454 if (task->src == NULL) { 455 fprintf(stderr, "Unable to alloc src buffer\n"); 456 return; 457 } 458 memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 459 460 task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 461 if (task->dst == NULL) { 462 fprintf(stderr, "Unable to alloc dst buffer\n"); 463 return; 464 } 465 466 if (g_workload_selection == ACCEL_DUALCAST) { 467 task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 468 if (task->dst2 == NULL) { 469 fprintf(stderr, "Unable to alloc dst buffer\n"); 470 return; 471 } 472 memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 473 } 474 475 /* For compare we want the buffers to match, otherwise not. */ 476 if (g_workload_selection == ACCEL_COMPARE) { 477 memset(task->dst, DATA_PATTERN, g_xfer_size_bytes); 478 } else { 479 memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes); 480 } 481 482 _submit_single(worker, task); 483 } 484 } 485 486 static void 487 accel_done(void *ref, int status) 488 { 489 struct ap_task *task = __ap_task_from_accel_task(ref); 490 struct worker_thread *worker = task->worker; 491 492 assert(worker); 493 494 task->status = status; 495 spdk_thread_send_msg(worker->thread, _accel_done, task); 496 } 497 498 static void 499 accel_perf_start(void *arg1) 500 { 501 uint64_t capabilites; 502 struct spdk_io_channel *accel_ch; 503 504 accel_ch = spdk_accel_engine_get_io_channel(); 505 capabilites = spdk_accel_get_capabilities(accel_ch); 506 spdk_put_io_channel(accel_ch); 507 508 if ((capabilites & g_workload_selection) != g_workload_selection) { 509 SPDK_ERRLOG("Selected workload is not supported by the current engine\n"); 510 SPDK_NOTICELOG("Software engine is selected by default, enable a HW engine via RPC\n\n"); 511 spdk_app_stop(-1); 512 return; 513 } 514 515 g_tsc_rate = spdk_get_ticks_hz(); 516 g_tsc_us_rate = g_tsc_rate / (1000 * 1000); 517 g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 518 519 printf("Running for %d seconds...\n", g_time_in_sec); 520 fflush(stdout); 521 522 spdk_for_each_thread(_init_thread, NULL, _init_thread_done); 523 } 524 525 int 526 main(int argc, char **argv) 527 { 528 struct spdk_app_opts opts = {}; 529 struct worker_thread *worker, *tmp; 530 int rc = 0; 531 532 pthread_mutex_init(&g_workers_lock, NULL); 533 spdk_app_opts_init(&opts); 534 opts.reactor_mask = "0x1"; 535 if ((rc = spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:", NULL, parse_args, 536 usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { 537 rc = -1; 538 goto cleanup; 539 } 540 541 if ((g_workload_selection != ACCEL_COPY) && 542 (g_workload_selection != ACCEL_FILL) && 543 (g_workload_selection != ACCEL_CRC32C) && 544 (g_workload_selection != ACCEL_COMPARE) && 545 (g_workload_selection != ACCEL_DUALCAST)) { 546 usage(); 547 rc = -1; 548 goto cleanup; 549 } 550 551 dump_user_config(&opts); 552 rc = spdk_app_start(&opts, accel_perf_start, NULL); 553 if (rc) { 554 SPDK_ERRLOG("ERROR starting application\n"); 555 } else { 556 dump_result(); 557 } 558 559 pthread_mutex_destroy(&g_workers_lock); 560 561 worker = g_workers; 562 while (worker) { 563 tmp = worker->next; 564 free(worker); 565 worker = tmp; 566 } 567 cleanup: 568 spdk_app_fini(); 569 return rc; 570 } 571