1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2*a6dbe372Spaul luse * Copyright (C) 2020 Intel Corporation. 39f51cf32Spaul luse * All rights reserved. 49f51cf32Spaul luse */ 59f51cf32Spaul luse 69f51cf32Spaul luse #include "spdk/stdinc.h" 79f51cf32Spaul luse #include "spdk/thread.h" 89f51cf32Spaul luse #include "spdk/env.h" 99f51cf32Spaul luse #include "spdk/event.h" 109f51cf32Spaul luse #include "spdk/log.h" 119f51cf32Spaul luse #include "spdk/string.h" 12081f080aSBen Walker #include "spdk/accel.h" 13e69375bfSpaul luse #include "spdk/crc32.h" 140cecfcb1Spaul luse #include "spdk/util.h" 159f51cf32Spaul luse 16b9218b7aSpaul luse #define DATA_PATTERN 0x5a 170ef079c6Spaul luse #define ALIGN_4K 0x1000 18b9218b7aSpaul luse 199f51cf32Spaul luse static uint64_t g_tsc_rate; 209f51cf32Spaul luse static uint64_t g_tsc_end; 219b189667Spaul luse static int g_rc; 229f51cf32Spaul luse static int g_xfer_size_bytes = 4096; 239f51cf32Spaul luse static int g_queue_depth = 32; 24e1bf63afSJim Harris /* g_allocate_depth indicates how many tasks we allocate per worker. It will 25e1bf63afSJim Harris * be at least as much as the queue depth. 26e1bf63afSJim Harris */ 27e1bf63afSJim Harris static int g_allocate_depth = 0; 28445fe74eSpaul luse static int g_threads_per_core = 1; 299f51cf32Spaul luse static int g_time_in_sec = 5; 30e69375bfSpaul luse static uint32_t g_crc32c_seed = 0; 31850cd900Spaul luse static uint32_t g_chained_count = 1; 32b9218b7aSpaul luse static int g_fail_percent_goal = 0; 3389495464Spaul luse static uint8_t g_fill_pattern = 255; 349f51cf32Spaul luse static bool g_verify = false; 352a0c66d0Spaul luse static const char *g_workload_type = NULL; 3637b68d72Spaul luse static enum accel_opcode g_workload_selection; 379f51cf32Spaul luse static struct worker_thread *g_workers = NULL; 389f51cf32Spaul luse static int g_num_workers = 0; 399f51cf32Spaul luse static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; 409260fa0cSpaul luse static struct spdk_app_opts g_opts = {}; 41cdefd3d3Spaul luse 42cdefd3d3Spaul luse struct worker_thread; 43cdefd3d3Spaul luse static void accel_done(void *ref, int status); 44cdefd3d3Spaul luse 45445fe74eSpaul luse struct display_info { 46445fe74eSpaul luse int core; 47445fe74eSpaul luse int thread; 48445fe74eSpaul luse }; 49445fe74eSpaul luse 50cdefd3d3Spaul luse struct ap_task { 51cdefd3d3Spaul luse void *src; 5228886ac3Spaul luse struct iovec *src_iovs; 5328886ac3Spaul luse uint32_t src_iovcnt; 54850cd900Spaul luse struct iovec *dst_iovs; 55850cd900Spaul luse uint32_t dst_iovcnt; 56cdefd3d3Spaul luse void *dst; 57cdefd3d3Spaul luse void *dst2; 58221eb3f4Spaul luse uint32_t crc_dst; 59cdefd3d3Spaul luse struct worker_thread *worker; 60cdefd3d3Spaul luse int expected_status; /* used for the compare operation */ 61cdefd3d3Spaul luse TAILQ_ENTRY(ap_task) link; 62cdefd3d3Spaul luse }; 639f51cf32Spaul luse 649f51cf32Spaul luse struct worker_thread { 659f51cf32Spaul luse struct spdk_io_channel *ch; 669f51cf32Spaul luse uint64_t xfer_completed; 679f51cf32Spaul luse uint64_t xfer_failed; 68b9218b7aSpaul luse uint64_t injected_miscompares; 699f51cf32Spaul luse uint64_t current_queue_depth; 70ac9a1a83Spaul luse TAILQ_HEAD(, ap_task) tasks_pool; 719f51cf32Spaul luse struct worker_thread *next; 729f51cf32Spaul luse unsigned core; 739f51cf32Spaul luse struct spdk_thread *thread; 749f51cf32Spaul luse bool is_draining; 759f51cf32Spaul luse struct spdk_poller *is_draining_poller; 769f51cf32Spaul luse struct spdk_poller *stop_poller; 77ac9a1a83Spaul luse void *task_base; 78445fe74eSpaul luse struct display_info display; 7913067997Spaul luse enum accel_opcode workload; 809f51cf32Spaul luse }; 819f51cf32Spaul luse 829f51cf32Spaul luse static void 839260fa0cSpaul luse dump_user_config(void) 849f51cf32Spaul luse { 85712e8cb7SBen Walker const char *module_name = NULL; 869260fa0cSpaul luse int rc; 879260fa0cSpaul luse 88712e8cb7SBen Walker rc = spdk_accel_get_opc_module_name(g_workload_selection, &module_name); 899260fa0cSpaul luse if (rc) { 90712e8cb7SBen Walker printf("error getting module name (%d)\n", rc); 919260fa0cSpaul luse } 929260fa0cSpaul luse 939260fa0cSpaul luse printf("\nSPDK Configuration:\n"); 949260fa0cSpaul luse printf("Core mask: %s\n\n", g_opts.reactor_mask); 959f51cf32Spaul luse printf("Accel Perf Configuration:\n"); 962a0c66d0Spaul luse printf("Workload Type: %s\n", g_workload_type); 9737b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 98b9218b7aSpaul luse printf("CRC-32C seed: %u\n", g_crc32c_seed); 9937b68d72Spaul luse } else if (g_workload_selection == ACCEL_OPC_FILL) { 10089495464Spaul luse printf("Fill pattern: 0x%x\n", g_fill_pattern); 10137b68d72Spaul luse } else if ((g_workload_selection == ACCEL_OPC_COMPARE) && g_fail_percent_goal > 0) { 10289495464Spaul luse printf("Failure inject: %u percent\n", g_fail_percent_goal); 103e69375bfSpaul luse } 10437b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 105221eb3f4Spaul luse printf("Vector size: %u bytes\n", g_xfer_size_bytes); 106850cd900Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_chained_count); 107221eb3f4Spaul luse } else { 1089f51cf32Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 109221eb3f4Spaul luse } 110850cd900Spaul luse printf("vector count %u\n", g_chained_count); 111712e8cb7SBen Walker printf("Module: %s\n", module_name); 1129f51cf32Spaul luse printf("Queue depth: %u\n", g_queue_depth); 113e1bf63afSJim Harris printf("Allocate depth: %u\n", g_allocate_depth); 114445fe74eSpaul luse printf("# threads/core: %u\n", g_threads_per_core); 1159f51cf32Spaul luse printf("Run time: %u seconds\n", g_time_in_sec); 1169f51cf32Spaul luse printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 1179f51cf32Spaul luse } 1189f51cf32Spaul luse 1199f51cf32Spaul luse static void 1209f51cf32Spaul luse usage(void) 1219f51cf32Spaul luse { 1229f51cf32Spaul luse printf("accel_perf options:\n"); 1239f51cf32Spaul luse printf("\t[-h help message]\n"); 124f17e6705Spaul luse printf("\t[-q queue depth per core]\n"); 125850cd900Spaul luse printf("\t[-C for supported workloads, use this value to configure the io vector size to test (default 1)\n"); 126445fe74eSpaul luse printf("\t[-T number of threads per core\n"); 12788754353SZiye Yang printf("\t[-n number of channels]\n"); 1289f51cf32Spaul luse printf("\t[-o transfer size in bytes]\n"); 1299f51cf32Spaul luse printf("\t[-t time in seconds]\n"); 13084162738Spaul luse printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n"); 131e69375bfSpaul luse printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 132b9218b7aSpaul luse printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 13389495464Spaul luse printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 1342a0c66d0Spaul luse printf("\t[-y verify result if this switch is on]\n"); 135e1bf63afSJim Harris printf("\t[-a tasks to allocate per core (default: same value as -q)]\n"); 136e1bf63afSJim Harris printf("\t\tCan be used to spread operations across a wider range of memory.\n"); 1379f51cf32Spaul luse } 1389f51cf32Spaul luse 1399f51cf32Spaul luse static int 1409f51cf32Spaul luse parse_args(int argc, char *argv) 1419f51cf32Spaul luse { 142358b84b4SZiye Yang int argval = 0; 143c82d5789SJim Harris 1449f51cf32Spaul luse switch (argc) { 145e1bf63afSJim Harris case 'a': 146c82d5789SJim Harris case 'C': 147c82d5789SJim Harris case 'f': 148c82d5789SJim Harris case 'T': 149c82d5789SJim Harris case 'o': 150c82d5789SJim Harris case 'P': 151c82d5789SJim Harris case 'q': 152c82d5789SJim Harris case 's': 153c82d5789SJim Harris case 't': 154c82d5789SJim Harris argval = spdk_strtol(optarg, 10); 155c82d5789SJim Harris if (argval < 0) { 156c82d5789SJim Harris fprintf(stderr, "-%c option must be non-negative.\n", argc); 157c82d5789SJim Harris usage(); 158c82d5789SJim Harris return 1; 159c82d5789SJim Harris } 160c82d5789SJim Harris break; 161c82d5789SJim Harris default: 162c82d5789SJim Harris break; 163c82d5789SJim Harris }; 164c82d5789SJim Harris 165c82d5789SJim Harris switch (argc) { 166e1bf63afSJim Harris case 'a': 167e1bf63afSJim Harris g_allocate_depth = argval; 168e1bf63afSJim Harris break; 16988754353SZiye Yang case 'C': 170850cd900Spaul luse g_chained_count = argval; 17188754353SZiye Yang break; 17289495464Spaul luse case 'f': 173c82d5789SJim Harris g_fill_pattern = (uint8_t)argval; 17489495464Spaul luse break; 175445fe74eSpaul luse case 'T': 176c82d5789SJim Harris g_threads_per_core = argval; 177445fe74eSpaul luse break; 1789f51cf32Spaul luse case 'o': 179c82d5789SJim Harris g_xfer_size_bytes = argval; 1809f51cf32Spaul luse break; 181b9218b7aSpaul luse case 'P': 182c82d5789SJim Harris g_fail_percent_goal = argval; 183b9218b7aSpaul luse break; 1849f51cf32Spaul luse case 'q': 185c82d5789SJim Harris g_queue_depth = argval; 1869f51cf32Spaul luse break; 187e69375bfSpaul luse case 's': 188c82d5789SJim Harris g_crc32c_seed = argval; 189e69375bfSpaul luse break; 1909f51cf32Spaul luse case 't': 191c82d5789SJim Harris g_time_in_sec = argval; 1929f51cf32Spaul luse break; 1939f51cf32Spaul luse case 'y': 1949f51cf32Spaul luse g_verify = true; 1959f51cf32Spaul luse break; 1962a0c66d0Spaul luse case 'w': 1972a0c66d0Spaul luse g_workload_type = optarg; 198514be889Spaul luse if (!strcmp(g_workload_type, "copy")) { 19937b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY; 200514be889Spaul luse } else if (!strcmp(g_workload_type, "fill")) { 20137b68d72Spaul luse g_workload_selection = ACCEL_OPC_FILL; 202e69375bfSpaul luse } else if (!strcmp(g_workload_type, "crc32c")) { 20337b68d72Spaul luse g_workload_selection = ACCEL_OPC_CRC32C; 204221eb3f4Spaul luse } else if (!strcmp(g_workload_type, "copy_crc32c")) { 20537b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY_CRC32C; 206b9218b7aSpaul luse } else if (!strcmp(g_workload_type, "compare")) { 20737b68d72Spaul luse g_workload_selection = ACCEL_OPC_COMPARE; 2080ef079c6Spaul luse } else if (!strcmp(g_workload_type, "dualcast")) { 20937b68d72Spaul luse g_workload_selection = ACCEL_OPC_DUALCAST; 210b21221e1Spaul luse } else { 211b21221e1Spaul luse usage(); 212b21221e1Spaul luse return 1; 213514be889Spaul luse } 2142a0c66d0Spaul luse break; 2159f51cf32Spaul luse default: 2169f51cf32Spaul luse usage(); 2179f51cf32Spaul luse return 1; 2189f51cf32Spaul luse } 21988754353SZiye Yang 2209f51cf32Spaul luse return 0; 2219f51cf32Spaul luse } 2229f51cf32Spaul luse 223eea826a2Spaul luse static int dump_result(void); 2249f51cf32Spaul luse static void 2259f51cf32Spaul luse unregister_worker(void *arg1) 2269f51cf32Spaul luse { 2279f51cf32Spaul luse struct worker_thread *worker = arg1; 2289f51cf32Spaul luse 229ac9a1a83Spaul luse free(worker->task_base); 2309f51cf32Spaul luse spdk_put_io_channel(worker->ch); 2319f51cf32Spaul luse pthread_mutex_lock(&g_workers_lock); 2329f51cf32Spaul luse assert(g_num_workers >= 1); 2339f51cf32Spaul luse if (--g_num_workers == 0) { 2349f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2359b189667Spaul luse g_rc = dump_result(); 2369f51cf32Spaul luse spdk_app_stop(0); 2379f51cf32Spaul luse } 2389f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2399f51cf32Spaul luse } 2409f51cf32Spaul luse 2418da995c4Spaul luse static int 2428da995c4Spaul luse _get_task_data_bufs(struct ap_task *task) 2438da995c4Spaul luse { 2448da995c4Spaul luse uint32_t align = 0; 24588754353SZiye Yang uint32_t i = 0; 246221eb3f4Spaul luse int dst_buff_len = g_xfer_size_bytes; 2478da995c4Spaul luse 2488da995c4Spaul luse /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 249712e8cb7SBen Walker * we do this for all modules to keep it simple. 2508da995c4Spaul luse */ 25137b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 2528da995c4Spaul luse align = ALIGN_4K; 2538da995c4Spaul luse } 2548da995c4Spaul luse 25537b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 256850cd900Spaul luse assert(g_chained_count > 0); 257850cd900Spaul luse task->src_iovcnt = g_chained_count; 25828886ac3Spaul luse task->src_iovs = calloc(task->src_iovcnt, sizeof(struct iovec)); 25928886ac3Spaul luse if (!task->src_iovs) { 26028886ac3Spaul luse fprintf(stderr, "cannot allocated task->src_iovs fot task=%p\n", task); 26188754353SZiye Yang return -ENOMEM; 26288754353SZiye Yang } 26388754353SZiye Yang 26437b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 265850cd900Spaul luse dst_buff_len = g_xfer_size_bytes * g_chained_count; 266221eb3f4Spaul luse } 267221eb3f4Spaul luse 26828886ac3Spaul luse for (i = 0; i < task->src_iovcnt; i++) { 26928886ac3Spaul luse task->src_iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 27028886ac3Spaul luse if (task->src_iovs[i].iov_base == NULL) { 27188754353SZiye Yang return -ENOMEM; 27288754353SZiye Yang } 27328886ac3Spaul luse memset(task->src_iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes); 27428886ac3Spaul luse task->src_iovs[i].iov_len = g_xfer_size_bytes; 27588754353SZiye Yang } 27688754353SZiye Yang 27788754353SZiye Yang } else { 2788da995c4Spaul luse task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 2798da995c4Spaul luse if (task->src == NULL) { 2808da995c4Spaul luse fprintf(stderr, "Unable to alloc src buffer\n"); 2818da995c4Spaul luse return -ENOMEM; 2828da995c4Spaul luse } 28388754353SZiye Yang 28488754353SZiye Yang /* For fill, set the entire src buffer so we can check if verify is enabled. */ 28537b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_FILL) { 28688754353SZiye Yang memset(task->src, g_fill_pattern, g_xfer_size_bytes); 28788754353SZiye Yang } else { 2888da995c4Spaul luse memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 28988754353SZiye Yang } 29088754353SZiye Yang } 2918da995c4Spaul luse 29237b68d72Spaul luse if (g_workload_selection != ACCEL_OPC_CRC32C) { 293221eb3f4Spaul luse task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 2948da995c4Spaul luse if (task->dst == NULL) { 2958da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 2968da995c4Spaul luse return -ENOMEM; 2978da995c4Spaul luse } 2988da995c4Spaul luse 2998da995c4Spaul luse /* For compare we want the buffers to match, otherwise not. */ 30037b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COMPARE) { 301221eb3f4Spaul luse memset(task->dst, DATA_PATTERN, dst_buff_len); 3028da995c4Spaul luse } else { 303221eb3f4Spaul luse memset(task->dst, ~DATA_PATTERN, dst_buff_len); 304221eb3f4Spaul luse } 3058da995c4Spaul luse } 3068da995c4Spaul luse 30784162738Spaul luse /* For dualcast 2 buffers are needed for the operation. */ 30884162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 3098da995c4Spaul luse task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 3108da995c4Spaul luse if (task->dst2 == NULL) { 3118da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3128da995c4Spaul luse return -ENOMEM; 3138da995c4Spaul luse } 3148da995c4Spaul luse memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 3158da995c4Spaul luse } 3168da995c4Spaul luse 3178da995c4Spaul luse return 0; 3188da995c4Spaul luse } 3198da995c4Spaul luse 320ac9a1a83Spaul luse inline static struct ap_task * 321ac9a1a83Spaul luse _get_task(struct worker_thread *worker) 322ac9a1a83Spaul luse { 323ac9a1a83Spaul luse struct ap_task *task; 324ac9a1a83Spaul luse 325ac9a1a83Spaul luse if (!TAILQ_EMPTY(&worker->tasks_pool)) { 326ac9a1a83Spaul luse task = TAILQ_FIRST(&worker->tasks_pool); 327ac9a1a83Spaul luse TAILQ_REMOVE(&worker->tasks_pool, task, link); 328ac9a1a83Spaul luse } else { 329ac9a1a83Spaul luse fprintf(stderr, "Unable to get ap_task\n"); 330ac9a1a83Spaul luse return NULL; 331ac9a1a83Spaul luse } 332ac9a1a83Spaul luse 333ac9a1a83Spaul luse return task; 334ac9a1a83Spaul luse } 335ac9a1a83Spaul luse 336f17e6705Spaul luse /* Submit one operation using the same ap task that just completed. */ 3379f51cf32Spaul luse static void 338ac9a1a83Spaul luse _submit_single(struct worker_thread *worker, struct ap_task *task) 3399f51cf32Spaul luse { 340b9218b7aSpaul luse int random_num; 34140ec8e97Spaul luse int rc = 0; 34212c40f05Spaul luse int flags = 0; 3439f51cf32Spaul luse 3449f51cf32Spaul luse assert(worker); 3459f51cf32Spaul luse 34613067997Spaul luse switch (worker->workload) { 34737b68d72Spaul luse case ACCEL_OPC_COPY: 348e8463f87Spaul luse rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, 34912c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 350e69375bfSpaul luse break; 35137b68d72Spaul luse case ACCEL_OPC_FILL: 3522a0c66d0Spaul luse /* For fill use the first byte of the task->dst buffer */ 353ee7e31f9Spaul luse rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src, 35412c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 355e69375bfSpaul luse break; 35637b68d72Spaul luse case ACCEL_OPC_CRC32C: 357a738acd5Spaul luse rc = spdk_accel_submit_crc32cv(worker->ch, &task->crc_dst, 35828886ac3Spaul luse task->src_iovs, task->src_iovcnt, g_crc32c_seed, 35990c56d96SZiye Yang accel_done, task); 360e69375bfSpaul luse break; 36137b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 36228886ac3Spaul luse rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->src_iovs, task->src_iovcnt, 36312c40f05Spaul luse &task->crc_dst, g_crc32c_seed, flags, accel_done, task); 364221eb3f4Spaul luse break; 36537b68d72Spaul luse case ACCEL_OPC_COMPARE: 366b9218b7aSpaul luse random_num = rand() % 100; 367b9218b7aSpaul luse if (random_num < g_fail_percent_goal) { 368b9218b7aSpaul luse task->expected_status = -EILSEQ; 369b9218b7aSpaul luse *(uint8_t *)task->dst = ~DATA_PATTERN; 370b9218b7aSpaul luse } else { 371b9218b7aSpaul luse task->expected_status = 0; 372b9218b7aSpaul luse *(uint8_t *)task->dst = DATA_PATTERN; 373b9218b7aSpaul luse } 374ee7e31f9Spaul luse rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src, 375e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 376b9218b7aSpaul luse break; 37737b68d72Spaul luse case ACCEL_OPC_DUALCAST: 378ee7e31f9Spaul luse rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, 37912c40f05Spaul luse task->src, g_xfer_size_bytes, flags, accel_done, task); 3800ef079c6Spaul luse break; 381e69375bfSpaul luse default: 3822a0c66d0Spaul luse assert(false); 383e69375bfSpaul luse break; 384e69375bfSpaul luse 3852a0c66d0Spaul luse } 38640ec8e97Spaul luse 3876799d46aSpaul luse worker->current_queue_depth++; 38840ec8e97Spaul luse if (rc) { 389e8463f87Spaul luse accel_done(task, rc); 39040ec8e97Spaul luse } 3919f51cf32Spaul luse } 3929f51cf32Spaul luse 3939f51cf32Spaul luse static void 394e150f6b8SZiye Yang _free_task_buffers(struct ap_task *task) 395ac9a1a83Spaul luse { 39688754353SZiye Yang uint32_t i; 39788754353SZiye Yang 39837b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 39928886ac3Spaul luse if (task->src_iovs) { 40028886ac3Spaul luse for (i = 0; i < task->src_iovcnt; i++) { 40128886ac3Spaul luse if (task->src_iovs[i].iov_base) { 40228886ac3Spaul luse spdk_dma_free(task->src_iovs[i].iov_base); 40388754353SZiye Yang } 40488754353SZiye Yang } 40528886ac3Spaul luse free(task->src_iovs); 40688754353SZiye Yang } 40788754353SZiye Yang } else { 408ac9a1a83Spaul luse spdk_dma_free(task->src); 40988754353SZiye Yang } 41088754353SZiye Yang 411ac9a1a83Spaul luse spdk_dma_free(task->dst); 41284162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 413ac9a1a83Spaul luse spdk_dma_free(task->dst2); 414ac9a1a83Spaul luse } 415ac9a1a83Spaul luse } 416ac9a1a83Spaul luse 417221eb3f4Spaul luse static int 41828886ac3Spaul luse _vector_memcmp(void *_dst, struct iovec *src_src_iovs, uint32_t iovcnt) 419221eb3f4Spaul luse { 420221eb3f4Spaul luse uint32_t i; 421221eb3f4Spaul luse uint32_t ttl_len = 0; 422221eb3f4Spaul luse uint8_t *dst = (uint8_t *)_dst; 423221eb3f4Spaul luse 424221eb3f4Spaul luse for (i = 0; i < iovcnt; i++) { 42528886ac3Spaul luse if (memcmp(dst, src_src_iovs[i].iov_base, src_src_iovs[i].iov_len)) { 426221eb3f4Spaul luse return -1; 427221eb3f4Spaul luse } 42828886ac3Spaul luse dst += src_src_iovs[i].iov_len; 42928886ac3Spaul luse ttl_len += src_src_iovs[i].iov_len; 430221eb3f4Spaul luse } 431221eb3f4Spaul luse 432221eb3f4Spaul luse if (ttl_len != iovcnt * g_xfer_size_bytes) { 433221eb3f4Spaul luse return -1; 434221eb3f4Spaul luse } 435221eb3f4Spaul luse 436221eb3f4Spaul luse return 0; 437221eb3f4Spaul luse } 438221eb3f4Spaul luse 43913067997Spaul luse static int _worker_stop(void *arg); 44013067997Spaul luse 441fab40895Spaul luse static void 442df42f358Spaul luse accel_done(void *arg1, int status) 4439f51cf32Spaul luse { 4449f51cf32Spaul luse struct ap_task *task = arg1; 4459f51cf32Spaul luse struct worker_thread *worker = task->worker; 446e69375bfSpaul luse uint32_t sw_crc32c; 4479f51cf32Spaul luse 4489f51cf32Spaul luse assert(worker); 4499f51cf32Spaul luse assert(worker->current_queue_depth > 0); 4509f51cf32Spaul luse 451df42f358Spaul luse if (g_verify && status == 0) { 45213067997Spaul luse switch (worker->workload) { 45337b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 45428886ac3Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->src_iovs, task->src_iovcnt, ~g_crc32c_seed); 455221eb3f4Spaul luse if (task->crc_dst != sw_crc32c) { 456221eb3f4Spaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 457221eb3f4Spaul luse worker->xfer_failed++; 458221eb3f4Spaul luse } 45928886ac3Spaul luse if (_vector_memcmp(task->dst, task->src_iovs, task->src_iovcnt)) { 460221eb3f4Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 461221eb3f4Spaul luse worker->xfer_failed++; 462221eb3f4Spaul luse } 463221eb3f4Spaul luse break; 46437b68d72Spaul luse case ACCEL_OPC_CRC32C: 46528886ac3Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->src_iovs, task->src_iovcnt, ~g_crc32c_seed); 466a738acd5Spaul luse if (task->crc_dst != sw_crc32c) { 467e69375bfSpaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 468e69375bfSpaul luse worker->xfer_failed++; 469e69375bfSpaul luse } 470b9218b7aSpaul luse break; 47137b68d72Spaul luse case ACCEL_OPC_COPY: 472b9218b7aSpaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 4739f51cf32Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 4749f51cf32Spaul luse worker->xfer_failed++; 475b9218b7aSpaul luse } 476b9218b7aSpaul luse break; 47737b68d72Spaul luse case ACCEL_OPC_DUALCAST: 4780ef079c6Spaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 4790ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, first destination\n"); 4800ef079c6Spaul luse worker->xfer_failed++; 4810ef079c6Spaul luse } 4820ef079c6Spaul luse if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 4830ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, second destination\n"); 4840ef079c6Spaul luse worker->xfer_failed++; 4850ef079c6Spaul luse } 4860ef079c6Spaul luse break; 48737b68d72Spaul luse case ACCEL_OPC_FILL: 488d207237fSpaul luse if (memcmp(task->dst, task->src, g_xfer_size_bytes)) { 489d207237fSpaul luse SPDK_NOTICELOG("Data miscompare\n"); 490d207237fSpaul luse worker->xfer_failed++; 491d207237fSpaul luse } 492d207237fSpaul luse break; 49337b68d72Spaul luse case ACCEL_OPC_COMPARE: 4948cee297cSpaul luse break; 495b9218b7aSpaul luse default: 496b9218b7aSpaul luse assert(false); 497b9218b7aSpaul luse break; 4989f51cf32Spaul luse } 4999f51cf32Spaul luse } 500b9218b7aSpaul luse 501b9218b7aSpaul luse if (task->expected_status == -EILSEQ) { 502df42f358Spaul luse assert(status != 0); 503b9218b7aSpaul luse worker->injected_miscompares++; 50413067997Spaul luse status = 0; 505df42f358Spaul luse } else if (status) { 506712e8cb7SBen Walker /* Expected to pass but the accel module reported an error (ex: COMPARE operation). */ 507b9218b7aSpaul luse worker->xfer_failed++; 508b9218b7aSpaul luse } 509b9218b7aSpaul luse 5109f51cf32Spaul luse worker->xfer_completed++; 5119f51cf32Spaul luse worker->current_queue_depth--; 5129f51cf32Spaul luse 51313067997Spaul luse if (!worker->is_draining && status == 0) { 514451462f6SJim Harris TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 515451462f6SJim Harris task = _get_task(worker); 5169f51cf32Spaul luse _submit_single(worker, task); 517f17e6705Spaul luse } else { 518b34883e0SZiye Yang TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 519f17e6705Spaul luse } 5209f51cf32Spaul luse } 5219f51cf32Spaul luse 5229f51cf32Spaul luse static int 5239f51cf32Spaul luse dump_result(void) 5249f51cf32Spaul luse { 5259f51cf32Spaul luse uint64_t total_completed = 0; 5269f51cf32Spaul luse uint64_t total_failed = 0; 527b9218b7aSpaul luse uint64_t total_miscompared = 0; 5289f51cf32Spaul luse uint64_t total_xfer_per_sec, total_bw_in_MiBps; 5299f51cf32Spaul luse struct worker_thread *worker = g_workers; 5309f51cf32Spaul luse 531445fe74eSpaul luse printf("\nCore,Thread Transfers Bandwidth Failed Miscompares\n"); 532445fe74eSpaul luse printf("------------------------------------------------------------------------\n"); 5339f51cf32Spaul luse while (worker != NULL) { 5349f51cf32Spaul luse 5359f51cf32Spaul luse uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; 5369f51cf32Spaul luse uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / 5379f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 5389f51cf32Spaul luse 5399f51cf32Spaul luse total_completed += worker->xfer_completed; 5409f51cf32Spaul luse total_failed += worker->xfer_failed; 541b9218b7aSpaul luse total_miscompared += worker->injected_miscompares; 5429f51cf32Spaul luse 5439f51cf32Spaul luse if (xfer_per_sec) { 544445fe74eSpaul luse printf("%u,%u%17" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n", 545445fe74eSpaul luse worker->display.core, worker->display.thread, xfer_per_sec, 546b9218b7aSpaul luse bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); 5479f51cf32Spaul luse } 5489f51cf32Spaul luse 5499f51cf32Spaul luse worker = worker->next; 5509f51cf32Spaul luse } 5519f51cf32Spaul luse 5529f51cf32Spaul luse total_xfer_per_sec = total_completed / g_time_in_sec; 5539f51cf32Spaul luse total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 5549f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 5559f51cf32Spaul luse 556445fe74eSpaul luse printf("=========================================================================\n"); 557445fe74eSpaul luse printf("Total:%15" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n", 558b9218b7aSpaul luse total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 5599f51cf32Spaul luse 5609f51cf32Spaul luse return total_failed ? 1 : 0; 5619f51cf32Spaul luse } 5629f51cf32Spaul luse 563e150f6b8SZiye Yang static inline void 564e150f6b8SZiye Yang _free_task_buffers_in_pool(struct worker_thread *worker) 565e150f6b8SZiye Yang { 566e150f6b8SZiye Yang struct ap_task *task; 567e150f6b8SZiye Yang 568e150f6b8SZiye Yang assert(worker); 569e150f6b8SZiye Yang while ((task = TAILQ_FIRST(&worker->tasks_pool))) { 570e150f6b8SZiye Yang TAILQ_REMOVE(&worker->tasks_pool, task, link); 571e150f6b8SZiye Yang _free_task_buffers(task); 572e150f6b8SZiye Yang } 573e150f6b8SZiye Yang } 574e150f6b8SZiye Yang 5759f51cf32Spaul luse static int 5769f51cf32Spaul luse _check_draining(void *arg) 5779f51cf32Spaul luse { 5789f51cf32Spaul luse struct worker_thread *worker = arg; 5799f51cf32Spaul luse 5809f51cf32Spaul luse assert(worker); 5819f51cf32Spaul luse 5829f51cf32Spaul luse if (worker->current_queue_depth == 0) { 583e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 5849f51cf32Spaul luse spdk_poller_unregister(&worker->is_draining_poller); 5859f51cf32Spaul luse unregister_worker(worker); 5869f51cf32Spaul luse } 5879f51cf32Spaul luse 588fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 5899f51cf32Spaul luse } 5909f51cf32Spaul luse 5919f51cf32Spaul luse static int 5929f51cf32Spaul luse _worker_stop(void *arg) 5939f51cf32Spaul luse { 5949f51cf32Spaul luse struct worker_thread *worker = arg; 5959f51cf32Spaul luse 5969f51cf32Spaul luse assert(worker); 5979f51cf32Spaul luse 5989f51cf32Spaul luse spdk_poller_unregister(&worker->stop_poller); 5999f51cf32Spaul luse 6009f51cf32Spaul luse /* now let the worker drain and check it's outstanding IO with a poller */ 6019f51cf32Spaul luse worker->is_draining = true; 602ab0bc5c2SShuhei Matsumoto worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); 6039f51cf32Spaul luse 604fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 6059f51cf32Spaul luse } 6069f51cf32Spaul luse 6079f51cf32Spaul luse static void 608a34fc12bSpaul luse _init_thread(void *arg1) 609a34fc12bSpaul luse { 610a34fc12bSpaul luse struct worker_thread *worker; 611a34fc12bSpaul luse struct ap_task *task; 612998b5d66Spaul luse int i, num_tasks = g_allocate_depth; 613445fe74eSpaul luse struct display_info *display = arg1; 614a34fc12bSpaul luse 615a34fc12bSpaul luse worker = calloc(1, sizeof(*worker)); 616a34fc12bSpaul luse if (worker == NULL) { 617a34fc12bSpaul luse fprintf(stderr, "Unable to allocate worker\n"); 618445fe74eSpaul luse free(display); 619a34fc12bSpaul luse return; 620a34fc12bSpaul luse } 621a34fc12bSpaul luse 62213067997Spaul luse worker->workload = g_workload_selection; 623445fe74eSpaul luse worker->display.core = display->core; 624445fe74eSpaul luse worker->display.thread = display->thread; 625445fe74eSpaul luse free(display); 6269f51cf32Spaul luse worker->core = spdk_env_get_current_core(); 6279f51cf32Spaul luse worker->thread = spdk_get_thread(); 628eea826a2Spaul luse pthread_mutex_lock(&g_workers_lock); 629eea826a2Spaul luse g_num_workers++; 6309f51cf32Spaul luse worker->next = g_workers; 631eea826a2Spaul luse g_workers = worker; 632eea826a2Spaul luse pthread_mutex_unlock(&g_workers_lock); 63334c48f1bSBen Walker worker->ch = spdk_accel_get_io_channel(); 6342dd64cf9Spaul luse if (worker->ch == NULL) { 6352dd64cf9Spaul luse fprintf(stderr, "Unable to get an accel channel\n"); 6362dd64cf9Spaul luse goto error; 6372dd64cf9Spaul luse } 638b9218b7aSpaul luse 639f17e6705Spaul luse TAILQ_INIT(&worker->tasks_pool); 640f17e6705Spaul luse 641ac9a1a83Spaul luse worker->task_base = calloc(num_tasks, sizeof(struct ap_task)); 642ac9a1a83Spaul luse if (worker->task_base == NULL) { 643ac9a1a83Spaul luse fprintf(stderr, "Could not allocate task base.\n"); 644ac9a1a83Spaul luse goto error; 6450cecfcb1Spaul luse } 646ac9a1a83Spaul luse 647ac9a1a83Spaul luse task = worker->task_base; 648ac9a1a83Spaul luse for (i = 0; i < num_tasks; i++) { 649ac9a1a83Spaul luse TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 6504cd7ca9bSJim Harris task->worker = worker; 651ac9a1a83Spaul luse if (_get_task_data_bufs(task)) { 652ac9a1a83Spaul luse fprintf(stderr, "Unable to get data bufs\n"); 653ac9a1a83Spaul luse goto error; 654ac9a1a83Spaul luse } 655ac9a1a83Spaul luse task++; 6569f51cf32Spaul luse } 6579f51cf32Spaul luse 6589f51cf32Spaul luse /* Register a poller that will stop the worker at time elapsed */ 659ab0bc5c2SShuhei Matsumoto worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, 6609f51cf32Spaul luse g_time_in_sec * 1000000ULL); 6619f51cf32Spaul luse 662998b5d66Spaul luse /* Load up queue depth worth of operations. */ 663998b5d66Spaul luse for (i = 0; i < g_queue_depth; i++) { 664ac9a1a83Spaul luse task = _get_task(worker); 665ac9a1a83Spaul luse if (task == NULL) { 666a34fc12bSpaul luse goto error; 667b9218b7aSpaul luse } 668b9218b7aSpaul luse 6699f51cf32Spaul luse _submit_single(worker, task); 6709f51cf32Spaul luse } 671a34fc12bSpaul luse return; 672a34fc12bSpaul luse error: 673e150f6b8SZiye Yang 674e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 675ac9a1a83Spaul luse free(worker->task_base); 676a34fc12bSpaul luse spdk_app_stop(-1); 6779f51cf32Spaul luse } 6789f51cf32Spaul luse 6799f51cf32Spaul luse static void 6809f51cf32Spaul luse accel_perf_start(void *arg1) 6819f51cf32Spaul luse { 682eea826a2Spaul luse struct spdk_cpuset tmp_cpumask = {}; 683eea826a2Spaul luse char thread_name[32]; 684eea826a2Spaul luse uint32_t i; 685445fe74eSpaul luse int j; 686eea826a2Spaul luse struct spdk_thread *thread; 687445fe74eSpaul luse struct display_info *display; 688514be889Spaul luse 6899f51cf32Spaul luse g_tsc_rate = spdk_get_ticks_hz(); 6909f51cf32Spaul luse g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 6919f51cf32Spaul luse 6929260fa0cSpaul luse dump_user_config(); 6939260fa0cSpaul luse 6949f51cf32Spaul luse printf("Running for %d seconds...\n", g_time_in_sec); 6959f51cf32Spaul luse fflush(stdout); 6969f51cf32Spaul luse 697eea826a2Spaul luse /* Create worker threads for each core that was specified. */ 698eea826a2Spaul luse SPDK_ENV_FOREACH_CORE(i) { 699445fe74eSpaul luse for (j = 0; j < g_threads_per_core; j++) { 700445fe74eSpaul luse snprintf(thread_name, sizeof(thread_name), "ap_worker_%u_%u", i, j); 701eea826a2Spaul luse spdk_cpuset_zero(&tmp_cpumask); 702eea826a2Spaul luse spdk_cpuset_set_cpu(&tmp_cpumask, i, true); 703eea826a2Spaul luse thread = spdk_thread_create(thread_name, &tmp_cpumask); 704445fe74eSpaul luse display = calloc(1, sizeof(*display)); 705445fe74eSpaul luse if (display == NULL) { 706445fe74eSpaul luse fprintf(stderr, "Unable to allocate memory\n"); 707445fe74eSpaul luse spdk_app_stop(-1); 708445fe74eSpaul luse return; 709445fe74eSpaul luse } 710445fe74eSpaul luse display->core = i; 711445fe74eSpaul luse display->thread = j; 712445fe74eSpaul luse spdk_thread_send_msg(thread, _init_thread, display); 713445fe74eSpaul luse } 714eea826a2Spaul luse } 7159f51cf32Spaul luse } 7169f51cf32Spaul luse 7179f51cf32Spaul luse int 7189f51cf32Spaul luse main(int argc, char **argv) 7199f51cf32Spaul luse { 7209f51cf32Spaul luse struct worker_thread *worker, *tmp; 7219f51cf32Spaul luse 7229f51cf32Spaul luse pthread_mutex_init(&g_workers_lock, NULL); 7239260fa0cSpaul luse spdk_app_opts_init(&g_opts, sizeof(g_opts)); 7249260fa0cSpaul luse g_opts.name = "accel_perf"; 7259260fa0cSpaul luse g_opts.reactor_mask = "0x1"; 7269260fa0cSpaul luse if (spdk_app_parse_args(argc, argv, &g_opts, "a:C:o:q:t:yw:P:f:T:", NULL, parse_args, 7271e2b38baSyidong0635 usage) != SPDK_APP_PARSE_ARGS_SUCCESS) { 7289b189667Spaul luse g_rc = -1; 7299f51cf32Spaul luse goto cleanup; 7309f51cf32Spaul luse } 7319f51cf32Spaul luse 73237b68d72Spaul luse if ((g_workload_selection != ACCEL_OPC_COPY) && 73337b68d72Spaul luse (g_workload_selection != ACCEL_OPC_FILL) && 73437b68d72Spaul luse (g_workload_selection != ACCEL_OPC_CRC32C) && 73537b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COPY_CRC32C) && 73637b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COMPARE) && 73784162738Spaul luse (g_workload_selection != ACCEL_OPC_DUALCAST)) { 7382a0c66d0Spaul luse usage(); 7399b189667Spaul luse g_rc = -1; 7402a0c66d0Spaul luse goto cleanup; 7412a0c66d0Spaul luse } 7422a0c66d0Spaul luse 743e1bf63afSJim Harris if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) { 744e1bf63afSJim Harris fprintf(stdout, "allocate depth must be at least as big as queue depth\n"); 745e1bf63afSJim Harris usage(); 746e1bf63afSJim Harris g_rc = -1; 747e1bf63afSJim Harris goto cleanup; 748e1bf63afSJim Harris } 749e1bf63afSJim Harris 750e1bf63afSJim Harris if (g_allocate_depth == 0) { 751e1bf63afSJim Harris g_allocate_depth = g_queue_depth; 752e1bf63afSJim Harris } 753e1bf63afSJim Harris 75437b68d72Spaul luse if ((g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) && 755850cd900Spaul luse g_chained_count == 0) { 75688754353SZiye Yang usage(); 75788754353SZiye Yang g_rc = -1; 75888754353SZiye Yang goto cleanup; 75988754353SZiye Yang } 76088754353SZiye Yang 7619260fa0cSpaul luse g_rc = spdk_app_start(&g_opts, accel_perf_start, NULL); 7629b189667Spaul luse if (g_rc) { 7639f51cf32Spaul luse SPDK_ERRLOG("ERROR starting application\n"); 7649f51cf32Spaul luse } 7659f51cf32Spaul luse 7669f51cf32Spaul luse pthread_mutex_destroy(&g_workers_lock); 7679f51cf32Spaul luse 7689f51cf32Spaul luse worker = g_workers; 7699f51cf32Spaul luse while (worker) { 7709f51cf32Spaul luse tmp = worker->next; 7719f51cf32Spaul luse free(worker); 7729f51cf32Spaul luse worker = tmp; 7739f51cf32Spaul luse } 7749f51cf32Spaul luse cleanup: 7759f51cf32Spaul luse spdk_app_fini(); 7769b189667Spaul luse return g_rc; 7779f51cf32Spaul luse } 778