1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 29f51cf32Spaul luse * Copyright (c) Intel Corporation. 39f51cf32Spaul luse * All rights reserved. 49f51cf32Spaul luse */ 59f51cf32Spaul luse 69f51cf32Spaul luse #include "spdk/stdinc.h" 79f51cf32Spaul luse #include "spdk/thread.h" 89f51cf32Spaul luse #include "spdk/env.h" 99f51cf32Spaul luse #include "spdk/event.h" 109f51cf32Spaul luse #include "spdk/log.h" 119f51cf32Spaul luse #include "spdk/string.h" 12081f080aSBen Walker #include "spdk/accel.h" 13e69375bfSpaul luse #include "spdk/crc32.h" 140cecfcb1Spaul luse #include "spdk/util.h" 159f51cf32Spaul luse 16b9218b7aSpaul luse #define DATA_PATTERN 0x5a 170ef079c6Spaul luse #define ALIGN_4K 0x1000 18b9218b7aSpaul luse 199f51cf32Spaul luse static uint64_t g_tsc_rate; 209f51cf32Spaul luse static uint64_t g_tsc_end; 219b189667Spaul luse static int g_rc; 229f51cf32Spaul luse static int g_xfer_size_bytes = 4096; 239f51cf32Spaul luse static int g_queue_depth = 32; 24e1bf63afSJim Harris /* g_allocate_depth indicates how many tasks we allocate per worker. It will 25e1bf63afSJim Harris * be at least as much as the queue depth. 26e1bf63afSJim Harris */ 27e1bf63afSJim Harris static int g_allocate_depth = 0; 28445fe74eSpaul luse static int g_threads_per_core = 1; 299f51cf32Spaul luse static int g_time_in_sec = 5; 30e69375bfSpaul luse static uint32_t g_crc32c_seed = 0; 3188754353SZiye Yang static uint32_t g_crc32c_chained_count = 1; 32b9218b7aSpaul luse static int g_fail_percent_goal = 0; 3389495464Spaul luse static uint8_t g_fill_pattern = 255; 349f51cf32Spaul luse static bool g_verify = false; 352a0c66d0Spaul luse static const char *g_workload_type = NULL; 3637b68d72Spaul luse static enum accel_opcode g_workload_selection; 379f51cf32Spaul luse static struct worker_thread *g_workers = NULL; 389f51cf32Spaul luse static int g_num_workers = 0; 399f51cf32Spaul luse static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; 409260fa0cSpaul luse static struct spdk_app_opts g_opts = {}; 41cdefd3d3Spaul luse 42cdefd3d3Spaul luse struct worker_thread; 43cdefd3d3Spaul luse static void accel_done(void *ref, int status); 44cdefd3d3Spaul luse 45445fe74eSpaul luse struct display_info { 46445fe74eSpaul luse int core; 47445fe74eSpaul luse int thread; 48445fe74eSpaul luse }; 49445fe74eSpaul luse 50cdefd3d3Spaul luse struct ap_task { 51cdefd3d3Spaul luse void *src; 5288754353SZiye Yang struct iovec *iovs; 5388754353SZiye Yang uint32_t iov_cnt; 54cdefd3d3Spaul luse void *dst; 55cdefd3d3Spaul luse void *dst2; 56221eb3f4Spaul luse uint32_t crc_dst; 57cdefd3d3Spaul luse struct worker_thread *worker; 58cdefd3d3Spaul luse int expected_status; /* used for the compare operation */ 59cdefd3d3Spaul luse TAILQ_ENTRY(ap_task) link; 60cdefd3d3Spaul luse }; 619f51cf32Spaul luse 629f51cf32Spaul luse struct worker_thread { 639f51cf32Spaul luse struct spdk_io_channel *ch; 649f51cf32Spaul luse uint64_t xfer_completed; 659f51cf32Spaul luse uint64_t xfer_failed; 66b9218b7aSpaul luse uint64_t injected_miscompares; 679f51cf32Spaul luse uint64_t current_queue_depth; 68ac9a1a83Spaul luse TAILQ_HEAD(, ap_task) tasks_pool; 699f51cf32Spaul luse struct worker_thread *next; 709f51cf32Spaul luse unsigned core; 719f51cf32Spaul luse struct spdk_thread *thread; 729f51cf32Spaul luse bool is_draining; 739f51cf32Spaul luse struct spdk_poller *is_draining_poller; 749f51cf32Spaul luse struct spdk_poller *stop_poller; 75ac9a1a83Spaul luse void *task_base; 76445fe74eSpaul luse struct display_info display; 7713067997Spaul luse enum accel_opcode workload; 789f51cf32Spaul luse }; 799f51cf32Spaul luse 809f51cf32Spaul luse static void 819260fa0cSpaul luse dump_user_config(void) 829f51cf32Spaul luse { 839260fa0cSpaul luse const char *engine_name = NULL; 849260fa0cSpaul luse int rc; 859260fa0cSpaul luse 869260fa0cSpaul luse rc = spdk_accel_get_opc_engine_name(g_workload_selection, &engine_name); 879260fa0cSpaul luse if (rc) { 889260fa0cSpaul luse printf("error getting engine name (%d)\n", rc); 899260fa0cSpaul luse } 909260fa0cSpaul luse 919260fa0cSpaul luse printf("\nSPDK Configuration:\n"); 929260fa0cSpaul luse printf("Core mask: %s\n\n", g_opts.reactor_mask); 939f51cf32Spaul luse printf("Accel Perf Configuration:\n"); 942a0c66d0Spaul luse printf("Workload Type: %s\n", g_workload_type); 9537b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 96b9218b7aSpaul luse printf("CRC-32C seed: %u\n", g_crc32c_seed); 97221eb3f4Spaul luse printf("vector count %u\n", g_crc32c_chained_count); 9837b68d72Spaul luse } else if (g_workload_selection == ACCEL_OPC_FILL) { 9989495464Spaul luse printf("Fill pattern: 0x%x\n", g_fill_pattern); 10037b68d72Spaul luse } else if ((g_workload_selection == ACCEL_OPC_COMPARE) && g_fail_percent_goal > 0) { 10189495464Spaul luse printf("Failure inject: %u percent\n", g_fail_percent_goal); 102e69375bfSpaul luse } 10337b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 104221eb3f4Spaul luse printf("Vector size: %u bytes\n", g_xfer_size_bytes); 105221eb3f4Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count); 106221eb3f4Spaul luse } else { 1079f51cf32Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 108221eb3f4Spaul luse } 1099260fa0cSpaul luse printf("Engine: %s\n", engine_name); 1109f51cf32Spaul luse printf("Queue depth: %u\n", g_queue_depth); 111e1bf63afSJim Harris printf("Allocate depth: %u\n", g_allocate_depth); 112445fe74eSpaul luse printf("# threads/core: %u\n", g_threads_per_core); 1139f51cf32Spaul luse printf("Run time: %u seconds\n", g_time_in_sec); 1149f51cf32Spaul luse printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 1159f51cf32Spaul luse } 1169f51cf32Spaul luse 1179f51cf32Spaul luse static void 1189f51cf32Spaul luse usage(void) 1199f51cf32Spaul luse { 1209f51cf32Spaul luse printf("accel_perf options:\n"); 1219f51cf32Spaul luse printf("\t[-h help message]\n"); 122f17e6705Spaul luse printf("\t[-q queue depth per core]\n"); 123221eb3f4Spaul luse printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n"); 124445fe74eSpaul luse printf("\t[-T number of threads per core\n"); 12588754353SZiye Yang printf("\t[-n number of channels]\n"); 1269f51cf32Spaul luse printf("\t[-o transfer size in bytes]\n"); 1279f51cf32Spaul luse printf("\t[-t time in seconds]\n"); 12884162738Spaul luse printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n"); 129e69375bfSpaul luse printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 130b9218b7aSpaul luse printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 13189495464Spaul luse printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 1322a0c66d0Spaul luse printf("\t[-y verify result if this switch is on]\n"); 133e1bf63afSJim Harris printf("\t[-a tasks to allocate per core (default: same value as -q)]\n"); 134e1bf63afSJim Harris printf("\t\tCan be used to spread operations across a wider range of memory.\n"); 1359f51cf32Spaul luse } 1369f51cf32Spaul luse 1379f51cf32Spaul luse static int 1389f51cf32Spaul luse parse_args(int argc, char *argv) 1399f51cf32Spaul luse { 140358b84b4SZiye Yang int argval = 0; 141c82d5789SJim Harris 1429f51cf32Spaul luse switch (argc) { 143e1bf63afSJim Harris case 'a': 144c82d5789SJim Harris case 'C': 145c82d5789SJim Harris case 'f': 146c82d5789SJim Harris case 'T': 147c82d5789SJim Harris case 'o': 148c82d5789SJim Harris case 'P': 149c82d5789SJim Harris case 'q': 150c82d5789SJim Harris case 's': 151c82d5789SJim Harris case 't': 152c82d5789SJim Harris argval = spdk_strtol(optarg, 10); 153c82d5789SJim Harris if (argval < 0) { 154c82d5789SJim Harris fprintf(stderr, "-%c option must be non-negative.\n", argc); 155c82d5789SJim Harris usage(); 156c82d5789SJim Harris return 1; 157c82d5789SJim Harris } 158c82d5789SJim Harris break; 159c82d5789SJim Harris default: 160c82d5789SJim Harris break; 161c82d5789SJim Harris }; 162c82d5789SJim Harris 163c82d5789SJim Harris switch (argc) { 164e1bf63afSJim Harris case 'a': 165e1bf63afSJim Harris g_allocate_depth = argval; 166e1bf63afSJim Harris break; 16788754353SZiye Yang case 'C': 168c82d5789SJim Harris g_crc32c_chained_count = argval; 16988754353SZiye Yang break; 17089495464Spaul luse case 'f': 171c82d5789SJim Harris g_fill_pattern = (uint8_t)argval; 17289495464Spaul luse break; 173445fe74eSpaul luse case 'T': 174c82d5789SJim Harris g_threads_per_core = argval; 175445fe74eSpaul luse break; 1769f51cf32Spaul luse case 'o': 177c82d5789SJim Harris g_xfer_size_bytes = argval; 1789f51cf32Spaul luse break; 179b9218b7aSpaul luse case 'P': 180c82d5789SJim Harris g_fail_percent_goal = argval; 181b9218b7aSpaul luse break; 1829f51cf32Spaul luse case 'q': 183c82d5789SJim Harris g_queue_depth = argval; 1849f51cf32Spaul luse break; 185e69375bfSpaul luse case 's': 186c82d5789SJim Harris g_crc32c_seed = argval; 187e69375bfSpaul luse break; 1889f51cf32Spaul luse case 't': 189c82d5789SJim Harris g_time_in_sec = argval; 1909f51cf32Spaul luse break; 1919f51cf32Spaul luse case 'y': 1929f51cf32Spaul luse g_verify = true; 1939f51cf32Spaul luse break; 1942a0c66d0Spaul luse case 'w': 1952a0c66d0Spaul luse g_workload_type = optarg; 196514be889Spaul luse if (!strcmp(g_workload_type, "copy")) { 19737b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY; 198514be889Spaul luse } else if (!strcmp(g_workload_type, "fill")) { 19937b68d72Spaul luse g_workload_selection = ACCEL_OPC_FILL; 200e69375bfSpaul luse } else if (!strcmp(g_workload_type, "crc32c")) { 20137b68d72Spaul luse g_workload_selection = ACCEL_OPC_CRC32C; 202221eb3f4Spaul luse } else if (!strcmp(g_workload_type, "copy_crc32c")) { 20337b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY_CRC32C; 204b9218b7aSpaul luse } else if (!strcmp(g_workload_type, "compare")) { 20537b68d72Spaul luse g_workload_selection = ACCEL_OPC_COMPARE; 2060ef079c6Spaul luse } else if (!strcmp(g_workload_type, "dualcast")) { 20737b68d72Spaul luse g_workload_selection = ACCEL_OPC_DUALCAST; 208b21221e1Spaul luse } else { 209b21221e1Spaul luse usage(); 210b21221e1Spaul luse return 1; 211514be889Spaul luse } 2122a0c66d0Spaul luse break; 2139f51cf32Spaul luse default: 2149f51cf32Spaul luse usage(); 2159f51cf32Spaul luse return 1; 2169f51cf32Spaul luse } 21788754353SZiye Yang 2189f51cf32Spaul luse return 0; 2199f51cf32Spaul luse } 2209f51cf32Spaul luse 221eea826a2Spaul luse static int dump_result(void); 2229f51cf32Spaul luse static void 2239f51cf32Spaul luse unregister_worker(void *arg1) 2249f51cf32Spaul luse { 2259f51cf32Spaul luse struct worker_thread *worker = arg1; 2269f51cf32Spaul luse 227ac9a1a83Spaul luse free(worker->task_base); 2289f51cf32Spaul luse spdk_put_io_channel(worker->ch); 2299f51cf32Spaul luse pthread_mutex_lock(&g_workers_lock); 2309f51cf32Spaul luse assert(g_num_workers >= 1); 2319f51cf32Spaul luse if (--g_num_workers == 0) { 2329f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2339b189667Spaul luse g_rc = dump_result(); 2349f51cf32Spaul luse spdk_app_stop(0); 2359f51cf32Spaul luse } 2369f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2379f51cf32Spaul luse } 2389f51cf32Spaul luse 2398da995c4Spaul luse static int 2408da995c4Spaul luse _get_task_data_bufs(struct ap_task *task) 2418da995c4Spaul luse { 2428da995c4Spaul luse uint32_t align = 0; 24388754353SZiye Yang uint32_t i = 0; 244221eb3f4Spaul luse int dst_buff_len = g_xfer_size_bytes; 2458da995c4Spaul luse 2468da995c4Spaul luse /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 2478da995c4Spaul luse * we do this for all engines to keep it simple. 2488da995c4Spaul luse */ 24937b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 2508da995c4Spaul luse align = ALIGN_4K; 2518da995c4Spaul luse } 2528da995c4Spaul luse 25337b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 25488754353SZiye Yang assert(g_crc32c_chained_count > 0); 25588754353SZiye Yang task->iov_cnt = g_crc32c_chained_count; 25688754353SZiye Yang task->iovs = calloc(task->iov_cnt, sizeof(struct iovec)); 25788754353SZiye Yang if (!task->iovs) { 25888754353SZiye Yang fprintf(stderr, "cannot allocated task->iovs fot task=%p\n", task); 25988754353SZiye Yang return -ENOMEM; 26088754353SZiye Yang } 26188754353SZiye Yang 26237b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 263221eb3f4Spaul luse dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count; 264221eb3f4Spaul luse } 265221eb3f4Spaul luse 26688754353SZiye Yang for (i = 0; i < task->iov_cnt; i++) { 26788754353SZiye Yang task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 26888754353SZiye Yang if (task->iovs[i].iov_base == NULL) { 26988754353SZiye Yang return -ENOMEM; 27088754353SZiye Yang } 27188754353SZiye Yang memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes); 27288754353SZiye Yang task->iovs[i].iov_len = g_xfer_size_bytes; 27388754353SZiye Yang } 27488754353SZiye Yang 27588754353SZiye Yang } else { 2768da995c4Spaul luse task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 2778da995c4Spaul luse if (task->src == NULL) { 2788da995c4Spaul luse fprintf(stderr, "Unable to alloc src buffer\n"); 2798da995c4Spaul luse return -ENOMEM; 2808da995c4Spaul luse } 28188754353SZiye Yang 28288754353SZiye Yang /* For fill, set the entire src buffer so we can check if verify is enabled. */ 28337b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_FILL) { 28488754353SZiye Yang memset(task->src, g_fill_pattern, g_xfer_size_bytes); 28588754353SZiye Yang } else { 2868da995c4Spaul luse memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 28788754353SZiye Yang } 28888754353SZiye Yang } 2898da995c4Spaul luse 29037b68d72Spaul luse if (g_workload_selection != ACCEL_OPC_CRC32C) { 291221eb3f4Spaul luse task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 2928da995c4Spaul luse if (task->dst == NULL) { 2938da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 2948da995c4Spaul luse return -ENOMEM; 2958da995c4Spaul luse } 2968da995c4Spaul luse 2978da995c4Spaul luse /* For compare we want the buffers to match, otherwise not. */ 29837b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COMPARE) { 299221eb3f4Spaul luse memset(task->dst, DATA_PATTERN, dst_buff_len); 3008da995c4Spaul luse } else { 301221eb3f4Spaul luse memset(task->dst, ~DATA_PATTERN, dst_buff_len); 302221eb3f4Spaul luse } 3038da995c4Spaul luse } 3048da995c4Spaul luse 30584162738Spaul luse /* For dualcast 2 buffers are needed for the operation. */ 30684162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 3078da995c4Spaul luse task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 3088da995c4Spaul luse if (task->dst2 == NULL) { 3098da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3108da995c4Spaul luse return -ENOMEM; 3118da995c4Spaul luse } 3128da995c4Spaul luse memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 3138da995c4Spaul luse } 3148da995c4Spaul luse 3158da995c4Spaul luse return 0; 3168da995c4Spaul luse } 3178da995c4Spaul luse 318ac9a1a83Spaul luse inline static struct ap_task * 319ac9a1a83Spaul luse _get_task(struct worker_thread *worker) 320ac9a1a83Spaul luse { 321ac9a1a83Spaul luse struct ap_task *task; 322ac9a1a83Spaul luse 323ac9a1a83Spaul luse if (!TAILQ_EMPTY(&worker->tasks_pool)) { 324ac9a1a83Spaul luse task = TAILQ_FIRST(&worker->tasks_pool); 325ac9a1a83Spaul luse TAILQ_REMOVE(&worker->tasks_pool, task, link); 326ac9a1a83Spaul luse } else { 327ac9a1a83Spaul luse fprintf(stderr, "Unable to get ap_task\n"); 328ac9a1a83Spaul luse return NULL; 329ac9a1a83Spaul luse } 330ac9a1a83Spaul luse 331ac9a1a83Spaul luse return task; 332ac9a1a83Spaul luse } 333ac9a1a83Spaul luse 334f17e6705Spaul luse /* Submit one operation using the same ap task that just completed. */ 3359f51cf32Spaul luse static void 336ac9a1a83Spaul luse _submit_single(struct worker_thread *worker, struct ap_task *task) 3379f51cf32Spaul luse { 338b9218b7aSpaul luse int random_num; 33940ec8e97Spaul luse int rc = 0; 34012c40f05Spaul luse int flags = 0; 3419f51cf32Spaul luse 3429f51cf32Spaul luse assert(worker); 3439f51cf32Spaul luse 34413067997Spaul luse switch (worker->workload) { 34537b68d72Spaul luse case ACCEL_OPC_COPY: 346e8463f87Spaul luse rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, 34712c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 348e69375bfSpaul luse break; 34937b68d72Spaul luse case ACCEL_OPC_FILL: 3502a0c66d0Spaul luse /* For fill use the first byte of the task->dst buffer */ 351ee7e31f9Spaul luse rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src, 35212c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 353e69375bfSpaul luse break; 35437b68d72Spaul luse case ACCEL_OPC_CRC32C: 355a738acd5Spaul luse rc = spdk_accel_submit_crc32cv(worker->ch, &task->crc_dst, 35688754353SZiye Yang task->iovs, task->iov_cnt, g_crc32c_seed, 35790c56d96SZiye Yang accel_done, task); 358e69375bfSpaul luse break; 35937b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 360221eb3f4Spaul luse rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->iovs, task->iov_cnt, 36112c40f05Spaul luse &task->crc_dst, g_crc32c_seed, flags, accel_done, task); 362221eb3f4Spaul luse break; 36337b68d72Spaul luse case ACCEL_OPC_COMPARE: 364b9218b7aSpaul luse random_num = rand() % 100; 365b9218b7aSpaul luse if (random_num < g_fail_percent_goal) { 366b9218b7aSpaul luse task->expected_status = -EILSEQ; 367b9218b7aSpaul luse *(uint8_t *)task->dst = ~DATA_PATTERN; 368b9218b7aSpaul luse } else { 369b9218b7aSpaul luse task->expected_status = 0; 370b9218b7aSpaul luse *(uint8_t *)task->dst = DATA_PATTERN; 371b9218b7aSpaul luse } 372ee7e31f9Spaul luse rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src, 373e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 374b9218b7aSpaul luse break; 37537b68d72Spaul luse case ACCEL_OPC_DUALCAST: 376ee7e31f9Spaul luse rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, 37712c40f05Spaul luse task->src, g_xfer_size_bytes, flags, accel_done, task); 3780ef079c6Spaul luse break; 379e69375bfSpaul luse default: 3802a0c66d0Spaul luse assert(false); 381e69375bfSpaul luse break; 382e69375bfSpaul luse 3832a0c66d0Spaul luse } 38440ec8e97Spaul luse 3856799d46aSpaul luse worker->current_queue_depth++; 38640ec8e97Spaul luse if (rc) { 387e8463f87Spaul luse accel_done(task, rc); 38840ec8e97Spaul luse } 3899f51cf32Spaul luse } 3909f51cf32Spaul luse 3919f51cf32Spaul luse static void 392e150f6b8SZiye Yang _free_task_buffers(struct ap_task *task) 393ac9a1a83Spaul luse { 39488754353SZiye Yang uint32_t i; 39588754353SZiye Yang 39637b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 39788754353SZiye Yang if (task->iovs) { 39888754353SZiye Yang for (i = 0; i < task->iov_cnt; i++) { 39988754353SZiye Yang if (task->iovs[i].iov_base) { 40088754353SZiye Yang spdk_dma_free(task->iovs[i].iov_base); 40188754353SZiye Yang } 40288754353SZiye Yang } 40388754353SZiye Yang free(task->iovs); 40488754353SZiye Yang } 40588754353SZiye Yang } else { 406ac9a1a83Spaul luse spdk_dma_free(task->src); 40788754353SZiye Yang } 40888754353SZiye Yang 409ac9a1a83Spaul luse spdk_dma_free(task->dst); 41084162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 411ac9a1a83Spaul luse spdk_dma_free(task->dst2); 412ac9a1a83Spaul luse } 413ac9a1a83Spaul luse } 414ac9a1a83Spaul luse 415221eb3f4Spaul luse static int 416221eb3f4Spaul luse _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) 417221eb3f4Spaul luse { 418221eb3f4Spaul luse uint32_t i; 419221eb3f4Spaul luse uint32_t ttl_len = 0; 420221eb3f4Spaul luse uint8_t *dst = (uint8_t *)_dst; 421221eb3f4Spaul luse 422221eb3f4Spaul luse for (i = 0; i < iovcnt; i++) { 423221eb3f4Spaul luse if (memcmp(dst, src_iovs[i].iov_base, src_iovs[i].iov_len)) { 424221eb3f4Spaul luse return -1; 425221eb3f4Spaul luse } 426221eb3f4Spaul luse dst += src_iovs[i].iov_len; 427221eb3f4Spaul luse ttl_len += src_iovs[i].iov_len; 428221eb3f4Spaul luse } 429221eb3f4Spaul luse 430221eb3f4Spaul luse if (ttl_len != iovcnt * g_xfer_size_bytes) { 431221eb3f4Spaul luse return -1; 432221eb3f4Spaul luse } 433221eb3f4Spaul luse 434221eb3f4Spaul luse return 0; 435221eb3f4Spaul luse } 436221eb3f4Spaul luse 43713067997Spaul luse static int _worker_stop(void *arg); 43813067997Spaul luse 439fab40895Spaul luse static void 440df42f358Spaul luse accel_done(void *arg1, int status) 4419f51cf32Spaul luse { 4429f51cf32Spaul luse struct ap_task *task = arg1; 4439f51cf32Spaul luse struct worker_thread *worker = task->worker; 444e69375bfSpaul luse uint32_t sw_crc32c; 4459f51cf32Spaul luse 4469f51cf32Spaul luse assert(worker); 4479f51cf32Spaul luse assert(worker->current_queue_depth > 0); 4489f51cf32Spaul luse 449df42f358Spaul luse if (g_verify && status == 0) { 45013067997Spaul luse switch (worker->workload) { 45137b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 452221eb3f4Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 453221eb3f4Spaul luse if (task->crc_dst != sw_crc32c) { 454221eb3f4Spaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 455221eb3f4Spaul luse worker->xfer_failed++; 456221eb3f4Spaul luse } 457221eb3f4Spaul luse if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) { 458221eb3f4Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 459221eb3f4Spaul luse worker->xfer_failed++; 460221eb3f4Spaul luse } 461221eb3f4Spaul luse break; 46237b68d72Spaul luse case ACCEL_OPC_CRC32C: 463b85127ccSZiye Yang sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 464a738acd5Spaul luse if (task->crc_dst != sw_crc32c) { 465e69375bfSpaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 466e69375bfSpaul luse worker->xfer_failed++; 467e69375bfSpaul luse } 468b9218b7aSpaul luse break; 46937b68d72Spaul luse case ACCEL_OPC_COPY: 470b9218b7aSpaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 4719f51cf32Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 4729f51cf32Spaul luse worker->xfer_failed++; 473b9218b7aSpaul luse } 474b9218b7aSpaul luse break; 47537b68d72Spaul luse case ACCEL_OPC_DUALCAST: 4760ef079c6Spaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 4770ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, first destination\n"); 4780ef079c6Spaul luse worker->xfer_failed++; 4790ef079c6Spaul luse } 4800ef079c6Spaul luse if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 4810ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, second destination\n"); 4820ef079c6Spaul luse worker->xfer_failed++; 4830ef079c6Spaul luse } 4840ef079c6Spaul luse break; 48537b68d72Spaul luse case ACCEL_OPC_FILL: 486d207237fSpaul luse if (memcmp(task->dst, task->src, g_xfer_size_bytes)) { 487d207237fSpaul luse SPDK_NOTICELOG("Data miscompare\n"); 488d207237fSpaul luse worker->xfer_failed++; 489d207237fSpaul luse } 490d207237fSpaul luse break; 49137b68d72Spaul luse case ACCEL_OPC_COMPARE: 4928cee297cSpaul luse break; 493b9218b7aSpaul luse default: 494b9218b7aSpaul luse assert(false); 495b9218b7aSpaul luse break; 4969f51cf32Spaul luse } 4979f51cf32Spaul luse } 498b9218b7aSpaul luse 499b9218b7aSpaul luse if (task->expected_status == -EILSEQ) { 500df42f358Spaul luse assert(status != 0); 501b9218b7aSpaul luse worker->injected_miscompares++; 50213067997Spaul luse status = 0; 503df42f358Spaul luse } else if (status) { 504f17e6705Spaul luse /* Expected to pass but the accel engine reported an error (ex: COMPARE operation). */ 505b9218b7aSpaul luse worker->xfer_failed++; 506b9218b7aSpaul luse } 507b9218b7aSpaul luse 5089f51cf32Spaul luse worker->xfer_completed++; 5099f51cf32Spaul luse worker->current_queue_depth--; 5109f51cf32Spaul luse 51113067997Spaul luse if (!worker->is_draining && status == 0) { 512451462f6SJim Harris TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 513451462f6SJim Harris task = _get_task(worker); 5149f51cf32Spaul luse _submit_single(worker, task); 515f17e6705Spaul luse } else { 516b34883e0SZiye Yang TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 517f17e6705Spaul luse } 5189f51cf32Spaul luse } 5199f51cf32Spaul luse 5209f51cf32Spaul luse static int 5219f51cf32Spaul luse dump_result(void) 5229f51cf32Spaul luse { 5239f51cf32Spaul luse uint64_t total_completed = 0; 5249f51cf32Spaul luse uint64_t total_failed = 0; 525b9218b7aSpaul luse uint64_t total_miscompared = 0; 5269f51cf32Spaul luse uint64_t total_xfer_per_sec, total_bw_in_MiBps; 5279f51cf32Spaul luse struct worker_thread *worker = g_workers; 5289f51cf32Spaul luse 529445fe74eSpaul luse printf("\nCore,Thread Transfers Bandwidth Failed Miscompares\n"); 530445fe74eSpaul luse printf("------------------------------------------------------------------------\n"); 5319f51cf32Spaul luse while (worker != NULL) { 5329f51cf32Spaul luse 5339f51cf32Spaul luse uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; 5349f51cf32Spaul luse uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / 5359f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 5369f51cf32Spaul luse 5379f51cf32Spaul luse total_completed += worker->xfer_completed; 5389f51cf32Spaul luse total_failed += worker->xfer_failed; 539b9218b7aSpaul luse total_miscompared += worker->injected_miscompares; 5409f51cf32Spaul luse 5419f51cf32Spaul luse if (xfer_per_sec) { 542445fe74eSpaul luse printf("%u,%u%17" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n", 543445fe74eSpaul luse worker->display.core, worker->display.thread, xfer_per_sec, 544b9218b7aSpaul luse bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); 5459f51cf32Spaul luse } 5469f51cf32Spaul luse 5479f51cf32Spaul luse worker = worker->next; 5489f51cf32Spaul luse } 5499f51cf32Spaul luse 5509f51cf32Spaul luse total_xfer_per_sec = total_completed / g_time_in_sec; 5519f51cf32Spaul luse total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 5529f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 5539f51cf32Spaul luse 554445fe74eSpaul luse printf("=========================================================================\n"); 555445fe74eSpaul luse printf("Total:%15" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n", 556b9218b7aSpaul luse total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 5579f51cf32Spaul luse 5589f51cf32Spaul luse return total_failed ? 1 : 0; 5599f51cf32Spaul luse } 5609f51cf32Spaul luse 561e150f6b8SZiye Yang static inline void 562e150f6b8SZiye Yang _free_task_buffers_in_pool(struct worker_thread *worker) 563e150f6b8SZiye Yang { 564e150f6b8SZiye Yang struct ap_task *task; 565e150f6b8SZiye Yang 566e150f6b8SZiye Yang assert(worker); 567e150f6b8SZiye Yang while ((task = TAILQ_FIRST(&worker->tasks_pool))) { 568e150f6b8SZiye Yang TAILQ_REMOVE(&worker->tasks_pool, task, link); 569e150f6b8SZiye Yang _free_task_buffers(task); 570e150f6b8SZiye Yang } 571e150f6b8SZiye Yang } 572e150f6b8SZiye Yang 5739f51cf32Spaul luse static int 5749f51cf32Spaul luse _check_draining(void *arg) 5759f51cf32Spaul luse { 5769f51cf32Spaul luse struct worker_thread *worker = arg; 5779f51cf32Spaul luse 5789f51cf32Spaul luse assert(worker); 5799f51cf32Spaul luse 5809f51cf32Spaul luse if (worker->current_queue_depth == 0) { 581e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 5829f51cf32Spaul luse spdk_poller_unregister(&worker->is_draining_poller); 5839f51cf32Spaul luse unregister_worker(worker); 5849f51cf32Spaul luse } 5859f51cf32Spaul luse 586fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 5879f51cf32Spaul luse } 5889f51cf32Spaul luse 5899f51cf32Spaul luse static int 5909f51cf32Spaul luse _worker_stop(void *arg) 5919f51cf32Spaul luse { 5929f51cf32Spaul luse struct worker_thread *worker = arg; 5939f51cf32Spaul luse 5949f51cf32Spaul luse assert(worker); 5959f51cf32Spaul luse 5969f51cf32Spaul luse spdk_poller_unregister(&worker->stop_poller); 5979f51cf32Spaul luse 5989f51cf32Spaul luse /* now let the worker drain and check it's outstanding IO with a poller */ 5999f51cf32Spaul luse worker->is_draining = true; 600ab0bc5c2SShuhei Matsumoto worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); 6019f51cf32Spaul luse 602fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 6039f51cf32Spaul luse } 6049f51cf32Spaul luse 6059f51cf32Spaul luse static void 606a34fc12bSpaul luse _init_thread(void *arg1) 607a34fc12bSpaul luse { 608a34fc12bSpaul luse struct worker_thread *worker; 609a34fc12bSpaul luse struct ap_task *task; 610998b5d66Spaul luse int i, num_tasks = g_allocate_depth; 611445fe74eSpaul luse struct display_info *display = arg1; 612a34fc12bSpaul luse 613a34fc12bSpaul luse worker = calloc(1, sizeof(*worker)); 614a34fc12bSpaul luse if (worker == NULL) { 615a34fc12bSpaul luse fprintf(stderr, "Unable to allocate worker\n"); 616445fe74eSpaul luse free(display); 617a34fc12bSpaul luse return; 618a34fc12bSpaul luse } 619a34fc12bSpaul luse 62013067997Spaul luse worker->workload = g_workload_selection; 621445fe74eSpaul luse worker->display.core = display->core; 622445fe74eSpaul luse worker->display.thread = display->thread; 623445fe74eSpaul luse free(display); 6249f51cf32Spaul luse worker->core = spdk_env_get_current_core(); 6259f51cf32Spaul luse worker->thread = spdk_get_thread(); 626eea826a2Spaul luse pthread_mutex_lock(&g_workers_lock); 627eea826a2Spaul luse g_num_workers++; 6289f51cf32Spaul luse worker->next = g_workers; 629eea826a2Spaul luse g_workers = worker; 630eea826a2Spaul luse pthread_mutex_unlock(&g_workers_lock); 631*34c48f1bSBen Walker worker->ch = spdk_accel_get_io_channel(); 6322dd64cf9Spaul luse if (worker->ch == NULL) { 6332dd64cf9Spaul luse fprintf(stderr, "Unable to get an accel channel\n"); 6342dd64cf9Spaul luse goto error; 6352dd64cf9Spaul luse } 636b9218b7aSpaul luse 637f17e6705Spaul luse TAILQ_INIT(&worker->tasks_pool); 638f17e6705Spaul luse 639ac9a1a83Spaul luse worker->task_base = calloc(num_tasks, sizeof(struct ap_task)); 640ac9a1a83Spaul luse if (worker->task_base == NULL) { 641ac9a1a83Spaul luse fprintf(stderr, "Could not allocate task base.\n"); 642ac9a1a83Spaul luse goto error; 6430cecfcb1Spaul luse } 644ac9a1a83Spaul luse 645ac9a1a83Spaul luse task = worker->task_base; 646ac9a1a83Spaul luse for (i = 0; i < num_tasks; i++) { 647ac9a1a83Spaul luse TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 6484cd7ca9bSJim Harris task->worker = worker; 649ac9a1a83Spaul luse if (_get_task_data_bufs(task)) { 650ac9a1a83Spaul luse fprintf(stderr, "Unable to get data bufs\n"); 651ac9a1a83Spaul luse goto error; 652ac9a1a83Spaul luse } 653ac9a1a83Spaul luse task++; 6549f51cf32Spaul luse } 6559f51cf32Spaul luse 6569f51cf32Spaul luse /* Register a poller that will stop the worker at time elapsed */ 657ab0bc5c2SShuhei Matsumoto worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, 6589f51cf32Spaul luse g_time_in_sec * 1000000ULL); 6599f51cf32Spaul luse 660998b5d66Spaul luse /* Load up queue depth worth of operations. */ 661998b5d66Spaul luse for (i = 0; i < g_queue_depth; i++) { 662ac9a1a83Spaul luse task = _get_task(worker); 663ac9a1a83Spaul luse if (task == NULL) { 664a34fc12bSpaul luse goto error; 665b9218b7aSpaul luse } 666b9218b7aSpaul luse 6679f51cf32Spaul luse _submit_single(worker, task); 6689f51cf32Spaul luse } 669a34fc12bSpaul luse return; 670a34fc12bSpaul luse error: 671e150f6b8SZiye Yang 672e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 673ac9a1a83Spaul luse free(worker->task_base); 674a34fc12bSpaul luse spdk_app_stop(-1); 6759f51cf32Spaul luse } 6769f51cf32Spaul luse 6779f51cf32Spaul luse static void 6789f51cf32Spaul luse accel_perf_start(void *arg1) 6799f51cf32Spaul luse { 680eea826a2Spaul luse struct spdk_cpuset tmp_cpumask = {}; 681eea826a2Spaul luse char thread_name[32]; 682eea826a2Spaul luse uint32_t i; 683445fe74eSpaul luse int j; 684eea826a2Spaul luse struct spdk_thread *thread; 685445fe74eSpaul luse struct display_info *display; 686514be889Spaul luse 6879f51cf32Spaul luse g_tsc_rate = spdk_get_ticks_hz(); 6889f51cf32Spaul luse g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 6899f51cf32Spaul luse 6909260fa0cSpaul luse dump_user_config(); 6919260fa0cSpaul luse 6929f51cf32Spaul luse printf("Running for %d seconds...\n", g_time_in_sec); 6939f51cf32Spaul luse fflush(stdout); 6949f51cf32Spaul luse 695eea826a2Spaul luse /* Create worker threads for each core that was specified. */ 696eea826a2Spaul luse SPDK_ENV_FOREACH_CORE(i) { 697445fe74eSpaul luse for (j = 0; j < g_threads_per_core; j++) { 698445fe74eSpaul luse snprintf(thread_name, sizeof(thread_name), "ap_worker_%u_%u", i, j); 699eea826a2Spaul luse spdk_cpuset_zero(&tmp_cpumask); 700eea826a2Spaul luse spdk_cpuset_set_cpu(&tmp_cpumask, i, true); 701eea826a2Spaul luse thread = spdk_thread_create(thread_name, &tmp_cpumask); 702445fe74eSpaul luse display = calloc(1, sizeof(*display)); 703445fe74eSpaul luse if (display == NULL) { 704445fe74eSpaul luse fprintf(stderr, "Unable to allocate memory\n"); 705445fe74eSpaul luse spdk_app_stop(-1); 706445fe74eSpaul luse return; 707445fe74eSpaul luse } 708445fe74eSpaul luse display->core = i; 709445fe74eSpaul luse display->thread = j; 710445fe74eSpaul luse spdk_thread_send_msg(thread, _init_thread, display); 711445fe74eSpaul luse } 712eea826a2Spaul luse } 7139f51cf32Spaul luse } 7149f51cf32Spaul luse 7159f51cf32Spaul luse int 7169f51cf32Spaul luse main(int argc, char **argv) 7179f51cf32Spaul luse { 7189f51cf32Spaul luse struct worker_thread *worker, *tmp; 7199f51cf32Spaul luse 7209f51cf32Spaul luse pthread_mutex_init(&g_workers_lock, NULL); 7219260fa0cSpaul luse spdk_app_opts_init(&g_opts, sizeof(g_opts)); 7229260fa0cSpaul luse g_opts.name = "accel_perf"; 7239260fa0cSpaul luse g_opts.reactor_mask = "0x1"; 7249260fa0cSpaul luse if (spdk_app_parse_args(argc, argv, &g_opts, "a:C:o:q:t:yw:P:f:T:", NULL, parse_args, 7251e2b38baSyidong0635 usage) != SPDK_APP_PARSE_ARGS_SUCCESS) { 7269b189667Spaul luse g_rc = -1; 7279f51cf32Spaul luse goto cleanup; 7289f51cf32Spaul luse } 7299f51cf32Spaul luse 73037b68d72Spaul luse if ((g_workload_selection != ACCEL_OPC_COPY) && 73137b68d72Spaul luse (g_workload_selection != ACCEL_OPC_FILL) && 73237b68d72Spaul luse (g_workload_selection != ACCEL_OPC_CRC32C) && 73337b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COPY_CRC32C) && 73437b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COMPARE) && 73584162738Spaul luse (g_workload_selection != ACCEL_OPC_DUALCAST)) { 7362a0c66d0Spaul luse usage(); 7379b189667Spaul luse g_rc = -1; 7382a0c66d0Spaul luse goto cleanup; 7392a0c66d0Spaul luse } 7402a0c66d0Spaul luse 741e1bf63afSJim Harris if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) { 742e1bf63afSJim Harris fprintf(stdout, "allocate depth must be at least as big as queue depth\n"); 743e1bf63afSJim Harris usage(); 744e1bf63afSJim Harris g_rc = -1; 745e1bf63afSJim Harris goto cleanup; 746e1bf63afSJim Harris } 747e1bf63afSJim Harris 748e1bf63afSJim Harris if (g_allocate_depth == 0) { 749e1bf63afSJim Harris g_allocate_depth = g_queue_depth; 750e1bf63afSJim Harris } 751e1bf63afSJim Harris 75237b68d72Spaul luse if ((g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) && 75388754353SZiye Yang g_crc32c_chained_count == 0) { 75488754353SZiye Yang usage(); 75588754353SZiye Yang g_rc = -1; 75688754353SZiye Yang goto cleanup; 75788754353SZiye Yang } 75888754353SZiye Yang 7599260fa0cSpaul luse g_rc = spdk_app_start(&g_opts, accel_perf_start, NULL); 7609b189667Spaul luse if (g_rc) { 7619f51cf32Spaul luse SPDK_ERRLOG("ERROR starting application\n"); 7629f51cf32Spaul luse } 7639f51cf32Spaul luse 7649f51cf32Spaul luse pthread_mutex_destroy(&g_workers_lock); 7659f51cf32Spaul luse 7669f51cf32Spaul luse worker = g_workers; 7679f51cf32Spaul luse while (worker) { 7689f51cf32Spaul luse tmp = worker->next; 7699f51cf32Spaul luse free(worker); 7709f51cf32Spaul luse worker = tmp; 7719f51cf32Spaul luse } 7729f51cf32Spaul luse cleanup: 7739f51cf32Spaul luse spdk_app_fini(); 7749b189667Spaul luse return g_rc; 7759f51cf32Spaul luse } 776