1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2020 Intel Corporation. 39f51cf32Spaul luse * All rights reserved. 49f51cf32Spaul luse */ 59f51cf32Spaul luse 69f51cf32Spaul luse #include "spdk/stdinc.h" 79f51cf32Spaul luse #include "spdk/thread.h" 89f51cf32Spaul luse #include "spdk/env.h" 99f51cf32Spaul luse #include "spdk/event.h" 109f51cf32Spaul luse #include "spdk/log.h" 119f51cf32Spaul luse #include "spdk/string.h" 12081f080aSBen Walker #include "spdk/accel.h" 13e69375bfSpaul luse #include "spdk/crc32.h" 140cecfcb1Spaul luse #include "spdk/util.h" 159f51cf32Spaul luse 16b9218b7aSpaul luse #define DATA_PATTERN 0x5a 170ef079c6Spaul luse #define ALIGN_4K 0x1000 18*6afbf3dbSpaul luse #define COMP_BUF_PAD_PERCENTAGE 1.1L 19b9218b7aSpaul luse 209f51cf32Spaul luse static uint64_t g_tsc_rate; 219f51cf32Spaul luse static uint64_t g_tsc_end; 229b189667Spaul luse static int g_rc; 239f51cf32Spaul luse static int g_xfer_size_bytes = 4096; 249f51cf32Spaul luse static int g_queue_depth = 32; 25e1bf63afSJim Harris /* g_allocate_depth indicates how many tasks we allocate per worker. It will 26e1bf63afSJim Harris * be at least as much as the queue depth. 27e1bf63afSJim Harris */ 28e1bf63afSJim Harris static int g_allocate_depth = 0; 29445fe74eSpaul luse static int g_threads_per_core = 1; 309f51cf32Spaul luse static int g_time_in_sec = 5; 31e69375bfSpaul luse static uint32_t g_crc32c_seed = 0; 32850cd900Spaul luse static uint32_t g_chained_count = 1; 33b9218b7aSpaul luse static int g_fail_percent_goal = 0; 3489495464Spaul luse static uint8_t g_fill_pattern = 255; 359f51cf32Spaul luse static bool g_verify = false; 362a0c66d0Spaul luse static const char *g_workload_type = NULL; 3737b68d72Spaul luse static enum accel_opcode g_workload_selection; 389f51cf32Spaul luse static struct worker_thread *g_workers = NULL; 399f51cf32Spaul luse static int g_num_workers = 0; 4027e85f52SBen Walker static char *g_cd_file_in_name = NULL; 419f51cf32Spaul luse static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; 429260fa0cSpaul luse static struct spdk_app_opts g_opts = {}; 43cdefd3d3Spaul luse 4427e85f52SBen Walker struct ap_compress_seg { 4527e85f52SBen Walker void *uncompressed_data; 4627e85f52SBen Walker uint32_t uncompressed_len; 4727e85f52SBen Walker struct iovec *uncompressed_iovs; 4827e85f52SBen Walker uint32_t uncompressed_iovcnt; 4927e85f52SBen Walker 5027e85f52SBen Walker void *compressed_data; 5127e85f52SBen Walker uint32_t compressed_len; 52*6afbf3dbSpaul luse uint32_t compressed_len_padded; 5327e85f52SBen Walker struct iovec *compressed_iovs; 5427e85f52SBen Walker uint32_t compressed_iovcnt; 5527e85f52SBen Walker 5627e85f52SBen Walker STAILQ_ENTRY(ap_compress_seg) link; 5727e85f52SBen Walker }; 5827e85f52SBen Walker 5927e85f52SBen Walker static STAILQ_HEAD(, ap_compress_seg) g_compress_segs = STAILQ_HEAD_INITIALIZER(g_compress_segs); 6027e85f52SBen Walker 61cdefd3d3Spaul luse struct worker_thread; 62cdefd3d3Spaul luse static void accel_done(void *ref, int status); 63cdefd3d3Spaul luse 64445fe74eSpaul luse struct display_info { 65445fe74eSpaul luse int core; 66445fe74eSpaul luse int thread; 67445fe74eSpaul luse }; 68445fe74eSpaul luse 69cdefd3d3Spaul luse struct ap_task { 70cdefd3d3Spaul luse void *src; 7128886ac3Spaul luse struct iovec *src_iovs; 7228886ac3Spaul luse uint32_t src_iovcnt; 73850cd900Spaul luse struct iovec *dst_iovs; 74850cd900Spaul luse uint32_t dst_iovcnt; 75cdefd3d3Spaul luse void *dst; 76cdefd3d3Spaul luse void *dst2; 77221eb3f4Spaul luse uint32_t crc_dst; 7827e85f52SBen Walker uint32_t compressed_sz; 7927e85f52SBen Walker struct ap_compress_seg *cur_seg; 80cdefd3d3Spaul luse struct worker_thread *worker; 81cdefd3d3Spaul luse int expected_status; /* used for the compare operation */ 82cdefd3d3Spaul luse TAILQ_ENTRY(ap_task) link; 83cdefd3d3Spaul luse }; 849f51cf32Spaul luse 859f51cf32Spaul luse struct worker_thread { 869f51cf32Spaul luse struct spdk_io_channel *ch; 879f51cf32Spaul luse uint64_t xfer_completed; 889f51cf32Spaul luse uint64_t xfer_failed; 89b9218b7aSpaul luse uint64_t injected_miscompares; 909f51cf32Spaul luse uint64_t current_queue_depth; 91ac9a1a83Spaul luse TAILQ_HEAD(, ap_task) tasks_pool; 929f51cf32Spaul luse struct worker_thread *next; 939f51cf32Spaul luse unsigned core; 949f51cf32Spaul luse struct spdk_thread *thread; 959f51cf32Spaul luse bool is_draining; 969f51cf32Spaul luse struct spdk_poller *is_draining_poller; 979f51cf32Spaul luse struct spdk_poller *stop_poller; 98ac9a1a83Spaul luse void *task_base; 99445fe74eSpaul luse struct display_info display; 10013067997Spaul luse enum accel_opcode workload; 1019f51cf32Spaul luse }; 1029f51cf32Spaul luse 1039f51cf32Spaul luse static void 1049260fa0cSpaul luse dump_user_config(void) 1059f51cf32Spaul luse { 106712e8cb7SBen Walker const char *module_name = NULL; 1079260fa0cSpaul luse int rc; 1089260fa0cSpaul luse 109712e8cb7SBen Walker rc = spdk_accel_get_opc_module_name(g_workload_selection, &module_name); 1109260fa0cSpaul luse if (rc) { 111712e8cb7SBen Walker printf("error getting module name (%d)\n", rc); 1129260fa0cSpaul luse } 1139260fa0cSpaul luse 1149260fa0cSpaul luse printf("\nSPDK Configuration:\n"); 1159260fa0cSpaul luse printf("Core mask: %s\n\n", g_opts.reactor_mask); 1169f51cf32Spaul luse printf("Accel Perf Configuration:\n"); 1172a0c66d0Spaul luse printf("Workload Type: %s\n", g_workload_type); 11837b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 119b9218b7aSpaul luse printf("CRC-32C seed: %u\n", g_crc32c_seed); 12037b68d72Spaul luse } else if (g_workload_selection == ACCEL_OPC_FILL) { 12189495464Spaul luse printf("Fill pattern: 0x%x\n", g_fill_pattern); 12237b68d72Spaul luse } else if ((g_workload_selection == ACCEL_OPC_COMPARE) && g_fail_percent_goal > 0) { 12389495464Spaul luse printf("Failure inject: %u percent\n", g_fail_percent_goal); 124e69375bfSpaul luse } 12537b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 126221eb3f4Spaul luse printf("Vector size: %u bytes\n", g_xfer_size_bytes); 127850cd900Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_chained_count); 128221eb3f4Spaul luse } else { 1299f51cf32Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 130221eb3f4Spaul luse } 131850cd900Spaul luse printf("vector count %u\n", g_chained_count); 132712e8cb7SBen Walker printf("Module: %s\n", module_name); 13327e85f52SBen Walker if (g_workload_selection == ACCEL_OPC_COMPRESS || g_workload_selection == ACCEL_OPC_DECOMPRESS) { 13427e85f52SBen Walker printf("File Name: %s\n", g_cd_file_in_name); 13527e85f52SBen Walker } 1369f51cf32Spaul luse printf("Queue depth: %u\n", g_queue_depth); 137e1bf63afSJim Harris printf("Allocate depth: %u\n", g_allocate_depth); 138445fe74eSpaul luse printf("# threads/core: %u\n", g_threads_per_core); 1399f51cf32Spaul luse printf("Run time: %u seconds\n", g_time_in_sec); 1409f51cf32Spaul luse printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 1419f51cf32Spaul luse } 1429f51cf32Spaul luse 1439f51cf32Spaul luse static void 1449f51cf32Spaul luse usage(void) 1459f51cf32Spaul luse { 1469f51cf32Spaul luse printf("accel_perf options:\n"); 1479f51cf32Spaul luse printf("\t[-h help message]\n"); 148f17e6705Spaul luse printf("\t[-q queue depth per core]\n"); 149850cd900Spaul luse printf("\t[-C for supported workloads, use this value to configure the io vector size to test (default 1)\n"); 150445fe74eSpaul luse printf("\t[-T number of threads per core\n"); 15188754353SZiye Yang printf("\t[-n number of channels]\n"); 15227e85f52SBen Walker printf("\t[-o transfer size in bytes (default: 4KiB. For compress/decompress, 0 means the input file size)]\n"); 1539f51cf32Spaul luse printf("\t[-t time in seconds]\n"); 15427e85f52SBen Walker printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, compress, decompress, dualcast\n"); 15527e85f52SBen Walker printf("\t[-l for compress/decompress workloads, name of uncompressed input file\n"); 156e69375bfSpaul luse printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 157b9218b7aSpaul luse printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 15889495464Spaul luse printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 1592a0c66d0Spaul luse printf("\t[-y verify result if this switch is on]\n"); 160e1bf63afSJim Harris printf("\t[-a tasks to allocate per core (default: same value as -q)]\n"); 161e1bf63afSJim Harris printf("\t\tCan be used to spread operations across a wider range of memory.\n"); 1629f51cf32Spaul luse } 1639f51cf32Spaul luse 1649f51cf32Spaul luse static int 1659f51cf32Spaul luse parse_args(int argc, char *argv) 1669f51cf32Spaul luse { 167358b84b4SZiye Yang int argval = 0; 168c82d5789SJim Harris 1699f51cf32Spaul luse switch (argc) { 170e1bf63afSJim Harris case 'a': 171c82d5789SJim Harris case 'C': 172c82d5789SJim Harris case 'f': 173c82d5789SJim Harris case 'T': 174c82d5789SJim Harris case 'o': 175c82d5789SJim Harris case 'P': 176c82d5789SJim Harris case 'q': 177c82d5789SJim Harris case 's': 178c82d5789SJim Harris case 't': 179c82d5789SJim Harris argval = spdk_strtol(optarg, 10); 180c82d5789SJim Harris if (argval < 0) { 181c82d5789SJim Harris fprintf(stderr, "-%c option must be non-negative.\n", argc); 182c82d5789SJim Harris usage(); 183c82d5789SJim Harris return 1; 184c82d5789SJim Harris } 185c82d5789SJim Harris break; 186c82d5789SJim Harris default: 187c82d5789SJim Harris break; 188c82d5789SJim Harris }; 189c82d5789SJim Harris 190c82d5789SJim Harris switch (argc) { 191e1bf63afSJim Harris case 'a': 192e1bf63afSJim Harris g_allocate_depth = argval; 193e1bf63afSJim Harris break; 19488754353SZiye Yang case 'C': 195850cd900Spaul luse g_chained_count = argval; 19688754353SZiye Yang break; 19727e85f52SBen Walker case 'l': 19827e85f52SBen Walker g_cd_file_in_name = optarg; 19927e85f52SBen Walker break; 20089495464Spaul luse case 'f': 201c82d5789SJim Harris g_fill_pattern = (uint8_t)argval; 20289495464Spaul luse break; 203445fe74eSpaul luse case 'T': 204c82d5789SJim Harris g_threads_per_core = argval; 205445fe74eSpaul luse break; 2069f51cf32Spaul luse case 'o': 207c82d5789SJim Harris g_xfer_size_bytes = argval; 2089f51cf32Spaul luse break; 209b9218b7aSpaul luse case 'P': 210c82d5789SJim Harris g_fail_percent_goal = argval; 211b9218b7aSpaul luse break; 2129f51cf32Spaul luse case 'q': 213c82d5789SJim Harris g_queue_depth = argval; 2149f51cf32Spaul luse break; 215e69375bfSpaul luse case 's': 216c82d5789SJim Harris g_crc32c_seed = argval; 217e69375bfSpaul luse break; 2189f51cf32Spaul luse case 't': 219c82d5789SJim Harris g_time_in_sec = argval; 2209f51cf32Spaul luse break; 2219f51cf32Spaul luse case 'y': 2229f51cf32Spaul luse g_verify = true; 2239f51cf32Spaul luse break; 2242a0c66d0Spaul luse case 'w': 2252a0c66d0Spaul luse g_workload_type = optarg; 226514be889Spaul luse if (!strcmp(g_workload_type, "copy")) { 22737b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY; 228514be889Spaul luse } else if (!strcmp(g_workload_type, "fill")) { 22937b68d72Spaul luse g_workload_selection = ACCEL_OPC_FILL; 230e69375bfSpaul luse } else if (!strcmp(g_workload_type, "crc32c")) { 23137b68d72Spaul luse g_workload_selection = ACCEL_OPC_CRC32C; 232221eb3f4Spaul luse } else if (!strcmp(g_workload_type, "copy_crc32c")) { 23337b68d72Spaul luse g_workload_selection = ACCEL_OPC_COPY_CRC32C; 234b9218b7aSpaul luse } else if (!strcmp(g_workload_type, "compare")) { 23537b68d72Spaul luse g_workload_selection = ACCEL_OPC_COMPARE; 2360ef079c6Spaul luse } else if (!strcmp(g_workload_type, "dualcast")) { 23737b68d72Spaul luse g_workload_selection = ACCEL_OPC_DUALCAST; 23827e85f52SBen Walker } else if (!strcmp(g_workload_type, "compress")) { 23927e85f52SBen Walker g_workload_selection = ACCEL_OPC_COMPRESS; 24027e85f52SBen Walker } else if (!strcmp(g_workload_type, "decompress")) { 24127e85f52SBen Walker g_workload_selection = ACCEL_OPC_DECOMPRESS; 242b21221e1Spaul luse } else { 243b21221e1Spaul luse usage(); 244b21221e1Spaul luse return 1; 245514be889Spaul luse } 2462a0c66d0Spaul luse break; 2479f51cf32Spaul luse default: 2489f51cf32Spaul luse usage(); 2499f51cf32Spaul luse return 1; 2509f51cf32Spaul luse } 25188754353SZiye Yang 2529f51cf32Spaul luse return 0; 2539f51cf32Spaul luse } 2549f51cf32Spaul luse 255eea826a2Spaul luse static int dump_result(void); 2569f51cf32Spaul luse static void 2579f51cf32Spaul luse unregister_worker(void *arg1) 2589f51cf32Spaul luse { 2599f51cf32Spaul luse struct worker_thread *worker = arg1; 2609f51cf32Spaul luse 261ac9a1a83Spaul luse free(worker->task_base); 2629f51cf32Spaul luse spdk_put_io_channel(worker->ch); 2630e7821e9SJim Harris spdk_thread_exit(spdk_get_thread()); 2649f51cf32Spaul luse pthread_mutex_lock(&g_workers_lock); 2659f51cf32Spaul luse assert(g_num_workers >= 1); 2669f51cf32Spaul luse if (--g_num_workers == 0) { 2679f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2689b189667Spaul luse g_rc = dump_result(); 2699f51cf32Spaul luse spdk_app_stop(0); 270f042d6baSGangCao } else { 2719f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2729f51cf32Spaul luse } 273f042d6baSGangCao } 2749f51cf32Spaul luse 27527e85f52SBen Walker static void 27627e85f52SBen Walker accel_perf_construct_iovs(void *buf, uint64_t sz, struct iovec *iovs, uint32_t iovcnt) 27727e85f52SBen Walker { 27827e85f52SBen Walker uint64_t ele_size; 27927e85f52SBen Walker uint8_t *data; 28027e85f52SBen Walker uint32_t i; 28127e85f52SBen Walker 28227e85f52SBen Walker ele_size = spdk_divide_round_up(sz, iovcnt); 28327e85f52SBen Walker 28427e85f52SBen Walker data = buf; 28527e85f52SBen Walker for (i = 0; i < iovcnt; i++) { 28627e85f52SBen Walker ele_size = spdk_min(ele_size, sz); 28727e85f52SBen Walker assert(ele_size > 0); 28827e85f52SBen Walker 28927e85f52SBen Walker iovs[i].iov_base = data; 29027e85f52SBen Walker iovs[i].iov_len = ele_size; 29127e85f52SBen Walker 29227e85f52SBen Walker data += ele_size; 29327e85f52SBen Walker sz -= ele_size; 29427e85f52SBen Walker } 29527e85f52SBen Walker assert(sz == 0); 29627e85f52SBen Walker } 29727e85f52SBen Walker 2988da995c4Spaul luse static int 2998da995c4Spaul luse _get_task_data_bufs(struct ap_task *task) 3008da995c4Spaul luse { 3018da995c4Spaul luse uint32_t align = 0; 30288754353SZiye Yang uint32_t i = 0; 303221eb3f4Spaul luse int dst_buff_len = g_xfer_size_bytes; 3048da995c4Spaul luse 3058da995c4Spaul luse /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 306712e8cb7SBen Walker * we do this for all modules to keep it simple. 3078da995c4Spaul luse */ 30837b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 3098da995c4Spaul luse align = ALIGN_4K; 3108da995c4Spaul luse } 3118da995c4Spaul luse 31227e85f52SBen Walker if (g_workload_selection == ACCEL_OPC_COMPRESS || 31327e85f52SBen Walker g_workload_selection == ACCEL_OPC_DECOMPRESS) { 31427e85f52SBen Walker task->cur_seg = STAILQ_FIRST(&g_compress_segs); 315*6afbf3dbSpaul luse 316*6afbf3dbSpaul luse if (g_workload_selection == ACCEL_OPC_COMPRESS) { 317*6afbf3dbSpaul luse dst_buff_len = task->cur_seg->compressed_len_padded; 318*6afbf3dbSpaul luse } 319*6afbf3dbSpaul luse 320*6afbf3dbSpaul luse task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 321*6afbf3dbSpaul luse if (task->dst == NULL) { 322*6afbf3dbSpaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 323*6afbf3dbSpaul luse return -ENOMEM; 324*6afbf3dbSpaul luse } 325*6afbf3dbSpaul luse 326*6afbf3dbSpaul luse task->dst_iovs = calloc(g_chained_count, sizeof(struct iovec)); 327*6afbf3dbSpaul luse if (!task->dst_iovs) { 328*6afbf3dbSpaul luse fprintf(stderr, "cannot allocate task->dst_iovs for task=%p\n", task); 329*6afbf3dbSpaul luse return -ENOMEM; 330*6afbf3dbSpaul luse } 331*6afbf3dbSpaul luse task->dst_iovcnt = g_chained_count; 332*6afbf3dbSpaul luse accel_perf_construct_iovs(task->dst, dst_buff_len, task->dst_iovs, task->dst_iovcnt); 333*6afbf3dbSpaul luse 334*6afbf3dbSpaul luse return 0; 335*6afbf3dbSpaul luse } 336*6afbf3dbSpaul luse 337*6afbf3dbSpaul luse if (g_workload_selection == ACCEL_OPC_CRC32C || 33827e85f52SBen Walker g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 339850cd900Spaul luse assert(g_chained_count > 0); 340850cd900Spaul luse task->src_iovcnt = g_chained_count; 34128886ac3Spaul luse task->src_iovs = calloc(task->src_iovcnt, sizeof(struct iovec)); 34228886ac3Spaul luse if (!task->src_iovs) { 34328886ac3Spaul luse fprintf(stderr, "cannot allocated task->src_iovs fot task=%p\n", task); 34488754353SZiye Yang return -ENOMEM; 34588754353SZiye Yang } 34688754353SZiye Yang 34737b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 348850cd900Spaul luse dst_buff_len = g_xfer_size_bytes * g_chained_count; 349221eb3f4Spaul luse } 350221eb3f4Spaul luse 35128886ac3Spaul luse for (i = 0; i < task->src_iovcnt; i++) { 35228886ac3Spaul luse task->src_iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 35328886ac3Spaul luse if (task->src_iovs[i].iov_base == NULL) { 35488754353SZiye Yang return -ENOMEM; 35588754353SZiye Yang } 35628886ac3Spaul luse memset(task->src_iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes); 35728886ac3Spaul luse task->src_iovs[i].iov_len = g_xfer_size_bytes; 35888754353SZiye Yang } 35988754353SZiye Yang 36088754353SZiye Yang } else { 3618da995c4Spaul luse task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 3628da995c4Spaul luse if (task->src == NULL) { 3638da995c4Spaul luse fprintf(stderr, "Unable to alloc src buffer\n"); 3648da995c4Spaul luse return -ENOMEM; 3658da995c4Spaul luse } 36688754353SZiye Yang 36788754353SZiye Yang /* For fill, set the entire src buffer so we can check if verify is enabled. */ 36837b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_FILL) { 36988754353SZiye Yang memset(task->src, g_fill_pattern, g_xfer_size_bytes); 37088754353SZiye Yang } else { 3718da995c4Spaul luse memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 37288754353SZiye Yang } 37388754353SZiye Yang } 3748da995c4Spaul luse 37537b68d72Spaul luse if (g_workload_selection != ACCEL_OPC_CRC32C) { 376221eb3f4Spaul luse task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 3778da995c4Spaul luse if (task->dst == NULL) { 3788da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3798da995c4Spaul luse return -ENOMEM; 3808da995c4Spaul luse } 3818da995c4Spaul luse 3828da995c4Spaul luse /* For compare we want the buffers to match, otherwise not. */ 38337b68d72Spaul luse if (g_workload_selection == ACCEL_OPC_COMPARE) { 384221eb3f4Spaul luse memset(task->dst, DATA_PATTERN, dst_buff_len); 3858da995c4Spaul luse } else { 386221eb3f4Spaul luse memset(task->dst, ~DATA_PATTERN, dst_buff_len); 387221eb3f4Spaul luse } 3888da995c4Spaul luse } 3898da995c4Spaul luse 39084162738Spaul luse /* For dualcast 2 buffers are needed for the operation. */ 39184162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 3928da995c4Spaul luse task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 3938da995c4Spaul luse if (task->dst2 == NULL) { 3948da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3958da995c4Spaul luse return -ENOMEM; 3968da995c4Spaul luse } 3978da995c4Spaul luse memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 3988da995c4Spaul luse } 3998da995c4Spaul luse 4008da995c4Spaul luse return 0; 4018da995c4Spaul luse } 4028da995c4Spaul luse 403ac9a1a83Spaul luse inline static struct ap_task * 404ac9a1a83Spaul luse _get_task(struct worker_thread *worker) 405ac9a1a83Spaul luse { 406ac9a1a83Spaul luse struct ap_task *task; 407ac9a1a83Spaul luse 408ac9a1a83Spaul luse if (!TAILQ_EMPTY(&worker->tasks_pool)) { 409ac9a1a83Spaul luse task = TAILQ_FIRST(&worker->tasks_pool); 410ac9a1a83Spaul luse TAILQ_REMOVE(&worker->tasks_pool, task, link); 411ac9a1a83Spaul luse } else { 412ac9a1a83Spaul luse fprintf(stderr, "Unable to get ap_task\n"); 413ac9a1a83Spaul luse return NULL; 414ac9a1a83Spaul luse } 415ac9a1a83Spaul luse 416ac9a1a83Spaul luse return task; 417ac9a1a83Spaul luse } 418ac9a1a83Spaul luse 419f17e6705Spaul luse /* Submit one operation using the same ap task that just completed. */ 4209f51cf32Spaul luse static void 421ac9a1a83Spaul luse _submit_single(struct worker_thread *worker, struct ap_task *task) 4229f51cf32Spaul luse { 423b9218b7aSpaul luse int random_num; 42440ec8e97Spaul luse int rc = 0; 42512c40f05Spaul luse int flags = 0; 4269f51cf32Spaul luse 4279f51cf32Spaul luse assert(worker); 4289f51cf32Spaul luse 42913067997Spaul luse switch (worker->workload) { 43037b68d72Spaul luse case ACCEL_OPC_COPY: 431e8463f87Spaul luse rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, 43212c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 433e69375bfSpaul luse break; 43437b68d72Spaul luse case ACCEL_OPC_FILL: 4352a0c66d0Spaul luse /* For fill use the first byte of the task->dst buffer */ 436ee7e31f9Spaul luse rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src, 43712c40f05Spaul luse g_xfer_size_bytes, flags, accel_done, task); 438e69375bfSpaul luse break; 43937b68d72Spaul luse case ACCEL_OPC_CRC32C: 440a738acd5Spaul luse rc = spdk_accel_submit_crc32cv(worker->ch, &task->crc_dst, 44128886ac3Spaul luse task->src_iovs, task->src_iovcnt, g_crc32c_seed, 44290c56d96SZiye Yang accel_done, task); 443e69375bfSpaul luse break; 44437b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 44528886ac3Spaul luse rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->src_iovs, task->src_iovcnt, 44612c40f05Spaul luse &task->crc_dst, g_crc32c_seed, flags, accel_done, task); 447221eb3f4Spaul luse break; 44837b68d72Spaul luse case ACCEL_OPC_COMPARE: 449b9218b7aSpaul luse random_num = rand() % 100; 450b9218b7aSpaul luse if (random_num < g_fail_percent_goal) { 451b9218b7aSpaul luse task->expected_status = -EILSEQ; 452b9218b7aSpaul luse *(uint8_t *)task->dst = ~DATA_PATTERN; 453b9218b7aSpaul luse } else { 454b9218b7aSpaul luse task->expected_status = 0; 455b9218b7aSpaul luse *(uint8_t *)task->dst = DATA_PATTERN; 456b9218b7aSpaul luse } 457ee7e31f9Spaul luse rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src, 458e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 459b9218b7aSpaul luse break; 46037b68d72Spaul luse case ACCEL_OPC_DUALCAST: 461ee7e31f9Spaul luse rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, 46212c40f05Spaul luse task->src, g_xfer_size_bytes, flags, accel_done, task); 4630ef079c6Spaul luse break; 46427e85f52SBen Walker case ACCEL_OPC_COMPRESS: 46527e85f52SBen Walker task->src_iovs = task->cur_seg->uncompressed_iovs; 46627e85f52SBen Walker task->src_iovcnt = task->cur_seg->uncompressed_iovcnt; 467*6afbf3dbSpaul luse rc = spdk_accel_submit_compress(worker->ch, task->dst, task->cur_seg->compressed_len_padded, 468*6afbf3dbSpaul luse task->src_iovs, 46927e85f52SBen Walker task->src_iovcnt, &task->compressed_sz, flags, accel_done, task); 47027e85f52SBen Walker break; 47127e85f52SBen Walker case ACCEL_OPC_DECOMPRESS: 47227e85f52SBen Walker task->src_iovs = task->cur_seg->compressed_iovs; 47327e85f52SBen Walker task->src_iovcnt = task->cur_seg->compressed_iovcnt; 47427e85f52SBen Walker rc = spdk_accel_submit_decompress(worker->ch, task->dst_iovs, task->dst_iovcnt, task->src_iovs, 47591f3063bSpaul luse task->src_iovcnt, NULL, flags, accel_done, task); 47627e85f52SBen Walker break; 477e69375bfSpaul luse default: 4782a0c66d0Spaul luse assert(false); 479e69375bfSpaul luse break; 480e69375bfSpaul luse 4812a0c66d0Spaul luse } 48240ec8e97Spaul luse 4836799d46aSpaul luse worker->current_queue_depth++; 48440ec8e97Spaul luse if (rc) { 485e8463f87Spaul luse accel_done(task, rc); 48640ec8e97Spaul luse } 4879f51cf32Spaul luse } 4889f51cf32Spaul luse 4899f51cf32Spaul luse static void 490e150f6b8SZiye Yang _free_task_buffers(struct ap_task *task) 491ac9a1a83Spaul luse { 49288754353SZiye Yang uint32_t i; 49388754353SZiye Yang 494*6afbf3dbSpaul luse if (g_workload_selection == ACCEL_OPC_DECOMPRESS || g_workload_selection == ACCEL_OPC_COMPRESS) { 49527e85f52SBen Walker free(task->dst_iovs); 49627e85f52SBen Walker } else if (g_workload_selection == ACCEL_OPC_CRC32C || 49727e85f52SBen Walker g_workload_selection == ACCEL_OPC_COPY_CRC32C) { 49828886ac3Spaul luse if (task->src_iovs) { 49928886ac3Spaul luse for (i = 0; i < task->src_iovcnt; i++) { 50028886ac3Spaul luse if (task->src_iovs[i].iov_base) { 50128886ac3Spaul luse spdk_dma_free(task->src_iovs[i].iov_base); 50288754353SZiye Yang } 50388754353SZiye Yang } 50428886ac3Spaul luse free(task->src_iovs); 50588754353SZiye Yang } 50688754353SZiye Yang } else { 507ac9a1a83Spaul luse spdk_dma_free(task->src); 50888754353SZiye Yang } 50988754353SZiye Yang 510ac9a1a83Spaul luse spdk_dma_free(task->dst); 51184162738Spaul luse if (g_workload_selection == ACCEL_OPC_DUALCAST) { 512ac9a1a83Spaul luse spdk_dma_free(task->dst2); 513ac9a1a83Spaul luse } 514ac9a1a83Spaul luse } 515ac9a1a83Spaul luse 516221eb3f4Spaul luse static int 51728886ac3Spaul luse _vector_memcmp(void *_dst, struct iovec *src_src_iovs, uint32_t iovcnt) 518221eb3f4Spaul luse { 519221eb3f4Spaul luse uint32_t i; 520221eb3f4Spaul luse uint32_t ttl_len = 0; 521221eb3f4Spaul luse uint8_t *dst = (uint8_t *)_dst; 522221eb3f4Spaul luse 523221eb3f4Spaul luse for (i = 0; i < iovcnt; i++) { 52428886ac3Spaul luse if (memcmp(dst, src_src_iovs[i].iov_base, src_src_iovs[i].iov_len)) { 525221eb3f4Spaul luse return -1; 526221eb3f4Spaul luse } 52728886ac3Spaul luse dst += src_src_iovs[i].iov_len; 52828886ac3Spaul luse ttl_len += src_src_iovs[i].iov_len; 529221eb3f4Spaul luse } 530221eb3f4Spaul luse 531221eb3f4Spaul luse if (ttl_len != iovcnt * g_xfer_size_bytes) { 532221eb3f4Spaul luse return -1; 533221eb3f4Spaul luse } 534221eb3f4Spaul luse 535221eb3f4Spaul luse return 0; 536221eb3f4Spaul luse } 537221eb3f4Spaul luse 53813067997Spaul luse static int _worker_stop(void *arg); 53913067997Spaul luse 540fab40895Spaul luse static void 541df42f358Spaul luse accel_done(void *arg1, int status) 5429f51cf32Spaul luse { 5439f51cf32Spaul luse struct ap_task *task = arg1; 5449f51cf32Spaul luse struct worker_thread *worker = task->worker; 545e69375bfSpaul luse uint32_t sw_crc32c; 5469f51cf32Spaul luse 5479f51cf32Spaul luse assert(worker); 5489f51cf32Spaul luse assert(worker->current_queue_depth > 0); 5499f51cf32Spaul luse 550df42f358Spaul luse if (g_verify && status == 0) { 55113067997Spaul luse switch (worker->workload) { 55237b68d72Spaul luse case ACCEL_OPC_COPY_CRC32C: 55328886ac3Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->src_iovs, task->src_iovcnt, ~g_crc32c_seed); 554221eb3f4Spaul luse if (task->crc_dst != sw_crc32c) { 555221eb3f4Spaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 556221eb3f4Spaul luse worker->xfer_failed++; 557221eb3f4Spaul luse } 55828886ac3Spaul luse if (_vector_memcmp(task->dst, task->src_iovs, task->src_iovcnt)) { 559221eb3f4Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 560221eb3f4Spaul luse worker->xfer_failed++; 561221eb3f4Spaul luse } 562221eb3f4Spaul luse break; 56337b68d72Spaul luse case ACCEL_OPC_CRC32C: 56428886ac3Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->src_iovs, task->src_iovcnt, ~g_crc32c_seed); 565a738acd5Spaul luse if (task->crc_dst != sw_crc32c) { 566e69375bfSpaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 567e69375bfSpaul luse worker->xfer_failed++; 568e69375bfSpaul luse } 569b9218b7aSpaul luse break; 57037b68d72Spaul luse case ACCEL_OPC_COPY: 571b9218b7aSpaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 5729f51cf32Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 5739f51cf32Spaul luse worker->xfer_failed++; 574b9218b7aSpaul luse } 575b9218b7aSpaul luse break; 57637b68d72Spaul luse case ACCEL_OPC_DUALCAST: 5770ef079c6Spaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 5780ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, first destination\n"); 5790ef079c6Spaul luse worker->xfer_failed++; 5800ef079c6Spaul luse } 5810ef079c6Spaul luse if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 5820ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, second destination\n"); 5830ef079c6Spaul luse worker->xfer_failed++; 5840ef079c6Spaul luse } 5850ef079c6Spaul luse break; 58637b68d72Spaul luse case ACCEL_OPC_FILL: 587d207237fSpaul luse if (memcmp(task->dst, task->src, g_xfer_size_bytes)) { 588d207237fSpaul luse SPDK_NOTICELOG("Data miscompare\n"); 589d207237fSpaul luse worker->xfer_failed++; 590d207237fSpaul luse } 591d207237fSpaul luse break; 59237b68d72Spaul luse case ACCEL_OPC_COMPARE: 5938cee297cSpaul luse break; 59427e85f52SBen Walker case ACCEL_OPC_COMPRESS: 59527e85f52SBen Walker break; 59627e85f52SBen Walker case ACCEL_OPC_DECOMPRESS: 59727e85f52SBen Walker if (memcmp(task->dst, task->cur_seg->uncompressed_data, task->cur_seg->uncompressed_len)) { 59827e85f52SBen Walker SPDK_NOTICELOG("Data miscompare on decompression\n"); 59927e85f52SBen Walker worker->xfer_failed++; 60027e85f52SBen Walker } 60127e85f52SBen Walker break; 602b9218b7aSpaul luse default: 603b9218b7aSpaul luse assert(false); 604b9218b7aSpaul luse break; 6059f51cf32Spaul luse } 6069f51cf32Spaul luse } 607b9218b7aSpaul luse 60827e85f52SBen Walker if (worker->workload == ACCEL_OPC_COMPRESS || g_workload_selection == ACCEL_OPC_DECOMPRESS) { 60927e85f52SBen Walker /* Advance the task to the next segment */ 61027e85f52SBen Walker task->cur_seg = STAILQ_NEXT(task->cur_seg, link); 61127e85f52SBen Walker if (task->cur_seg == NULL) { 61227e85f52SBen Walker task->cur_seg = STAILQ_FIRST(&g_compress_segs); 61327e85f52SBen Walker } 61427e85f52SBen Walker } 61527e85f52SBen Walker 616b9218b7aSpaul luse if (task->expected_status == -EILSEQ) { 617df42f358Spaul luse assert(status != 0); 618b9218b7aSpaul luse worker->injected_miscompares++; 61913067997Spaul luse status = 0; 620df42f358Spaul luse } else if (status) { 621712e8cb7SBen Walker /* Expected to pass but the accel module reported an error (ex: COMPARE operation). */ 622b9218b7aSpaul luse worker->xfer_failed++; 623b9218b7aSpaul luse } 624b9218b7aSpaul luse 6259f51cf32Spaul luse worker->xfer_completed++; 6269f51cf32Spaul luse worker->current_queue_depth--; 6279f51cf32Spaul luse 62813067997Spaul luse if (!worker->is_draining && status == 0) { 629451462f6SJim Harris TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 630451462f6SJim Harris task = _get_task(worker); 6319f51cf32Spaul luse _submit_single(worker, task); 632f17e6705Spaul luse } else { 633b34883e0SZiye Yang TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 634f17e6705Spaul luse } 6359f51cf32Spaul luse } 6369f51cf32Spaul luse 6379f51cf32Spaul luse static int 6389f51cf32Spaul luse dump_result(void) 6399f51cf32Spaul luse { 6409f51cf32Spaul luse uint64_t total_completed = 0; 6419f51cf32Spaul luse uint64_t total_failed = 0; 642b9218b7aSpaul luse uint64_t total_miscompared = 0; 6439f51cf32Spaul luse uint64_t total_xfer_per_sec, total_bw_in_MiBps; 6449f51cf32Spaul luse struct worker_thread *worker = g_workers; 6459f51cf32Spaul luse 646445fe74eSpaul luse printf("\nCore,Thread Transfers Bandwidth Failed Miscompares\n"); 647445fe74eSpaul luse printf("------------------------------------------------------------------------\n"); 6489f51cf32Spaul luse while (worker != NULL) { 6499f51cf32Spaul luse 6509f51cf32Spaul luse uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; 6519f51cf32Spaul luse uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / 6529f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 6539f51cf32Spaul luse 6549f51cf32Spaul luse total_completed += worker->xfer_completed; 6559f51cf32Spaul luse total_failed += worker->xfer_failed; 656b9218b7aSpaul luse total_miscompared += worker->injected_miscompares; 6579f51cf32Spaul luse 6589f51cf32Spaul luse if (xfer_per_sec) { 659445fe74eSpaul luse printf("%u,%u%17" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n", 660445fe74eSpaul luse worker->display.core, worker->display.thread, xfer_per_sec, 661b9218b7aSpaul luse bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); 6629f51cf32Spaul luse } 6639f51cf32Spaul luse 6649f51cf32Spaul luse worker = worker->next; 6659f51cf32Spaul luse } 6669f51cf32Spaul luse 6679f51cf32Spaul luse total_xfer_per_sec = total_completed / g_time_in_sec; 6689f51cf32Spaul luse total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 6699f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 6709f51cf32Spaul luse 671445fe74eSpaul luse printf("=========================================================================\n"); 672445fe74eSpaul luse printf("Total:%15" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n", 673b9218b7aSpaul luse total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 6749f51cf32Spaul luse 6759f51cf32Spaul luse return total_failed ? 1 : 0; 6769f51cf32Spaul luse } 6779f51cf32Spaul luse 678e150f6b8SZiye Yang static inline void 679e150f6b8SZiye Yang _free_task_buffers_in_pool(struct worker_thread *worker) 680e150f6b8SZiye Yang { 681e150f6b8SZiye Yang struct ap_task *task; 682e150f6b8SZiye Yang 683e150f6b8SZiye Yang assert(worker); 684e150f6b8SZiye Yang while ((task = TAILQ_FIRST(&worker->tasks_pool))) { 685e150f6b8SZiye Yang TAILQ_REMOVE(&worker->tasks_pool, task, link); 686e150f6b8SZiye Yang _free_task_buffers(task); 687e150f6b8SZiye Yang } 688e150f6b8SZiye Yang } 689e150f6b8SZiye Yang 6909f51cf32Spaul luse static int 6919f51cf32Spaul luse _check_draining(void *arg) 6929f51cf32Spaul luse { 6939f51cf32Spaul luse struct worker_thread *worker = arg; 6949f51cf32Spaul luse 6959f51cf32Spaul luse assert(worker); 6969f51cf32Spaul luse 6979f51cf32Spaul luse if (worker->current_queue_depth == 0) { 698e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 6999f51cf32Spaul luse spdk_poller_unregister(&worker->is_draining_poller); 7009f51cf32Spaul luse unregister_worker(worker); 7019f51cf32Spaul luse } 7029f51cf32Spaul luse 703fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 7049f51cf32Spaul luse } 7059f51cf32Spaul luse 7069f51cf32Spaul luse static int 7079f51cf32Spaul luse _worker_stop(void *arg) 7089f51cf32Spaul luse { 7099f51cf32Spaul luse struct worker_thread *worker = arg; 7109f51cf32Spaul luse 7119f51cf32Spaul luse assert(worker); 7129f51cf32Spaul luse 7139f51cf32Spaul luse spdk_poller_unregister(&worker->stop_poller); 7149f51cf32Spaul luse 7159f51cf32Spaul luse /* now let the worker drain and check it's outstanding IO with a poller */ 7169f51cf32Spaul luse worker->is_draining = true; 717ab0bc5c2SShuhei Matsumoto worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); 7189f51cf32Spaul luse 719fa9e703fSpaul Luse return SPDK_POLLER_BUSY; 7209f51cf32Spaul luse } 7219f51cf32Spaul luse 7229f51cf32Spaul luse static void 723a34fc12bSpaul luse _init_thread(void *arg1) 724a34fc12bSpaul luse { 725a34fc12bSpaul luse struct worker_thread *worker; 726a34fc12bSpaul luse struct ap_task *task; 727998b5d66Spaul luse int i, num_tasks = g_allocate_depth; 728445fe74eSpaul luse struct display_info *display = arg1; 729a34fc12bSpaul luse 730a34fc12bSpaul luse worker = calloc(1, sizeof(*worker)); 731a34fc12bSpaul luse if (worker == NULL) { 732a34fc12bSpaul luse fprintf(stderr, "Unable to allocate worker\n"); 733445fe74eSpaul luse free(display); 734a34fc12bSpaul luse return; 735a34fc12bSpaul luse } 736a34fc12bSpaul luse 73713067997Spaul luse worker->workload = g_workload_selection; 738445fe74eSpaul luse worker->display.core = display->core; 739445fe74eSpaul luse worker->display.thread = display->thread; 740445fe74eSpaul luse free(display); 7419f51cf32Spaul luse worker->core = spdk_env_get_current_core(); 7429f51cf32Spaul luse worker->thread = spdk_get_thread(); 743eea826a2Spaul luse pthread_mutex_lock(&g_workers_lock); 744eea826a2Spaul luse g_num_workers++; 7459f51cf32Spaul luse worker->next = g_workers; 746eea826a2Spaul luse g_workers = worker; 747eea826a2Spaul luse pthread_mutex_unlock(&g_workers_lock); 74834c48f1bSBen Walker worker->ch = spdk_accel_get_io_channel(); 7492dd64cf9Spaul luse if (worker->ch == NULL) { 7502dd64cf9Spaul luse fprintf(stderr, "Unable to get an accel channel\n"); 7512dd64cf9Spaul luse goto error; 7522dd64cf9Spaul luse } 753b9218b7aSpaul luse 754f17e6705Spaul luse TAILQ_INIT(&worker->tasks_pool); 755f17e6705Spaul luse 756ac9a1a83Spaul luse worker->task_base = calloc(num_tasks, sizeof(struct ap_task)); 757ac9a1a83Spaul luse if (worker->task_base == NULL) { 758ac9a1a83Spaul luse fprintf(stderr, "Could not allocate task base.\n"); 759ac9a1a83Spaul luse goto error; 7600cecfcb1Spaul luse } 761ac9a1a83Spaul luse 762ac9a1a83Spaul luse task = worker->task_base; 763ac9a1a83Spaul luse for (i = 0; i < num_tasks; i++) { 764ac9a1a83Spaul luse TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 7654cd7ca9bSJim Harris task->worker = worker; 766ac9a1a83Spaul luse if (_get_task_data_bufs(task)) { 767ac9a1a83Spaul luse fprintf(stderr, "Unable to get data bufs\n"); 768ac9a1a83Spaul luse goto error; 769ac9a1a83Spaul luse } 770ac9a1a83Spaul luse task++; 7719f51cf32Spaul luse } 7729f51cf32Spaul luse 7739f51cf32Spaul luse /* Register a poller that will stop the worker at time elapsed */ 774ab0bc5c2SShuhei Matsumoto worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, 7759f51cf32Spaul luse g_time_in_sec * 1000000ULL); 7769f51cf32Spaul luse 777998b5d66Spaul luse /* Load up queue depth worth of operations. */ 778998b5d66Spaul luse for (i = 0; i < g_queue_depth; i++) { 779ac9a1a83Spaul luse task = _get_task(worker); 780ac9a1a83Spaul luse if (task == NULL) { 781a34fc12bSpaul luse goto error; 782b9218b7aSpaul luse } 783b9218b7aSpaul luse 7849f51cf32Spaul luse _submit_single(worker, task); 7859f51cf32Spaul luse } 786a34fc12bSpaul luse return; 787a34fc12bSpaul luse error: 788e150f6b8SZiye Yang 789e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 790ac9a1a83Spaul luse free(worker->task_base); 791a34fc12bSpaul luse spdk_app_stop(-1); 7929f51cf32Spaul luse } 7939f51cf32Spaul luse 7949f51cf32Spaul luse static void 7959f51cf32Spaul luse accel_perf_start(void *arg1) 7969f51cf32Spaul luse { 797eea826a2Spaul luse struct spdk_cpuset tmp_cpumask = {}; 798eea826a2Spaul luse char thread_name[32]; 799eea826a2Spaul luse uint32_t i; 800445fe74eSpaul luse int j; 801eea826a2Spaul luse struct spdk_thread *thread; 802445fe74eSpaul luse struct display_info *display; 803514be889Spaul luse 8049f51cf32Spaul luse g_tsc_rate = spdk_get_ticks_hz(); 8059f51cf32Spaul luse g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 8069f51cf32Spaul luse 8079260fa0cSpaul luse dump_user_config(); 8089260fa0cSpaul luse 8099f51cf32Spaul luse printf("Running for %d seconds...\n", g_time_in_sec); 8109f51cf32Spaul luse fflush(stdout); 8119f51cf32Spaul luse 812eea826a2Spaul luse /* Create worker threads for each core that was specified. */ 813eea826a2Spaul luse SPDK_ENV_FOREACH_CORE(i) { 814445fe74eSpaul luse for (j = 0; j < g_threads_per_core; j++) { 815445fe74eSpaul luse snprintf(thread_name, sizeof(thread_name), "ap_worker_%u_%u", i, j); 816eea826a2Spaul luse spdk_cpuset_zero(&tmp_cpumask); 817eea826a2Spaul luse spdk_cpuset_set_cpu(&tmp_cpumask, i, true); 818eea826a2Spaul luse thread = spdk_thread_create(thread_name, &tmp_cpumask); 819445fe74eSpaul luse display = calloc(1, sizeof(*display)); 820445fe74eSpaul luse if (display == NULL) { 821445fe74eSpaul luse fprintf(stderr, "Unable to allocate memory\n"); 822445fe74eSpaul luse spdk_app_stop(-1); 823445fe74eSpaul luse return; 824445fe74eSpaul luse } 825445fe74eSpaul luse display->core = i; 826445fe74eSpaul luse display->thread = j; 827445fe74eSpaul luse spdk_thread_send_msg(thread, _init_thread, display); 828445fe74eSpaul luse } 829eea826a2Spaul luse } 8309f51cf32Spaul luse } 8319f51cf32Spaul luse 83227e85f52SBen Walker static void 83327e85f52SBen Walker accel_perf_free_compress_segs(void) 83427e85f52SBen Walker { 83527e85f52SBen Walker struct ap_compress_seg *seg, *tmp; 83627e85f52SBen Walker 83727e85f52SBen Walker STAILQ_FOREACH_SAFE(seg, &g_compress_segs, link, tmp) { 83827e85f52SBen Walker free(seg->uncompressed_iovs); 83927e85f52SBen Walker free(seg->compressed_iovs); 84027e85f52SBen Walker spdk_dma_free(seg->compressed_data); 84127e85f52SBen Walker spdk_dma_free(seg->uncompressed_data); 84227e85f52SBen Walker STAILQ_REMOVE_HEAD(&g_compress_segs, link); 84327e85f52SBen Walker free(seg); 84427e85f52SBen Walker } 84527e85f52SBen Walker } 84627e85f52SBen Walker 84727e85f52SBen Walker struct accel_perf_prep_ctx { 84827e85f52SBen Walker FILE *file; 84927e85f52SBen Walker long remaining; 85027e85f52SBen Walker struct spdk_io_channel *ch; 85127e85f52SBen Walker struct ap_compress_seg *cur_seg; 85227e85f52SBen Walker }; 85327e85f52SBen Walker 85427e85f52SBen Walker static void accel_perf_prep_process_seg(struct accel_perf_prep_ctx *ctx); 85527e85f52SBen Walker 85627e85f52SBen Walker static void 85727e85f52SBen Walker accel_perf_prep_process_seg_cpl(void *ref, int status) 85827e85f52SBen Walker { 85927e85f52SBen Walker struct accel_perf_prep_ctx *ctx = ref; 86027e85f52SBen Walker struct ap_compress_seg *seg; 86127e85f52SBen Walker 86227e85f52SBen Walker if (status != 0) { 86327e85f52SBen Walker fprintf(stderr, "error (%d) on initial compress completion\n", status); 86427e85f52SBen Walker spdk_dma_free(ctx->cur_seg->compressed_data); 86527e85f52SBen Walker spdk_dma_free(ctx->cur_seg->uncompressed_data); 86627e85f52SBen Walker free(ctx->cur_seg); 86727e85f52SBen Walker spdk_put_io_channel(ctx->ch); 86827e85f52SBen Walker fclose(ctx->file); 86927e85f52SBen Walker free(ctx); 87027e85f52SBen Walker spdk_app_stop(-status); 87127e85f52SBen Walker return; 87227e85f52SBen Walker } 87327e85f52SBen Walker 87427e85f52SBen Walker seg = ctx->cur_seg; 87527e85f52SBen Walker 87627e85f52SBen Walker if (g_workload_selection == ACCEL_OPC_DECOMPRESS) { 87727e85f52SBen Walker seg->compressed_iovs = calloc(g_chained_count, sizeof(struct iovec)); 87827e85f52SBen Walker if (seg->compressed_iovs == NULL) { 87927e85f52SBen Walker fprintf(stderr, "unable to allocate iovec\n"); 88027e85f52SBen Walker spdk_dma_free(seg->compressed_data); 88127e85f52SBen Walker spdk_dma_free(seg->uncompressed_data); 88227e85f52SBen Walker free(seg); 88327e85f52SBen Walker spdk_put_io_channel(ctx->ch); 88427e85f52SBen Walker fclose(ctx->file); 88527e85f52SBen Walker free(ctx); 88627e85f52SBen Walker spdk_app_stop(-ENOMEM); 88727e85f52SBen Walker return; 88827e85f52SBen Walker } 88927e85f52SBen Walker seg->compressed_iovcnt = g_chained_count; 89027e85f52SBen Walker 89127e85f52SBen Walker accel_perf_construct_iovs(seg->compressed_data, seg->compressed_len, seg->compressed_iovs, 89227e85f52SBen Walker seg->compressed_iovcnt); 89327e85f52SBen Walker } 89427e85f52SBen Walker 89527e85f52SBen Walker STAILQ_INSERT_TAIL(&g_compress_segs, seg, link); 89627e85f52SBen Walker ctx->remaining -= seg->uncompressed_len; 89727e85f52SBen Walker 89827e85f52SBen Walker accel_perf_prep_process_seg(ctx); 89927e85f52SBen Walker } 90027e85f52SBen Walker 90127e85f52SBen Walker static void 90227e85f52SBen Walker accel_perf_prep_process_seg(struct accel_perf_prep_ctx *ctx) 90327e85f52SBen Walker { 90427e85f52SBen Walker struct ap_compress_seg *seg; 905*6afbf3dbSpaul luse int sz, sz_read, sz_padded; 90627e85f52SBen Walker void *ubuf, *cbuf; 90727e85f52SBen Walker struct iovec iov[1]; 90827e85f52SBen Walker int rc; 90927e85f52SBen Walker 91027e85f52SBen Walker if (ctx->remaining == 0) { 91127e85f52SBen Walker spdk_put_io_channel(ctx->ch); 91227e85f52SBen Walker fclose(ctx->file); 91327e85f52SBen Walker free(ctx); 91427e85f52SBen Walker accel_perf_start(NULL); 91527e85f52SBen Walker return; 91627e85f52SBen Walker } 91727e85f52SBen Walker 91827e85f52SBen Walker sz = spdk_min(ctx->remaining, g_xfer_size_bytes); 919*6afbf3dbSpaul luse /* Add 10% pad to the compress buffer for incompressible data. Note that a real app 920*6afbf3dbSpaul luse * would likely either deal with the failure of not having a large enough buffer 921*6afbf3dbSpaul luse * by submitting another operation with a larger one. Or, like the vbdev module 922*6afbf3dbSpaul luse * does, just accept the error and use the data uncompressed marking it as such in 923*6afbf3dbSpaul luse * its own metadata so that in the future it doesn't try to decompress uncompressed 924*6afbf3dbSpaul luse * data, etc. 925*6afbf3dbSpaul luse */ 926*6afbf3dbSpaul luse sz_padded = sz * COMP_BUF_PAD_PERCENTAGE; 92727e85f52SBen Walker 92827e85f52SBen Walker ubuf = spdk_dma_zmalloc(sz, ALIGN_4K, NULL); 92927e85f52SBen Walker if (!ubuf) { 93027e85f52SBen Walker fprintf(stderr, "unable to allocate uncompress buffer\n"); 93127e85f52SBen Walker rc = -ENOMEM; 93227e85f52SBen Walker goto error; 93327e85f52SBen Walker } 93427e85f52SBen Walker 935*6afbf3dbSpaul luse cbuf = spdk_dma_malloc(sz_padded, ALIGN_4K, NULL); 93627e85f52SBen Walker if (!cbuf) { 93727e85f52SBen Walker fprintf(stderr, "unable to allocate compress buffer\n"); 93827e85f52SBen Walker rc = -ENOMEM; 93927e85f52SBen Walker spdk_dma_free(ubuf); 94027e85f52SBen Walker goto error; 94127e85f52SBen Walker } 94227e85f52SBen Walker 94327e85f52SBen Walker seg = calloc(1, sizeof(*seg)); 94427e85f52SBen Walker if (!seg) { 94527e85f52SBen Walker fprintf(stderr, "unable to allocate comp/decomp segment\n"); 94627e85f52SBen Walker spdk_dma_free(ubuf); 94727e85f52SBen Walker spdk_dma_free(cbuf); 94827e85f52SBen Walker rc = -ENOMEM; 94927e85f52SBen Walker goto error; 95027e85f52SBen Walker } 95127e85f52SBen Walker 95227e85f52SBen Walker sz_read = fread(ubuf, sizeof(uint8_t), sz, ctx->file); 95327e85f52SBen Walker if (sz_read != sz) { 95427e85f52SBen Walker fprintf(stderr, "unable to read input file\n"); 95527e85f52SBen Walker free(seg); 95627e85f52SBen Walker spdk_dma_free(ubuf); 95727e85f52SBen Walker spdk_dma_free(cbuf); 95827e85f52SBen Walker rc = -errno; 95927e85f52SBen Walker goto error; 96027e85f52SBen Walker } 96127e85f52SBen Walker 96227e85f52SBen Walker if (g_workload_selection == ACCEL_OPC_COMPRESS) { 96327e85f52SBen Walker seg->uncompressed_iovs = calloc(g_chained_count, sizeof(struct iovec)); 96427e85f52SBen Walker if (seg->uncompressed_iovs == NULL) { 96527e85f52SBen Walker fprintf(stderr, "unable to allocate iovec\n"); 96627e85f52SBen Walker free(seg); 96727e85f52SBen Walker spdk_dma_free(ubuf); 96827e85f52SBen Walker spdk_dma_free(cbuf); 96927e85f52SBen Walker rc = -ENOMEM; 97027e85f52SBen Walker goto error; 97127e85f52SBen Walker } 97227e85f52SBen Walker seg->uncompressed_iovcnt = g_chained_count; 97327e85f52SBen Walker accel_perf_construct_iovs(ubuf, sz, seg->uncompressed_iovs, seg->uncompressed_iovcnt); 97427e85f52SBen Walker } 97527e85f52SBen Walker 97627e85f52SBen Walker seg->uncompressed_data = ubuf; 97727e85f52SBen Walker seg->uncompressed_len = sz; 97827e85f52SBen Walker seg->compressed_data = cbuf; 97927e85f52SBen Walker seg->compressed_len = sz; 980*6afbf3dbSpaul luse seg->compressed_len_padded = sz_padded; 98127e85f52SBen Walker 98227e85f52SBen Walker ctx->cur_seg = seg; 98327e85f52SBen Walker iov[0].iov_base = seg->uncompressed_data; 98427e85f52SBen Walker iov[0].iov_len = seg->uncompressed_len; 98527e85f52SBen Walker /* Note that anytime a call is made to spdk_accel_submit_compress() there's a chance 98627e85f52SBen Walker * it will fail with -ENOMEM in the event that the destination buffer is not large enough 987*6afbf3dbSpaul luse * to hold the compressed data. This example app simply adds 10% buffer for compressed data 988*6afbf3dbSpaul luse * but real applications may want to consider a more sophisticated method. 98927e85f52SBen Walker */ 990*6afbf3dbSpaul luse rc = spdk_accel_submit_compress(ctx->ch, seg->compressed_data, seg->compressed_len_padded, iov, 1, 99127e85f52SBen Walker &seg->compressed_len, 0, accel_perf_prep_process_seg_cpl, ctx); 99227e85f52SBen Walker if (rc < 0) { 99327e85f52SBen Walker fprintf(stderr, "error (%d) on initial compress submission\n", rc); 99427e85f52SBen Walker goto error; 99527e85f52SBen Walker } 99627e85f52SBen Walker 99727e85f52SBen Walker return; 99827e85f52SBen Walker 99927e85f52SBen Walker error: 100027e85f52SBen Walker spdk_put_io_channel(ctx->ch); 100127e85f52SBen Walker fclose(ctx->file); 100227e85f52SBen Walker free(ctx); 100327e85f52SBen Walker spdk_app_stop(rc); 100427e85f52SBen Walker } 100527e85f52SBen Walker 100627e85f52SBen Walker static void 100727e85f52SBen Walker accel_perf_prep(void *arg1) 100827e85f52SBen Walker { 100927e85f52SBen Walker struct accel_perf_prep_ctx *ctx; 101027e85f52SBen Walker int rc = 0; 101127e85f52SBen Walker 101227e85f52SBen Walker if (g_workload_selection != ACCEL_OPC_COMPRESS && 101327e85f52SBen Walker g_workload_selection != ACCEL_OPC_DECOMPRESS) { 101427e85f52SBen Walker accel_perf_start(arg1); 101527e85f52SBen Walker return; 101627e85f52SBen Walker } 101727e85f52SBen Walker 101827e85f52SBen Walker if (g_cd_file_in_name == NULL) { 101927e85f52SBen Walker fprintf(stdout, "A filename is required.\n"); 102027e85f52SBen Walker rc = -EINVAL; 102127e85f52SBen Walker goto error_end; 102227e85f52SBen Walker } 102327e85f52SBen Walker 102427e85f52SBen Walker if (g_workload_selection == ACCEL_OPC_COMPRESS && g_verify) { 102527e85f52SBen Walker fprintf(stdout, "\nCompression does not support the verify option, aborting.\n"); 102627e85f52SBen Walker rc = -ENOTSUP; 102727e85f52SBen Walker goto error_end; 102827e85f52SBen Walker } 102927e85f52SBen Walker 103027e85f52SBen Walker printf("Preparing input file...\n"); 103127e85f52SBen Walker 103227e85f52SBen Walker ctx = calloc(1, sizeof(*ctx)); 103327e85f52SBen Walker if (ctx == NULL) { 103427e85f52SBen Walker rc = -ENOMEM; 103527e85f52SBen Walker goto error_end; 103627e85f52SBen Walker } 103727e85f52SBen Walker 103827e85f52SBen Walker ctx->file = fopen(g_cd_file_in_name, "r"); 103927e85f52SBen Walker if (ctx->file == NULL) { 104027e85f52SBen Walker fprintf(stderr, "Could not open file %s.\n", g_cd_file_in_name); 104127e85f52SBen Walker rc = -errno; 104227e85f52SBen Walker goto error_ctx; 104327e85f52SBen Walker } 104427e85f52SBen Walker 104527e85f52SBen Walker fseek(ctx->file, 0L, SEEK_END); 104627e85f52SBen Walker ctx->remaining = ftell(ctx->file); 104727e85f52SBen Walker fseek(ctx->file, 0L, SEEK_SET); 104827e85f52SBen Walker 104927e85f52SBen Walker ctx->ch = spdk_accel_get_io_channel(); 105027e85f52SBen Walker if (ctx->ch == NULL) { 105127e85f52SBen Walker rc = -EAGAIN; 105227e85f52SBen Walker goto error_file; 105327e85f52SBen Walker } 105427e85f52SBen Walker 105527e85f52SBen Walker if (g_xfer_size_bytes == 0) { 105627e85f52SBen Walker /* size of 0 means "file at a time" */ 105727e85f52SBen Walker g_xfer_size_bytes = ctx->remaining; 105827e85f52SBen Walker } 105927e85f52SBen Walker 106027e85f52SBen Walker accel_perf_prep_process_seg(ctx); 106127e85f52SBen Walker return; 106227e85f52SBen Walker 106327e85f52SBen Walker error_file: 106427e85f52SBen Walker fclose(ctx->file); 106527e85f52SBen Walker error_ctx: 106627e85f52SBen Walker free(ctx); 106727e85f52SBen Walker error_end: 106827e85f52SBen Walker spdk_app_stop(rc); 106927e85f52SBen Walker } 107027e85f52SBen Walker 10719f51cf32Spaul luse int 10729f51cf32Spaul luse main(int argc, char **argv) 10739f51cf32Spaul luse { 10749f51cf32Spaul luse struct worker_thread *worker, *tmp; 10759f51cf32Spaul luse 10769f51cf32Spaul luse pthread_mutex_init(&g_workers_lock, NULL); 10779260fa0cSpaul luse spdk_app_opts_init(&g_opts, sizeof(g_opts)); 10789260fa0cSpaul luse g_opts.name = "accel_perf"; 10799260fa0cSpaul luse g_opts.reactor_mask = "0x1"; 108027e85f52SBen Walker if (spdk_app_parse_args(argc, argv, &g_opts, "a:C:o:q:t:yw:P:f:T:l:", NULL, parse_args, 10811e2b38baSyidong0635 usage) != SPDK_APP_PARSE_ARGS_SUCCESS) { 10829b189667Spaul luse g_rc = -1; 10839f51cf32Spaul luse goto cleanup; 10849f51cf32Spaul luse } 10859f51cf32Spaul luse 108637b68d72Spaul luse if ((g_workload_selection != ACCEL_OPC_COPY) && 108737b68d72Spaul luse (g_workload_selection != ACCEL_OPC_FILL) && 108837b68d72Spaul luse (g_workload_selection != ACCEL_OPC_CRC32C) && 108937b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COPY_CRC32C) && 109037b68d72Spaul luse (g_workload_selection != ACCEL_OPC_COMPARE) && 109127e85f52SBen Walker (g_workload_selection != ACCEL_OPC_COMPRESS) && 109227e85f52SBen Walker (g_workload_selection != ACCEL_OPC_DECOMPRESS) && 109384162738Spaul luse (g_workload_selection != ACCEL_OPC_DUALCAST)) { 10942a0c66d0Spaul luse usage(); 10959b189667Spaul luse g_rc = -1; 10962a0c66d0Spaul luse goto cleanup; 10972a0c66d0Spaul luse } 10982a0c66d0Spaul luse 1099e1bf63afSJim Harris if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) { 1100e1bf63afSJim Harris fprintf(stdout, "allocate depth must be at least as big as queue depth\n"); 1101e1bf63afSJim Harris usage(); 1102e1bf63afSJim Harris g_rc = -1; 1103e1bf63afSJim Harris goto cleanup; 1104e1bf63afSJim Harris } 1105e1bf63afSJim Harris 1106e1bf63afSJim Harris if (g_allocate_depth == 0) { 1107e1bf63afSJim Harris g_allocate_depth = g_queue_depth; 1108e1bf63afSJim Harris } 1109e1bf63afSJim Harris 111037b68d72Spaul luse if ((g_workload_selection == ACCEL_OPC_CRC32C || g_workload_selection == ACCEL_OPC_COPY_CRC32C) && 1111850cd900Spaul luse g_chained_count == 0) { 111288754353SZiye Yang usage(); 111388754353SZiye Yang g_rc = -1; 111488754353SZiye Yang goto cleanup; 111588754353SZiye Yang } 111688754353SZiye Yang 111727e85f52SBen Walker g_rc = spdk_app_start(&g_opts, accel_perf_prep, NULL); 11189b189667Spaul luse if (g_rc) { 11199f51cf32Spaul luse SPDK_ERRLOG("ERROR starting application\n"); 11209f51cf32Spaul luse } 11219f51cf32Spaul luse 11229f51cf32Spaul luse pthread_mutex_destroy(&g_workers_lock); 11239f51cf32Spaul luse 11249f51cf32Spaul luse worker = g_workers; 11259f51cf32Spaul luse while (worker) { 11269f51cf32Spaul luse tmp = worker->next; 11279f51cf32Spaul luse free(worker); 11289f51cf32Spaul luse worker = tmp; 11299f51cf32Spaul luse } 11309f51cf32Spaul luse cleanup: 113127e85f52SBen Walker accel_perf_free_compress_segs(); 11329f51cf32Spaul luse spdk_app_fini(); 11339b189667Spaul luse return g_rc; 11349f51cf32Spaul luse } 1135