19f51cf32Spaul luse /*- 29f51cf32Spaul luse * BSD LICENSE 39f51cf32Spaul luse * 49f51cf32Spaul luse * Copyright (c) Intel Corporation. 59f51cf32Spaul luse * All rights reserved. 69f51cf32Spaul luse * 79f51cf32Spaul luse * Redistribution and use in source and binary forms, with or without 89f51cf32Spaul luse * modification, are permitted provided that the following conditions 99f51cf32Spaul luse * are met: 109f51cf32Spaul luse * 119f51cf32Spaul luse * * Redistributions of source code must retain the above copyright 129f51cf32Spaul luse * notice, this list of conditions and the following disclaimer. 139f51cf32Spaul luse * * Redistributions in binary form must reproduce the above copyright 149f51cf32Spaul luse * notice, this list of conditions and the following disclaimer in 159f51cf32Spaul luse * the documentation and/or other materials provided with the 169f51cf32Spaul luse * distribution. 179f51cf32Spaul luse * * Neither the name of Intel Corporation nor the names of its 189f51cf32Spaul luse * contributors may be used to endorse or promote products derived 199f51cf32Spaul luse * from this software without specific prior written permission. 209f51cf32Spaul luse * 219f51cf32Spaul luse * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 229f51cf32Spaul luse * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 239f51cf32Spaul luse * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 249f51cf32Spaul luse * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 259f51cf32Spaul luse * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 269f51cf32Spaul luse * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 279f51cf32Spaul luse * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 289f51cf32Spaul luse * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 299f51cf32Spaul luse * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 309f51cf32Spaul luse * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 319f51cf32Spaul luse * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 329f51cf32Spaul luse */ 339f51cf32Spaul luse 349f51cf32Spaul luse #include "spdk/stdinc.h" 359f51cf32Spaul luse #include "spdk/thread.h" 369f51cf32Spaul luse #include "spdk/env.h" 379f51cf32Spaul luse #include "spdk/event.h" 389f51cf32Spaul luse #include "spdk/log.h" 399f51cf32Spaul luse #include "spdk/string.h" 409f51cf32Spaul luse #include "spdk/accel_engine.h" 41e69375bfSpaul luse #include "spdk/crc32.h" 420cecfcb1Spaul luse #include "spdk/util.h" 439f51cf32Spaul luse 44b9218b7aSpaul luse #define DATA_PATTERN 0x5a 450ef079c6Spaul luse #define ALIGN_4K 0x1000 46b9218b7aSpaul luse 479f51cf32Spaul luse static uint64_t g_tsc_rate; 489f51cf32Spaul luse static uint64_t g_tsc_end; 499b189667Spaul luse static int g_rc; 509f51cf32Spaul luse static int g_xfer_size_bytes = 4096; 519f51cf32Spaul luse static int g_queue_depth = 32; 52e1bf63afSJim Harris /* g_allocate_depth indicates how many tasks we allocate per worker. It will 53e1bf63afSJim Harris * be at least as much as the queue depth. 
54e1bf63afSJim Harris */ 55e1bf63afSJim Harris static int g_allocate_depth = 0; 56f17e6705Spaul luse static int g_ops_per_batch = 0; 57445fe74eSpaul luse static int g_threads_per_core = 1; 589f51cf32Spaul luse static int g_time_in_sec = 5; 59e69375bfSpaul luse static uint32_t g_crc32c_seed = 0; 6088754353SZiye Yang static uint32_t g_crc32c_chained_count = 1; 61b9218b7aSpaul luse static int g_fail_percent_goal = 0; 6289495464Spaul luse static uint8_t g_fill_pattern = 255; 639f51cf32Spaul luse static bool g_verify = false; 642a0c66d0Spaul luse static const char *g_workload_type = NULL; 65514be889Spaul luse static enum accel_capability g_workload_selection; 669f51cf32Spaul luse static struct worker_thread *g_workers = NULL; 679f51cf32Spaul luse static int g_num_workers = 0; 689f51cf32Spaul luse static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; 69cdefd3d3Spaul luse 70cdefd3d3Spaul luse struct worker_thread; 71cdefd3d3Spaul luse static void accel_done(void *ref, int status); 72cdefd3d3Spaul luse 73445fe74eSpaul luse struct display_info { 74445fe74eSpaul luse int core; 75445fe74eSpaul luse int thread; 76445fe74eSpaul luse }; 77445fe74eSpaul luse 78cdefd3d3Spaul luse struct ap_task { 79cdefd3d3Spaul luse void *src; 8088754353SZiye Yang struct iovec *iovs; 8188754353SZiye Yang uint32_t iov_cnt; 82cdefd3d3Spaul luse void *dst; 83cdefd3d3Spaul luse void *dst2; 84221eb3f4Spaul luse uint32_t crc_dst; 85cdefd3d3Spaul luse struct worker_thread *worker; 86cdefd3d3Spaul luse int expected_status; /* used for the compare operation */ 87cdefd3d3Spaul luse TAILQ_ENTRY(ap_task) link; 88cdefd3d3Spaul luse }; 899f51cf32Spaul luse 90f17e6705Spaul luse struct accel_batch { 91f17e6705Spaul luse int cmd_count; 92f17e6705Spaul luse struct spdk_accel_batch *batch; 93f17e6705Spaul luse struct worker_thread *worker; 94f17e6705Spaul luse TAILQ_ENTRY(accel_batch) link; 95f17e6705Spaul luse }; 96f17e6705Spaul luse 979f51cf32Spaul luse struct worker_thread { 989f51cf32Spaul luse 
struct spdk_io_channel *ch; 999f51cf32Spaul luse uint64_t xfer_completed; 1009f51cf32Spaul luse uint64_t xfer_failed; 101b9218b7aSpaul luse uint64_t injected_miscompares; 1029f51cf32Spaul luse uint64_t current_queue_depth; 103ac9a1a83Spaul luse TAILQ_HEAD(, ap_task) tasks_pool; 1049f51cf32Spaul luse struct worker_thread *next; 1059f51cf32Spaul luse unsigned core; 1069f51cf32Spaul luse struct spdk_thread *thread; 1079f51cf32Spaul luse bool is_draining; 1089f51cf32Spaul luse struct spdk_poller *is_draining_poller; 1099f51cf32Spaul luse struct spdk_poller *stop_poller; 110ac9a1a83Spaul luse void *task_base; 111f17e6705Spaul luse struct accel_batch *batch_base; 112445fe74eSpaul luse struct display_info display; 113f17e6705Spaul luse TAILQ_HEAD(, accel_batch) in_prep_batches; 114f17e6705Spaul luse TAILQ_HEAD(, accel_batch) in_use_batches; 115f17e6705Spaul luse TAILQ_HEAD(, accel_batch) to_submit_batches; 1169f51cf32Spaul luse }; 1179f51cf32Spaul luse 1189f51cf32Spaul luse static void 1199f51cf32Spaul luse dump_user_config(struct spdk_app_opts *opts) 1209f51cf32Spaul luse { 1219f51cf32Spaul luse printf("SPDK Configuration:\n"); 1229f51cf32Spaul luse printf("Core mask: %s\n\n", opts->reactor_mask); 1239f51cf32Spaul luse printf("Accel Perf Configuration:\n"); 1242a0c66d0Spaul luse printf("Workload Type: %s\n", g_workload_type); 125221eb3f4Spaul luse if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) { 126b9218b7aSpaul luse printf("CRC-32C seed: %u\n", g_crc32c_seed); 127221eb3f4Spaul luse printf("vector count %u\n", g_crc32c_chained_count); 12889495464Spaul luse } else if (g_workload_selection == ACCEL_FILL) { 12989495464Spaul luse printf("Fill pattern: 0x%x\n", g_fill_pattern); 130b9218b7aSpaul luse } else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) { 13189495464Spaul luse printf("Failure inject: %u percent\n", g_fail_percent_goal); 132e69375bfSpaul luse } 133221eb3f4Spaul luse if (g_workload_selection == 
ACCEL_COPY_CRC32C) { 134221eb3f4Spaul luse printf("Vector size: %u bytes\n", g_xfer_size_bytes); 135221eb3f4Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count); 136221eb3f4Spaul luse } else { 1379f51cf32Spaul luse printf("Transfer size: %u bytes\n", g_xfer_size_bytes); 138221eb3f4Spaul luse } 1399f51cf32Spaul luse printf("Queue depth: %u\n", g_queue_depth); 140e1bf63afSJim Harris printf("Allocate depth: %u\n", g_allocate_depth); 141445fe74eSpaul luse printf("# threads/core: %u\n", g_threads_per_core); 1429f51cf32Spaul luse printf("Run time: %u seconds\n", g_time_in_sec); 143f17e6705Spaul luse if (g_ops_per_batch > 0) { 144f17e6705Spaul luse printf("Batching: %u operations\n", g_ops_per_batch); 145f17e6705Spaul luse } else { 146f17e6705Spaul luse printf("Batching: Disabled\n"); 147f17e6705Spaul luse } 1489f51cf32Spaul luse printf("Verify: %s\n\n", g_verify ? "Yes" : "No"); 1499f51cf32Spaul luse } 1509f51cf32Spaul luse 1519f51cf32Spaul luse static void 1529f51cf32Spaul luse usage(void) 1539f51cf32Spaul luse { 1549f51cf32Spaul luse printf("accel_perf options:\n"); 1559f51cf32Spaul luse printf("\t[-h help message]\n"); 156f17e6705Spaul luse printf("\t[-q queue depth per core]\n"); 157221eb3f4Spaul luse printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n"); 158445fe74eSpaul luse printf("\t[-T number of threads per core\n"); 15988754353SZiye Yang printf("\t[-n number of channels]\n"); 1609f51cf32Spaul luse printf("\t[-o transfer size in bytes]\n"); 1619f51cf32Spaul luse printf("\t[-t time in seconds]\n"); 162221eb3f4Spaul luse printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n"); 163e69375bfSpaul luse printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); 164b9218b7aSpaul luse printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); 16589495464Spaul luse 
printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); 1662a0c66d0Spaul luse printf("\t[-y verify result if this switch is on]\n"); 167f17e6705Spaul luse printf("\t[-b batch this number of operations at a time (default 0 = disabled)]\n"); 168e1bf63afSJim Harris printf("\t[-a tasks to allocate per core (default: same value as -q)]\n"); 169e1bf63afSJim Harris printf("\t\tCan be used to spread operations across a wider range of memory.\n"); 1709f51cf32Spaul luse } 1719f51cf32Spaul luse 1729f51cf32Spaul luse static int 1739f51cf32Spaul luse parse_args(int argc, char *argv) 1749f51cf32Spaul luse { 175358b84b4SZiye Yang int argval = 0; 176c82d5789SJim Harris 1779f51cf32Spaul luse switch (argc) { 178e1bf63afSJim Harris case 'a': 179f17e6705Spaul luse case 'b': 180c82d5789SJim Harris case 'C': 181c82d5789SJim Harris case 'f': 182c82d5789SJim Harris case 'T': 183c82d5789SJim Harris case 'o': 184c82d5789SJim Harris case 'P': 185c82d5789SJim Harris case 'q': 186c82d5789SJim Harris case 's': 187c82d5789SJim Harris case 't': 188c82d5789SJim Harris argval = spdk_strtol(optarg, 10); 189c82d5789SJim Harris if (argval < 0) { 190c82d5789SJim Harris fprintf(stderr, "-%c option must be non-negative.\n", argc); 191c82d5789SJim Harris usage(); 192c82d5789SJim Harris return 1; 193c82d5789SJim Harris } 194c82d5789SJim Harris break; 195c82d5789SJim Harris default: 196c82d5789SJim Harris break; 197c82d5789SJim Harris }; 198c82d5789SJim Harris 199c82d5789SJim Harris switch (argc) { 200e1bf63afSJim Harris case 'a': 201e1bf63afSJim Harris g_allocate_depth = argval; 202e1bf63afSJim Harris break; 203c82d5789SJim Harris case 'b': 204c82d5789SJim Harris g_ops_per_batch = argval; 205f17e6705Spaul luse break; 20688754353SZiye Yang case 'C': 207c82d5789SJim Harris g_crc32c_chained_count = argval; 20888754353SZiye Yang break; 20989495464Spaul luse case 'f': 210c82d5789SJim Harris g_fill_pattern = (uint8_t)argval; 21189495464Spaul luse break; 212445fe74eSpaul luse case 'T': 
213c82d5789SJim Harris g_threads_per_core = argval; 214445fe74eSpaul luse break; 2159f51cf32Spaul luse case 'o': 216c82d5789SJim Harris g_xfer_size_bytes = argval; 2179f51cf32Spaul luse break; 218b9218b7aSpaul luse case 'P': 219c82d5789SJim Harris g_fail_percent_goal = argval; 220b9218b7aSpaul luse break; 2219f51cf32Spaul luse case 'q': 222c82d5789SJim Harris g_queue_depth = argval; 2239f51cf32Spaul luse break; 224e69375bfSpaul luse case 's': 225c82d5789SJim Harris g_crc32c_seed = argval; 226e69375bfSpaul luse break; 2279f51cf32Spaul luse case 't': 228c82d5789SJim Harris g_time_in_sec = argval; 2299f51cf32Spaul luse break; 2309f51cf32Spaul luse case 'y': 2319f51cf32Spaul luse g_verify = true; 2329f51cf32Spaul luse break; 2332a0c66d0Spaul luse case 'w': 2342a0c66d0Spaul luse g_workload_type = optarg; 235514be889Spaul luse if (!strcmp(g_workload_type, "copy")) { 236514be889Spaul luse g_workload_selection = ACCEL_COPY; 237514be889Spaul luse } else if (!strcmp(g_workload_type, "fill")) { 238514be889Spaul luse g_workload_selection = ACCEL_FILL; 239e69375bfSpaul luse } else if (!strcmp(g_workload_type, "crc32c")) { 240e69375bfSpaul luse g_workload_selection = ACCEL_CRC32C; 241221eb3f4Spaul luse } else if (!strcmp(g_workload_type, "copy_crc32c")) { 242221eb3f4Spaul luse g_workload_selection = ACCEL_COPY_CRC32C; 243b9218b7aSpaul luse } else if (!strcmp(g_workload_type, "compare")) { 244b9218b7aSpaul luse g_workload_selection = ACCEL_COMPARE; 2450ef079c6Spaul luse } else if (!strcmp(g_workload_type, "dualcast")) { 2460ef079c6Spaul luse g_workload_selection = ACCEL_DUALCAST; 247514be889Spaul luse } 2482a0c66d0Spaul luse break; 2499f51cf32Spaul luse default: 2509f51cf32Spaul luse usage(); 2519f51cf32Spaul luse return 1; 2529f51cf32Spaul luse } 25388754353SZiye Yang 2549f51cf32Spaul luse return 0; 2559f51cf32Spaul luse } 2569f51cf32Spaul luse 257eea826a2Spaul luse static int dump_result(void); 2589f51cf32Spaul luse static void 2599f51cf32Spaul luse unregister_worker(void 
*arg1) 2609f51cf32Spaul luse { 2619f51cf32Spaul luse struct worker_thread *worker = arg1; 2629f51cf32Spaul luse 263ac9a1a83Spaul luse free(worker->task_base); 264f17e6705Spaul luse free(worker->batch_base); 2659f51cf32Spaul luse spdk_put_io_channel(worker->ch); 2669f51cf32Spaul luse pthread_mutex_lock(&g_workers_lock); 2679f51cf32Spaul luse assert(g_num_workers >= 1); 2689f51cf32Spaul luse if (--g_num_workers == 0) { 2699f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2709b189667Spaul luse g_rc = dump_result(); 2719f51cf32Spaul luse spdk_app_stop(0); 2729f51cf32Spaul luse } 2739f51cf32Spaul luse pthread_mutex_unlock(&g_workers_lock); 2749f51cf32Spaul luse } 2759f51cf32Spaul luse 2768da995c4Spaul luse static int 2778da995c4Spaul luse _get_task_data_bufs(struct ap_task *task) 2788da995c4Spaul luse { 2798da995c4Spaul luse uint32_t align = 0; 28088754353SZiye Yang uint32_t i = 0; 281221eb3f4Spaul luse int dst_buff_len = g_xfer_size_bytes; 2828da995c4Spaul luse 2838da995c4Spaul luse /* For dualcast, the DSA HW requires 4K alignment on destination addresses but 2848da995c4Spaul luse * we do this for all engines to keep it simple. 
2858da995c4Spaul luse */ 2868da995c4Spaul luse if (g_workload_selection == ACCEL_DUALCAST) { 2878da995c4Spaul luse align = ALIGN_4K; 2888da995c4Spaul luse } 2898da995c4Spaul luse 290221eb3f4Spaul luse if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) { 29188754353SZiye Yang assert(g_crc32c_chained_count > 0); 29288754353SZiye Yang task->iov_cnt = g_crc32c_chained_count; 29388754353SZiye Yang task->iovs = calloc(task->iov_cnt, sizeof(struct iovec)); 29488754353SZiye Yang if (!task->iovs) { 29588754353SZiye Yang fprintf(stderr, "cannot allocated task->iovs fot task=%p\n", task); 29688754353SZiye Yang return -ENOMEM; 29788754353SZiye Yang } 29888754353SZiye Yang 299221eb3f4Spaul luse if (g_workload_selection == ACCEL_COPY_CRC32C) { 300221eb3f4Spaul luse dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count; 301221eb3f4Spaul luse } 302221eb3f4Spaul luse 30388754353SZiye Yang for (i = 0; i < task->iov_cnt; i++) { 30488754353SZiye Yang task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 30588754353SZiye Yang if (task->iovs[i].iov_base == NULL) { 30688754353SZiye Yang return -ENOMEM; 30788754353SZiye Yang } 30888754353SZiye Yang memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes); 30988754353SZiye Yang task->iovs[i].iov_len = g_xfer_size_bytes; 31088754353SZiye Yang } 31188754353SZiye Yang 31288754353SZiye Yang } else { 3138da995c4Spaul luse task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); 3148da995c4Spaul luse if (task->src == NULL) { 3158da995c4Spaul luse fprintf(stderr, "Unable to alloc src buffer\n"); 3168da995c4Spaul luse return -ENOMEM; 3178da995c4Spaul luse } 31888754353SZiye Yang 31988754353SZiye Yang /* For fill, set the entire src buffer so we can check if verify is enabled. 
*/ 32088754353SZiye Yang if (g_workload_selection == ACCEL_FILL) { 32188754353SZiye Yang memset(task->src, g_fill_pattern, g_xfer_size_bytes); 32288754353SZiye Yang } else { 3238da995c4Spaul luse memset(task->src, DATA_PATTERN, g_xfer_size_bytes); 32488754353SZiye Yang } 32588754353SZiye Yang } 3268da995c4Spaul luse 327221eb3f4Spaul luse if (g_workload_selection != ACCEL_COPY_CRC32C) { 328221eb3f4Spaul luse task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL); 3298da995c4Spaul luse if (task->dst == NULL) { 3308da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3318da995c4Spaul luse return -ENOMEM; 3328da995c4Spaul luse } 3338da995c4Spaul luse 3348da995c4Spaul luse /* For compare we want the buffers to match, otherwise not. */ 3358da995c4Spaul luse if (g_workload_selection == ACCEL_COMPARE) { 336221eb3f4Spaul luse memset(task->dst, DATA_PATTERN, dst_buff_len); 3378da995c4Spaul luse } else { 338221eb3f4Spaul luse memset(task->dst, ~DATA_PATTERN, dst_buff_len); 339221eb3f4Spaul luse } 3408da995c4Spaul luse } 3418da995c4Spaul luse 3428da995c4Spaul luse if (g_workload_selection == ACCEL_DUALCAST) { 3438da995c4Spaul luse task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); 3448da995c4Spaul luse if (task->dst2 == NULL) { 3458da995c4Spaul luse fprintf(stderr, "Unable to alloc dst buffer\n"); 3468da995c4Spaul luse return -ENOMEM; 3478da995c4Spaul luse } 3488da995c4Spaul luse memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); 3498da995c4Spaul luse } 3508da995c4Spaul luse 3518da995c4Spaul luse return 0; 3528da995c4Spaul luse } 3538da995c4Spaul luse 354ac9a1a83Spaul luse inline static struct ap_task * 355ac9a1a83Spaul luse _get_task(struct worker_thread *worker) 356ac9a1a83Spaul luse { 357ac9a1a83Spaul luse struct ap_task *task; 358ac9a1a83Spaul luse 359ac9a1a83Spaul luse if (!TAILQ_EMPTY(&worker->tasks_pool)) { 360ac9a1a83Spaul luse task = TAILQ_FIRST(&worker->tasks_pool); 361ac9a1a83Spaul luse TAILQ_REMOVE(&worker->tasks_pool, task, link); 
362ac9a1a83Spaul luse } else { 363ac9a1a83Spaul luse fprintf(stderr, "Unable to get ap_task\n"); 364ac9a1a83Spaul luse return NULL; 365ac9a1a83Spaul luse } 366ac9a1a83Spaul luse 367ac9a1a83Spaul luse return task; 368ac9a1a83Spaul luse } 369ac9a1a83Spaul luse 370f17e6705Spaul luse /* Submit one operation using the same ap task that just completed. */ 3719f51cf32Spaul luse static void 372ac9a1a83Spaul luse _submit_single(struct worker_thread *worker, struct ap_task *task) 3739f51cf32Spaul luse { 374b9218b7aSpaul luse int random_num; 37540ec8e97Spaul luse int rc = 0; 3769f51cf32Spaul luse 3779f51cf32Spaul luse assert(worker); 3789f51cf32Spaul luse 379e69375bfSpaul luse switch (g_workload_selection) { 380e69375bfSpaul luse case ACCEL_COPY: 381e8463f87Spaul luse rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, 382e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 383e69375bfSpaul luse break; 384e69375bfSpaul luse case ACCEL_FILL: 3852a0c66d0Spaul luse /* For fill use the first byte of the task->dst buffer */ 386ee7e31f9Spaul luse rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src, 387e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 388e69375bfSpaul luse break; 389e69375bfSpaul luse case ACCEL_CRC32C: 390a738acd5Spaul luse rc = spdk_accel_submit_crc32cv(worker->ch, &task->crc_dst, 39188754353SZiye Yang task->iovs, task->iov_cnt, g_crc32c_seed, 39290c56d96SZiye Yang accel_done, task); 393e69375bfSpaul luse break; 394221eb3f4Spaul luse case ACCEL_COPY_CRC32C: 395221eb3f4Spaul luse rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->iovs, task->iov_cnt, 396221eb3f4Spaul luse &task->crc_dst, g_crc32c_seed, accel_done, task); 397221eb3f4Spaul luse break; 398b9218b7aSpaul luse case ACCEL_COMPARE: 399b9218b7aSpaul luse random_num = rand() % 100; 400b9218b7aSpaul luse if (random_num < g_fail_percent_goal) { 401b9218b7aSpaul luse task->expected_status = -EILSEQ; 402b9218b7aSpaul luse *(uint8_t *)task->dst = 
~DATA_PATTERN; 403b9218b7aSpaul luse } else { 404b9218b7aSpaul luse task->expected_status = 0; 405b9218b7aSpaul luse *(uint8_t *)task->dst = DATA_PATTERN; 406b9218b7aSpaul luse } 407ee7e31f9Spaul luse rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src, 408e8463f87Spaul luse g_xfer_size_bytes, accel_done, task); 409b9218b7aSpaul luse break; 4100ef079c6Spaul luse case ACCEL_DUALCAST: 411ee7e31f9Spaul luse rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, 412e8463f87Spaul luse task->src, g_xfer_size_bytes, accel_done, task); 4130ef079c6Spaul luse break; 414e69375bfSpaul luse default: 4152a0c66d0Spaul luse assert(false); 416e69375bfSpaul luse break; 417e69375bfSpaul luse 4182a0c66d0Spaul luse } 41940ec8e97Spaul luse 42040ec8e97Spaul luse if (rc) { 421e8463f87Spaul luse accel_done(task, rc); 42240ec8e97Spaul luse } 4239f51cf32Spaul luse } 4249f51cf32Spaul luse 425fab40895Spaul luse static int 426f17e6705Spaul luse _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task, 427f17e6705Spaul luse struct accel_batch *worker_batch) 428fab40895Spaul luse { 429f17e6705Spaul luse struct spdk_accel_batch *batch = worker_batch->batch; 430fab40895Spaul luse int rc = 0; 431fab40895Spaul luse 432f17e6705Spaul luse worker_batch->cmd_count++; 433f17e6705Spaul luse assert(worker_batch->cmd_count <= g_ops_per_batch); 434f17e6705Spaul luse 435fab40895Spaul luse switch (g_workload_selection) { 436fab40895Spaul luse case ACCEL_COPY: 437fab40895Spaul luse rc = spdk_accel_batch_prep_copy(worker->ch, batch, task->dst, 438fab40895Spaul luse task->src, g_xfer_size_bytes, accel_done, task); 439fab40895Spaul luse break; 440fab40895Spaul luse case ACCEL_DUALCAST: 441fab40895Spaul luse rc = spdk_accel_batch_prep_dualcast(worker->ch, batch, task->dst, task->dst2, 442fab40895Spaul luse task->src, g_xfer_size_bytes, accel_done, task); 443fab40895Spaul luse break; 444fab40895Spaul luse case ACCEL_COMPARE: 445fab40895Spaul luse rc = 
spdk_accel_batch_prep_compare(worker->ch, batch, task->dst, task->src, 446fab40895Spaul luse g_xfer_size_bytes, accel_done, task); 447fab40895Spaul luse break; 448fab40895Spaul luse case ACCEL_FILL: 449fab40895Spaul luse rc = spdk_accel_batch_prep_fill(worker->ch, batch, task->dst, 450fab40895Spaul luse *(uint8_t *)task->src, 451fab40895Spaul luse g_xfer_size_bytes, accel_done, task); 452fab40895Spaul luse break; 453221eb3f4Spaul luse case ACCEL_COPY_CRC32C: 454221eb3f4Spaul luse rc = spdk_accel_batch_prep_copy_crc32c(worker->ch, batch, task->dst, task->src, &task->crc_dst, 455221eb3f4Spaul luse g_crc32c_seed, g_xfer_size_bytes, accel_done, task); 456221eb3f4Spaul luse break; 457fab40895Spaul luse case ACCEL_CRC32C: 458a738acd5Spaul luse rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, &task->crc_dst, 45988754353SZiye Yang task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task); 460fab40895Spaul luse break; 461fab40895Spaul luse default: 462fab40895Spaul luse assert(false); 463fab40895Spaul luse break; 464fab40895Spaul luse } 465fab40895Spaul luse 466fab40895Spaul luse return rc; 467fab40895Spaul luse } 468fab40895Spaul luse 4699f51cf32Spaul luse static void 470e150f6b8SZiye Yang _free_task_buffers(struct ap_task *task) 471ac9a1a83Spaul luse { 47288754353SZiye Yang uint32_t i; 47388754353SZiye Yang 47488754353SZiye Yang if (g_workload_selection == ACCEL_CRC32C) { 47588754353SZiye Yang if (task->iovs) { 47688754353SZiye Yang for (i = 0; i < task->iov_cnt; i++) { 47788754353SZiye Yang if (task->iovs[i].iov_base) { 47888754353SZiye Yang spdk_dma_free(task->iovs[i].iov_base); 47988754353SZiye Yang } 48088754353SZiye Yang } 48188754353SZiye Yang free(task->iovs); 48288754353SZiye Yang } 48388754353SZiye Yang } else { 484ac9a1a83Spaul luse spdk_dma_free(task->src); 48588754353SZiye Yang } 48688754353SZiye Yang 487ac9a1a83Spaul luse spdk_dma_free(task->dst); 488ac9a1a83Spaul luse if (g_workload_selection == ACCEL_DUALCAST) { 489ac9a1a83Spaul luse 
spdk_dma_free(task->dst2); 490ac9a1a83Spaul luse } 491ac9a1a83Spaul luse } 492ac9a1a83Spaul luse 493f17e6705Spaul luse static void 494f17e6705Spaul luse _build_batch(struct worker_thread *worker, struct ap_task *task) 495f17e6705Spaul luse { 496f17e6705Spaul luse struct accel_batch *worker_batch = NULL; 497f17e6705Spaul luse int rc; 498f17e6705Spaul luse 499f17e6705Spaul luse assert(!TAILQ_EMPTY(&worker->in_prep_batches)); 500f17e6705Spaul luse 501f17e6705Spaul luse worker_batch = TAILQ_FIRST(&worker->in_prep_batches); 502f17e6705Spaul luse 503f17e6705Spaul luse /* If an accel batch hasn't been created yet do so now. */ 504f17e6705Spaul luse if (worker_batch->batch == NULL) { 505f17e6705Spaul luse worker_batch->batch = spdk_accel_batch_create(worker->ch); 506f17e6705Spaul luse if (worker_batch->batch == NULL) { 507f17e6705Spaul luse fprintf(stderr, "error unable to create new batch\n"); 508f17e6705Spaul luse return; 509f17e6705Spaul luse } 510f17e6705Spaul luse } 511f17e6705Spaul luse 512f17e6705Spaul luse /* Prep the command re-using the last completed command's task */ 513f17e6705Spaul luse rc = _batch_prep_cmd(worker, task, worker_batch); 514f17e6705Spaul luse if (rc) { 515f17e6705Spaul luse fprintf(stderr, "error preping command for batch\n"); 516f17e6705Spaul luse goto error; 517f17e6705Spaul luse } 518f17e6705Spaul luse 519f17e6705Spaul luse /* If this batch is full move it to the to_submit list so it gets 520f17e6705Spaul luse * submitted as batches complete. 
521f17e6705Spaul luse */ 522f17e6705Spaul luse if (worker_batch->cmd_count == g_ops_per_batch) { 523f17e6705Spaul luse TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link); 524f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->to_submit_batches, worker_batch, link); 525f17e6705Spaul luse } 526f17e6705Spaul luse 527f17e6705Spaul luse return; 528f17e6705Spaul luse error: 529f17e6705Spaul luse spdk_accel_batch_cancel(worker->ch, worker_batch->batch); 530f17e6705Spaul luse 531f17e6705Spaul luse } 532f17e6705Spaul luse 533f17e6705Spaul luse static void batch_done(void *cb_arg, int status); 534f17e6705Spaul luse static void 535f17e6705Spaul luse _drain_batch(struct worker_thread *worker) 536f17e6705Spaul luse { 537f17e6705Spaul luse struct accel_batch *worker_batch, *tmp; 538f17e6705Spaul luse int rc; 539f17e6705Spaul luse 540f17e6705Spaul luse /* submit any batches that were being built up. */ 541f17e6705Spaul luse TAILQ_FOREACH_SAFE(worker_batch, &worker->in_prep_batches, link, tmp) { 542f17e6705Spaul luse if (worker_batch->cmd_count == 0) { 543f17e6705Spaul luse continue; 544f17e6705Spaul luse } 545f17e6705Spaul luse worker->current_queue_depth += worker_batch->cmd_count + 1; 546f17e6705Spaul luse 547f17e6705Spaul luse TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link); 548f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link); 549f17e6705Spaul luse rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch); 550f17e6705Spaul luse if (rc == 0) { 551f17e6705Spaul luse worker_batch->cmd_count = 0; 552f17e6705Spaul luse } else { 553f17e6705Spaul luse fprintf(stderr, "error sending final batch\n"); 554f17e6705Spaul luse worker->current_queue_depth -= worker_batch->cmd_count + 1; 555f17e6705Spaul luse break; 556f17e6705Spaul luse } 557f17e6705Spaul luse } 558f17e6705Spaul luse } 559f17e6705Spaul luse 560f17e6705Spaul luse static void 561*df42f358Spaul luse batch_done(void *arg1, int status) 562f17e6705Spaul 
luse { 563*df42f358Spaul luse struct accel_batch *worker_batch = (struct accel_batch *)arg1; 564f17e6705Spaul luse struct worker_thread *worker = worker_batch->worker; 565f17e6705Spaul luse int rc; 566f17e6705Spaul luse 567*df42f358Spaul luse assert(worker); 568f17e6705Spaul luse assert(TAILQ_EMPTY(&worker->in_use_batches) == 0); 569f17e6705Spaul luse 570*df42f358Spaul luse if (status) { 571*df42f358Spaul luse SPDK_ERRLOG("error %d\n", status); 572f17e6705Spaul luse } 573f17e6705Spaul luse 574f17e6705Spaul luse worker->current_queue_depth--; 575f17e6705Spaul luse TAILQ_REMOVE(&worker->in_use_batches, worker_batch, link); 576f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->in_prep_batches, worker_batch, link); 577f17e6705Spaul luse worker_batch->batch = NULL; 578f17e6705Spaul luse worker_batch->cmd_count = 0; 579f17e6705Spaul luse 580f17e6705Spaul luse if (!worker->is_draining) { 581f17e6705Spaul luse worker_batch = TAILQ_FIRST(&worker->to_submit_batches); 582f17e6705Spaul luse if (worker_batch != NULL) { 583f17e6705Spaul luse 584f17e6705Spaul luse assert(worker_batch->cmd_count == g_ops_per_batch); 585f17e6705Spaul luse 586f17e6705Spaul luse /* Add one for the batch command itself. 
*/ 587f17e6705Spaul luse worker->current_queue_depth += g_ops_per_batch + 1; 588f17e6705Spaul luse TAILQ_REMOVE(&worker->to_submit_batches, worker_batch, link); 589f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link); 590f17e6705Spaul luse 591f17e6705Spaul luse rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch); 592f17e6705Spaul luse if (rc) { 593f17e6705Spaul luse fprintf(stderr, "error ending batch\n"); 594f17e6705Spaul luse worker->current_queue_depth -= g_ops_per_batch + 1; 595f17e6705Spaul luse return; 596f17e6705Spaul luse } 597f17e6705Spaul luse } 598f17e6705Spaul luse } else { 599f17e6705Spaul luse _drain_batch(worker); 600f17e6705Spaul luse } 601f17e6705Spaul luse } 602f17e6705Spaul luse 603221eb3f4Spaul luse static int 604221eb3f4Spaul luse _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt) 605221eb3f4Spaul luse { 606221eb3f4Spaul luse uint32_t i; 607221eb3f4Spaul luse uint32_t ttl_len = 0; 608221eb3f4Spaul luse uint8_t *dst = (uint8_t *)_dst; 609221eb3f4Spaul luse 610221eb3f4Spaul luse for (i = 0; i < iovcnt; i++) { 611221eb3f4Spaul luse if (memcmp(dst, src_iovs[i].iov_base, src_iovs[i].iov_len)) { 612221eb3f4Spaul luse return -1; 613221eb3f4Spaul luse } 614221eb3f4Spaul luse dst += src_iovs[i].iov_len; 615221eb3f4Spaul luse ttl_len += src_iovs[i].iov_len; 616221eb3f4Spaul luse } 617221eb3f4Spaul luse 618221eb3f4Spaul luse if (ttl_len != iovcnt * g_xfer_size_bytes) { 619221eb3f4Spaul luse return -1; 620221eb3f4Spaul luse } 621221eb3f4Spaul luse 622221eb3f4Spaul luse return 0; 623221eb3f4Spaul luse } 624221eb3f4Spaul luse 625fab40895Spaul luse static void 626*df42f358Spaul luse accel_done(void *arg1, int status) 6279f51cf32Spaul luse { 6289f51cf32Spaul luse struct ap_task *task = arg1; 6299f51cf32Spaul luse struct worker_thread *worker = task->worker; 630e69375bfSpaul luse uint32_t sw_crc32c; 6319f51cf32Spaul luse 6329f51cf32Spaul luse assert(worker); 6339f51cf32Spaul luse 
assert(worker->current_queue_depth > 0); 6349f51cf32Spaul luse 635*df42f358Spaul luse if (g_verify && status == 0) { 636b9218b7aSpaul luse switch (g_workload_selection) { 637221eb3f4Spaul luse case ACCEL_COPY_CRC32C: 638221eb3f4Spaul luse sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 639221eb3f4Spaul luse if (task->crc_dst != sw_crc32c) { 640221eb3f4Spaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 641221eb3f4Spaul luse worker->xfer_failed++; 642221eb3f4Spaul luse } 643221eb3f4Spaul luse if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) { 644221eb3f4Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 645221eb3f4Spaul luse worker->xfer_failed++; 646221eb3f4Spaul luse } 647221eb3f4Spaul luse break; 648b9218b7aSpaul luse case ACCEL_CRC32C: 649b85127ccSZiye Yang sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed); 650a738acd5Spaul luse if (task->crc_dst != sw_crc32c) { 651e69375bfSpaul luse SPDK_NOTICELOG("CRC-32C miscompare\n"); 652e69375bfSpaul luse worker->xfer_failed++; 653e69375bfSpaul luse } 654b9218b7aSpaul luse break; 655b9218b7aSpaul luse case ACCEL_COPY: 656b9218b7aSpaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 6579f51cf32Spaul luse SPDK_NOTICELOG("Data miscompare\n"); 6589f51cf32Spaul luse worker->xfer_failed++; 659b9218b7aSpaul luse } 660b9218b7aSpaul luse break; 6610ef079c6Spaul luse case ACCEL_DUALCAST: 6620ef079c6Spaul luse if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { 6630ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, first destination\n"); 6640ef079c6Spaul luse worker->xfer_failed++; 6650ef079c6Spaul luse } 6660ef079c6Spaul luse if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { 6670ef079c6Spaul luse SPDK_NOTICELOG("Data miscompare, second destination\n"); 6680ef079c6Spaul luse worker->xfer_failed++; 6690ef079c6Spaul luse } 6700ef079c6Spaul luse break; 671d207237fSpaul luse case ACCEL_FILL: 672d207237fSpaul luse if (memcmp(task->dst, task->src, 
g_xfer_size_bytes)) { 673d207237fSpaul luse SPDK_NOTICELOG("Data miscompare\n"); 674d207237fSpaul luse worker->xfer_failed++; 675d207237fSpaul luse } 676d207237fSpaul luse break; 6778cee297cSpaul luse case ACCEL_COMPARE: 6788cee297cSpaul luse break; 679b9218b7aSpaul luse default: 680b9218b7aSpaul luse assert(false); 681b9218b7aSpaul luse break; 6829f51cf32Spaul luse } 6839f51cf32Spaul luse } 684b9218b7aSpaul luse 685b9218b7aSpaul luse if (task->expected_status == -EILSEQ) { 686*df42f358Spaul luse assert(status != 0); 687b9218b7aSpaul luse worker->injected_miscompares++; 688*df42f358Spaul luse } else if (status) { 689f17e6705Spaul luse /* Expected to pass but the accel engine reported an error (ex: COMPARE operation). */ 690b9218b7aSpaul luse worker->xfer_failed++; 691b9218b7aSpaul luse } 692b9218b7aSpaul luse 6939f51cf32Spaul luse worker->xfer_completed++; 6949f51cf32Spaul luse worker->current_queue_depth--; 6959f51cf32Spaul luse 69640ec8e97Spaul luse if (!worker->is_draining) { 697451462f6SJim Harris TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 698451462f6SJim Harris task = _get_task(worker); 699f17e6705Spaul luse if (g_ops_per_batch == 0) { 7009f51cf32Spaul luse _submit_single(worker, task); 701ac9a1a83Spaul luse worker->current_queue_depth++; 702f17e6705Spaul luse } else { 703f17e6705Spaul luse _build_batch(worker, task); 7049f51cf32Spaul luse } 705f17e6705Spaul luse } else if (g_ops_per_batch > 0) { 706f17e6705Spaul luse _drain_batch(worker); 707b34883e0SZiye Yang } else { 708b34883e0SZiye Yang TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 709f17e6705Spaul luse } 7109f51cf32Spaul luse } 7119f51cf32Spaul luse 7129f51cf32Spaul luse static int 7139f51cf32Spaul luse dump_result(void) 7149f51cf32Spaul luse { 7159f51cf32Spaul luse uint64_t total_completed = 0; 7169f51cf32Spaul luse uint64_t total_failed = 0; 717b9218b7aSpaul luse uint64_t total_miscompared = 0; 7189f51cf32Spaul luse uint64_t total_xfer_per_sec, total_bw_in_MiBps; 7199f51cf32Spaul luse 
struct worker_thread *worker = g_workers; 7209f51cf32Spaul luse 721445fe74eSpaul luse printf("\nCore,Thread Transfers Bandwidth Failed Miscompares\n"); 722445fe74eSpaul luse printf("------------------------------------------------------------------------\n"); 7239f51cf32Spaul luse while (worker != NULL) { 7249f51cf32Spaul luse 7259f51cf32Spaul luse uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; 7269f51cf32Spaul luse uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / 7279f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 7289f51cf32Spaul luse 7299f51cf32Spaul luse total_completed += worker->xfer_completed; 7309f51cf32Spaul luse total_failed += worker->xfer_failed; 731b9218b7aSpaul luse total_miscompared += worker->injected_miscompares; 7329f51cf32Spaul luse 7339f51cf32Spaul luse if (xfer_per_sec) { 734445fe74eSpaul luse printf("%u,%u%17" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n", 735445fe74eSpaul luse worker->display.core, worker->display.thread, xfer_per_sec, 736b9218b7aSpaul luse bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); 7379f51cf32Spaul luse } 7389f51cf32Spaul luse 7399f51cf32Spaul luse worker = worker->next; 7409f51cf32Spaul luse } 7419f51cf32Spaul luse 7429f51cf32Spaul luse total_xfer_per_sec = total_completed / g_time_in_sec; 7439f51cf32Spaul luse total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / 7449f51cf32Spaul luse (g_time_in_sec * 1024 * 1024); 7459f51cf32Spaul luse 746445fe74eSpaul luse printf("=========================================================================\n"); 747445fe74eSpaul luse printf("Total:%15" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n", 748b9218b7aSpaul luse total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); 7499f51cf32Spaul luse 7509f51cf32Spaul luse return total_failed ? 
1 : 0; 7519f51cf32Spaul luse } 7529f51cf32Spaul luse 753e150f6b8SZiye Yang static inline void 754e150f6b8SZiye Yang _free_task_buffers_in_pool(struct worker_thread *worker) 755e150f6b8SZiye Yang { 756e150f6b8SZiye Yang struct ap_task *task; 757e150f6b8SZiye Yang 758e150f6b8SZiye Yang assert(worker); 759e150f6b8SZiye Yang while ((task = TAILQ_FIRST(&worker->tasks_pool))) { 760e150f6b8SZiye Yang TAILQ_REMOVE(&worker->tasks_pool, task, link); 761e150f6b8SZiye Yang _free_task_buffers(task); 762e150f6b8SZiye Yang } 763e150f6b8SZiye Yang } 764e150f6b8SZiye Yang 7659f51cf32Spaul luse static int 7669f51cf32Spaul luse _check_draining(void *arg) 7679f51cf32Spaul luse { 7689f51cf32Spaul luse struct worker_thread *worker = arg; 7699f51cf32Spaul luse 7709f51cf32Spaul luse assert(worker); 7719f51cf32Spaul luse 7729f51cf32Spaul luse if (worker->current_queue_depth == 0) { 773e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 7749f51cf32Spaul luse spdk_poller_unregister(&worker->is_draining_poller); 7759f51cf32Spaul luse unregister_worker(worker); 7769f51cf32Spaul luse } 7779f51cf32Spaul luse 7789f51cf32Spaul luse return -1; 7799f51cf32Spaul luse } 7809f51cf32Spaul luse 7819f51cf32Spaul luse static int 7829f51cf32Spaul luse _worker_stop(void *arg) 7839f51cf32Spaul luse { 7849f51cf32Spaul luse struct worker_thread *worker = arg; 7859f51cf32Spaul luse 7869f51cf32Spaul luse assert(worker); 7879f51cf32Spaul luse 7889f51cf32Spaul luse spdk_poller_unregister(&worker->stop_poller); 7899f51cf32Spaul luse 7909f51cf32Spaul luse /* now let the worker drain and check it's outstanding IO with a poller */ 7919f51cf32Spaul luse worker->is_draining = true; 792ab0bc5c2SShuhei Matsumoto worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); 7939f51cf32Spaul luse 7949f51cf32Spaul luse return 0; 7959f51cf32Spaul luse } 7969f51cf32Spaul luse 7979f51cf32Spaul luse static void 798a34fc12bSpaul luse _init_thread(void *arg1) 799a34fc12bSpaul luse { 800a34fc12bSpaul luse struct 
worker_thread *worker; 801a34fc12bSpaul luse struct ap_task *task; 802f17e6705Spaul luse int i, rc, num_batches; 803f17e6705Spaul luse int max_per_batch; 804a34fc12bSpaul luse int remaining = g_queue_depth; 805e1bf63afSJim Harris int num_tasks = g_allocate_depth; 806f17e6705Spaul luse struct accel_batch *tmp; 807f17e6705Spaul luse struct accel_batch *worker_batch = NULL; 808445fe74eSpaul luse struct display_info *display = arg1; 809a34fc12bSpaul luse 810a34fc12bSpaul luse worker = calloc(1, sizeof(*worker)); 811a34fc12bSpaul luse if (worker == NULL) { 812a34fc12bSpaul luse fprintf(stderr, "Unable to allocate worker\n"); 813445fe74eSpaul luse free(display); 814a34fc12bSpaul luse return; 815a34fc12bSpaul luse } 816a34fc12bSpaul luse 817445fe74eSpaul luse worker->display.core = display->core; 818445fe74eSpaul luse worker->display.thread = display->thread; 819445fe74eSpaul luse free(display); 8209f51cf32Spaul luse worker->core = spdk_env_get_current_core(); 8219f51cf32Spaul luse worker->thread = spdk_get_thread(); 822eea826a2Spaul luse pthread_mutex_lock(&g_workers_lock); 823eea826a2Spaul luse g_num_workers++; 8249f51cf32Spaul luse worker->next = g_workers; 825eea826a2Spaul luse g_workers = worker; 826eea826a2Spaul luse pthread_mutex_unlock(&g_workers_lock); 8279f51cf32Spaul luse worker->ch = spdk_accel_engine_get_io_channel(); 828b9218b7aSpaul luse 829f17e6705Spaul luse TAILQ_INIT(&worker->tasks_pool); 830f17e6705Spaul luse 831f17e6705Spaul luse if (g_ops_per_batch > 0) { 832f17e6705Spaul luse 8330cecfcb1Spaul luse max_per_batch = spdk_accel_batch_get_max(worker->ch); 8340cecfcb1Spaul luse assert(max_per_batch > 0); 8350cecfcb1Spaul luse 836f17e6705Spaul luse if (g_ops_per_batch > max_per_batch) { 837f17e6705Spaul luse fprintf(stderr, "Reducing requested batch amount to max supported of %d\n", max_per_batch); 838f17e6705Spaul luse g_ops_per_batch = max_per_batch; 839f17e6705Spaul luse } 840f17e6705Spaul luse 841f17e6705Spaul luse if (g_ops_per_batch > g_queue_depth) { 
842f17e6705Spaul luse fprintf(stderr, "Batch amount > queue depth, resetting to %d\n", g_queue_depth); 843f17e6705Spaul luse g_ops_per_batch = g_queue_depth; 844f17e6705Spaul luse } 845f17e6705Spaul luse 846f17e6705Spaul luse TAILQ_INIT(&worker->in_prep_batches); 847f17e6705Spaul luse TAILQ_INIT(&worker->to_submit_batches); 848f17e6705Spaul luse TAILQ_INIT(&worker->in_use_batches); 849f17e6705Spaul luse 850f17e6705Spaul luse /* A worker_batch will live on one of 3 lists: 851f17e6705Spaul luse * IN_PREP: as individual IOs complete new ones are built on on a 852f17e6705Spaul luse * worker_batch on this list until it reaches g_ops_per_batch. 853f17e6705Spaul luse * TO_SUBMIT: as batches are built up on IO completion they are moved 854f17e6705Spaul luse * to this list once they are full. This list is used in 855f17e6705Spaul luse * batch completion to start new batches. 856f17e6705Spaul luse * IN_USE: the worker_batch is outstanding and will be moved to in prep 857f17e6705Spaul luse * list when the batch is completed. 858f17e6705Spaul luse * 859f17e6705Spaul luse * So we need enough to cover Q depth loading and then one to replace 860f17e6705Spaul luse * each one of those and for when everything is outstanding there needs 861f17e6705Spaul luse * to be one extra batch to build up while the last batch is completing 862f17e6705Spaul luse * IO but before it's completed the batch command. 
863f17e6705Spaul luse */ 864f17e6705Spaul luse num_batches = (g_queue_depth / g_ops_per_batch * 2) + 1; 865f17e6705Spaul luse worker->batch_base = calloc(num_batches, sizeof(struct accel_batch)); 866f17e6705Spaul luse worker_batch = worker->batch_base; 867f17e6705Spaul luse for (i = 0; i < num_batches; i++) { 868f17e6705Spaul luse worker_batch->worker = worker; 869f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->in_prep_batches, worker_batch, link); 870f17e6705Spaul luse worker_batch++; 871f17e6705Spaul luse } 872f17e6705Spaul luse } 873f17e6705Spaul luse 874ac9a1a83Spaul luse worker->task_base = calloc(num_tasks, sizeof(struct ap_task)); 875ac9a1a83Spaul luse if (worker->task_base == NULL) { 876ac9a1a83Spaul luse fprintf(stderr, "Could not allocate task base.\n"); 877ac9a1a83Spaul luse goto error; 8780cecfcb1Spaul luse } 879ac9a1a83Spaul luse 880ac9a1a83Spaul luse task = worker->task_base; 881ac9a1a83Spaul luse for (i = 0; i < num_tasks; i++) { 882ac9a1a83Spaul luse TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link); 8834cd7ca9bSJim Harris task->worker = worker; 884ac9a1a83Spaul luse if (_get_task_data_bufs(task)) { 885ac9a1a83Spaul luse fprintf(stderr, "Unable to get data bufs\n"); 886ac9a1a83Spaul luse goto error; 887ac9a1a83Spaul luse } 888ac9a1a83Spaul luse task++; 8899f51cf32Spaul luse } 8909f51cf32Spaul luse 8919f51cf32Spaul luse /* Register a poller that will stop the worker at time elapsed */ 892ab0bc5c2SShuhei Matsumoto worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, 8939f51cf32Spaul luse g_time_in_sec * 1000000ULL); 8949f51cf32Spaul luse 895f17e6705Spaul luse /* If batching is enabled load up to the full Q depth before 896f17e6705Spaul luse * processing any completions, then ping pong between two batches, 897f17e6705Spaul luse * one processing and one being built up for when the other completes. 
898a34fc12bSpaul luse */ 899f17e6705Spaul luse if (g_ops_per_batch > 0) { 900a34fc12bSpaul luse do { 901f17e6705Spaul luse worker_batch = TAILQ_FIRST(&worker->in_prep_batches); 902f17e6705Spaul luse if (worker_batch == NULL) { 903f17e6705Spaul luse goto error; 904f17e6705Spaul luse } 905f17e6705Spaul luse 906f17e6705Spaul luse worker_batch->batch = spdk_accel_batch_create(worker->ch); 907f17e6705Spaul luse if (worker_batch->batch == NULL) { 908f17e6705Spaul luse raise(SIGINT); 909a34fc12bSpaul luse break; 910a34fc12bSpaul luse } 911a34fc12bSpaul luse 912f17e6705Spaul luse for (i = 0; i < g_ops_per_batch; i++) { 913ac9a1a83Spaul luse task = _get_task(worker); 9144cd7ca9bSJim Harris worker->current_queue_depth++; 915ac9a1a83Spaul luse if (task == NULL) { 916a34fc12bSpaul luse goto error; 9179f51cf32Spaul luse } 918b9218b7aSpaul luse 919f17e6705Spaul luse rc = _batch_prep_cmd(worker, task, worker_batch); 920a34fc12bSpaul luse if (rc) { 921a34fc12bSpaul luse fprintf(stderr, "error preping command\n"); 922a34fc12bSpaul luse goto error; 923a34fc12bSpaul luse } 924a34fc12bSpaul luse } 925a34fc12bSpaul luse 926f17e6705Spaul luse /* for the batch operation itself. */ 927f17e6705Spaul luse task->worker->current_queue_depth++; 928f17e6705Spaul luse TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link); 929f17e6705Spaul luse TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link); 930f17e6705Spaul luse 931f17e6705Spaul luse rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch); 932a34fc12bSpaul luse if (rc) { 933f17e6705Spaul luse fprintf(stderr, "error ending batch\n"); 934a34fc12bSpaul luse goto error; 935a34fc12bSpaul luse } 936f17e6705Spaul luse assert(remaining >= g_ops_per_batch); 937f17e6705Spaul luse remaining -= g_ops_per_batch; 938f17e6705Spaul luse } while (remaining > 0); 939b9218b7aSpaul luse } 9400ef079c6Spaul luse 941f17e6705Spaul luse /* Submit as singles when no batching is enabled or we ran out of batches. 
*/ 942a34fc12bSpaul luse for (i = 0; i < remaining; i++) { 943ac9a1a83Spaul luse task = _get_task(worker); 9444cd7ca9bSJim Harris worker->current_queue_depth++; 945ac9a1a83Spaul luse if (task == NULL) { 946a34fc12bSpaul luse goto error; 947b9218b7aSpaul luse } 948b9218b7aSpaul luse 9499f51cf32Spaul luse _submit_single(worker, task); 9509f51cf32Spaul luse } 951a34fc12bSpaul luse return; 952a34fc12bSpaul luse error: 953f17e6705Spaul luse if (worker_batch && worker_batch->batch) { 954f17e6705Spaul luse TAILQ_FOREACH_SAFE(worker_batch, &worker->in_use_batches, link, tmp) { 955f17e6705Spaul luse spdk_accel_batch_cancel(worker->ch, worker_batch->batch); 956f17e6705Spaul luse TAILQ_REMOVE(&worker->in_use_batches, worker_batch, link); 957f17e6705Spaul luse } 958f17e6705Spaul luse } 959e150f6b8SZiye Yang 960e150f6b8SZiye Yang _free_task_buffers_in_pool(worker); 961f17e6705Spaul luse free(worker->batch_base); 962ac9a1a83Spaul luse free(worker->task_base); 963a34fc12bSpaul luse free(worker); 964a34fc12bSpaul luse spdk_app_stop(-1); 9659f51cf32Spaul luse } 9669f51cf32Spaul luse 967c2605379SZiye Yang static inline void 968c2605379SZiye Yang identify_accel_engine_usage(void) 969c2605379SZiye Yang { 970c2605379SZiye Yang struct spdk_io_channel *ch; 971c2605379SZiye Yang uint64_t capabilities; 972c2605379SZiye Yang 973c2605379SZiye Yang ch = spdk_accel_engine_get_io_channel(); 974c2605379SZiye Yang assert(ch != NULL); 975c2605379SZiye Yang 976c2605379SZiye Yang capabilities = spdk_accel_get_capabilities(ch); 977c2605379SZiye Yang if ((capabilities & g_workload_selection) != g_workload_selection) { 978c2605379SZiye Yang SPDK_WARNLOG("The selected workload is not natively supported by the current engine\n"); 979c2605379SZiye Yang SPDK_WARNLOG("The software engine will be used instead.\n\n"); 980c2605379SZiye Yang } 981c2605379SZiye Yang 982c2605379SZiye Yang spdk_put_io_channel(ch); 983c2605379SZiye Yang } 984c2605379SZiye Yang 9859f51cf32Spaul luse static void 9869f51cf32Spaul luse 
accel_perf_start(void *arg1) 9879f51cf32Spaul luse { 988eea826a2Spaul luse struct spdk_cpuset tmp_cpumask = {}; 989eea826a2Spaul luse char thread_name[32]; 990eea826a2Spaul luse uint32_t i; 991445fe74eSpaul luse int j; 992eea826a2Spaul luse struct spdk_thread *thread; 993445fe74eSpaul luse struct display_info *display; 994514be889Spaul luse 995c2605379SZiye Yang identify_accel_engine_usage(); 996c2605379SZiye Yang 9979f51cf32Spaul luse g_tsc_rate = spdk_get_ticks_hz(); 9989f51cf32Spaul luse g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 9999f51cf32Spaul luse 10009f51cf32Spaul luse printf("Running for %d seconds...\n", g_time_in_sec); 10019f51cf32Spaul luse fflush(stdout); 10029f51cf32Spaul luse 1003eea826a2Spaul luse /* Create worker threads for each core that was specified. */ 1004eea826a2Spaul luse SPDK_ENV_FOREACH_CORE(i) { 1005445fe74eSpaul luse for (j = 0; j < g_threads_per_core; j++) { 1006445fe74eSpaul luse snprintf(thread_name, sizeof(thread_name), "ap_worker_%u_%u", i, j); 1007eea826a2Spaul luse spdk_cpuset_zero(&tmp_cpumask); 1008eea826a2Spaul luse spdk_cpuset_set_cpu(&tmp_cpumask, i, true); 1009eea826a2Spaul luse thread = spdk_thread_create(thread_name, &tmp_cpumask); 1010445fe74eSpaul luse display = calloc(1, sizeof(*display)); 1011445fe74eSpaul luse if (display == NULL) { 1012445fe74eSpaul luse fprintf(stderr, "Unable to allocate memory\n"); 1013445fe74eSpaul luse spdk_app_stop(-1); 1014445fe74eSpaul luse return; 1015445fe74eSpaul luse } 1016445fe74eSpaul luse display->core = i; 1017445fe74eSpaul luse display->thread = j; 1018445fe74eSpaul luse spdk_thread_send_msg(thread, _init_thread, display); 1019445fe74eSpaul luse } 1020eea826a2Spaul luse } 10219f51cf32Spaul luse } 10229f51cf32Spaul luse 10239f51cf32Spaul luse int 10249f51cf32Spaul luse main(int argc, char **argv) 10259f51cf32Spaul luse { 10269f51cf32Spaul luse struct spdk_app_opts opts = {}; 10279f51cf32Spaul luse struct worker_thread *worker, *tmp; 10289f51cf32Spaul luse 
	pthread_mutex_init(&g_workers_lock, NULL);
	spdk_app_opts_init(&opts, sizeof(opts));
	opts.reactor_mask = "0x1";
	if (spdk_app_parse_args(argc, argv, &opts, "a:C:o:q:t:yw:P:f:b:T:", NULL, parse_args,
				usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
		g_rc = -1;
		goto cleanup;
	}

	/* Reject any workload this tool does not implement. */
	if ((g_workload_selection != ACCEL_COPY) &&
	    (g_workload_selection != ACCEL_FILL) &&
	    (g_workload_selection != ACCEL_CRC32C) &&
	    (g_workload_selection != ACCEL_COPY_CRC32C) &&
	    (g_workload_selection != ACCEL_COMPARE) &&
	    (g_workload_selection != ACCEL_DUALCAST)) {
		usage();
		g_rc = -1;
		goto cleanup;
	}

	/* Batch bookkeeping in _init_thread() assumes the queue depth divides
	 * evenly into batches. */
	if (g_ops_per_batch > 0 && (g_queue_depth % g_ops_per_batch > 0)) {
		fprintf(stdout, "batch size must be a multiple of queue depth\n");
		usage();
		g_rc = -1;
		goto cleanup;
	}

	if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) {
		fprintf(stdout, "allocate depth must be at least as big as queue depth\n");
		usage();
		g_rc = -1;
		goto cleanup;
	}

	/* Default: allocate exactly one task per queue-depth slot. */
	if (g_allocate_depth == 0) {
		g_allocate_depth = g_queue_depth;
	}

	/* CRC workloads operate on chained iovecs; a zero chain count is invalid. */
	if ((g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) &&
	    g_crc32c_chained_count == 0) {
		usage();
		g_rc = -1;
		goto cleanup;
	}

	dump_user_config(&opts);
	/* Blocks until spdk_app_stop() is called from a worker/drain path. */
	g_rc = spdk_app_start(&opts, accel_perf_start, NULL);
	if (g_rc) {
		SPDK_ERRLOG("ERROR starting application\n");
	}

	/* NOTE(review): the mutex is only destroyed on this path; early "goto
	 * cleanup" exits skip pthread_mutex_destroy() — harmless at process
	 * exit, but worth confirming intent. */
	pthread_mutex_destroy(&g_workers_lock);

	/* Free the worker list built up by the _init_thread() callbacks. */
	worker = g_workers;
	while (worker) {
		tmp = worker->next;
		free(worker);
		worker = tmp;
	}
cleanup:
	spdk_app_fini();
	return g_rc;
}