/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"
#include "spdk/thread.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/accel_engine.h"
#include "spdk/crc32.h"
#include "spdk/util.h"

#define DATA_PATTERN 0x5a
#define ALIGN_4K 0x1000

static bool g_using_sw_engine = false;
static uint64_t g_tsc_rate;
static uint64_t g_tsc_end;
static int g_rc;
static int g_xfer_size_bytes = 4096;
static int g_queue_depth = 32;
/* g_allocate_depth indicates how many tasks we allocate per worker. It will
 * be at least as much as the queue depth.
 */
static int g_allocate_depth = 0;
static int g_ops_per_batch = 0;
static int g_threads_per_core = 1;
static int g_time_in_sec = 5;
static uint32_t g_crc32c_seed = 0;
static uint32_t g_crc32c_chained_count = 1;
static int g_fail_percent_goal = 0;
static uint8_t g_fill_pattern = 255;
static bool g_verify = false;
static const char *g_workload_type = NULL;
static enum accel_capability g_workload_selection;
static struct worker_thread *g_workers = NULL;
static int g_num_workers = 0;
static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER;

struct worker_thread;
static void accel_done(void *ref, int status);

struct display_info {
	int core;
	int thread;
};

struct ap_task {
	void *src;
	struct iovec *iovs;
	uint32_t iov_cnt;
	void *dst;
	void *dst2;
	struct worker_thread *worker;
	int status;
	int expected_status; /* used for the compare operation */
	TAILQ_ENTRY(ap_task) link;
};

struct accel_batch {
	int status;
	int cmd_count;
	struct spdk_accel_batch *batch;
	struct worker_thread *worker;
	TAILQ_ENTRY(accel_batch) link;
};

struct worker_thread {
	struct spdk_io_channel *ch;
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t injected_miscompares;
	uint64_t current_queue_depth;
	TAILQ_HEAD(, ap_task) tasks_pool;
	struct worker_thread *next;
	unsigned core;
	struct spdk_thread *thread;
	bool is_draining;
	struct spdk_poller *is_draining_poller;
	struct spdk_poller *stop_poller;
	void *task_base;
	struct accel_batch *batch_base;
	struct display_info display;
	TAILQ_HEAD(, accel_batch) in_prep_batches;
	TAILQ_HEAD(, accel_batch) in_use_batches;
	TAILQ_HEAD(, accel_batch) to_submit_batches;
};

static void
dump_user_config(struct spdk_app_opts *opts)
{
	printf("SPDK Configuration:\n");
	printf("Core mask: %s\n\n", opts->reactor_mask);
	printf("Accel Perf Configuration:\n");
	printf("Workload Type: %s\n", g_workload_type);
	if (g_workload_selection == ACCEL_CRC32C) {
		printf("CRC-32C seed: %u\n", g_crc32c_seed);
		printf("vector size: %u\n", g_crc32c_chained_count);
	} else if (g_workload_selection == ACCEL_FILL) {
		printf("Fill pattern: 0x%x\n", g_fill_pattern);
	} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
		printf("Failure inject: %u percent\n", g_fail_percent_goal);
	}
	printf("Transfer size: %u bytes\n", g_xfer_size_bytes);
	printf("Queue depth: %u\n", g_queue_depth);
	printf("Allocate depth: %u\n", g_allocate_depth);
	printf("# threads/core: %u\n", g_threads_per_core);
	printf("Run time: %u seconds\n", g_time_in_sec);
	if (g_ops_per_batch > 0) {
		printf("Batching: %u operations\n", g_ops_per_batch);
	} else {
		printf("Batching: Disabled\n");
	}
	printf("Verify: %s\n\n", g_verify ? "Yes" : "No");
}

static void
usage(void)
{
	printf("accel_perf options:\n");
	printf("\t[-h help message]\n");
	printf("\t[-q queue depth per core]\n");
	printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)]\n");
	printf("\t[-T number of threads per core]\n");
	printf("\t[-n number of channels]\n");
	printf("\t[-o transfer size in bytes]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast]\n");
	printf("\t[-s for crc32c workload, use this seed value (default 0)]\n");
	printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)]\n");
	printf("\t[-f for fill workload, use this BYTE value (default 255)]\n");
	printf("\t[-y verify result if this switch is on]\n");
	printf("\t[-b batch this number of operations at a time (default 0 = disabled)]\n");
	printf("\t[-a tasks to allocate per core (default: same value as -q)]\n");
	printf("\t\tCan be used to spread operations across a wider range of memory.\n");
}

static int
parse_args(int argc, char *argv)
{
	int argval = 0;

	switch (argc) {
	case 'a':
	case 'b':
	case 'C':
	case 'f':
	case 'T':
	case 'o':
	case 'P':
	case 'q':
	case 's':
	case 't':
		argval = spdk_strtol(optarg, 10);
		if (argval < 0) {
			fprintf(stderr, "-%c option must be non-negative.\n", argc);
			usage();
			return 1;
		}
		break;
	default:
		break;
	};

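	/* The numeric value parsed above is applied to its matching option below;
	 * the string option (-w) and the flag (-y) are handled directly.
	 */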
	switch (argc) {
	case 'a':
		g_allocate_depth = argval;
		break;
	case 'b':
		g_ops_per_batch = argval;
		break;
	case 'C':
		g_crc32c_chained_count = argval;
		break;
	case 'f':
		g_fill_pattern = (uint8_t)argval;
		break;
	case 'T':
		g_threads_per_core = argval;
		break;
	case 'o':
		g_xfer_size_bytes = argval;
		break;
	case 'P':
		g_fail_percent_goal = argval;
		break;
	case 'q':
		g_queue_depth = argval;
		break;
	case 's':
		g_crc32c_seed = argval;
		break;
	case 't':
		g_time_in_sec = argval;
		break;
	case 'y':
		g_verify = true;
		break;
	case 'w':
		g_workload_type = optarg;
		if (!strcmp(g_workload_type, "copy")) {
			g_workload_selection = ACCEL_COPY;
		} else if (!strcmp(g_workload_type, "fill")) {
			g_workload_selection = ACCEL_FILL;
		} else if (!strcmp(g_workload_type, "crc32c")) {
			g_workload_selection = ACCEL_CRC32C;
		} else if (!strcmp(g_workload_type, "compare")) {
			g_workload_selection = ACCEL_COMPARE;
		} else if (!strcmp(g_workload_type, "dualcast")) {
			g_workload_selection = ACCEL_DUALCAST;
		}
		break;
	default:
		usage();
		return 1;
	}

	return 0;
}

static int dump_result(void);
static void
unregister_worker(void *arg1)
{
	struct worker_thread *worker = arg1;

	free(worker->task_base);
	free(worker->batch_base);
	spdk_put_io_channel(worker->ch);
	pthread_mutex_lock(&g_workers_lock);
	assert(g_num_workers >= 1);
	if (--g_num_workers == 0) {
		pthread_mutex_unlock(&g_workers_lock);
		g_rc = dump_result();
		spdk_app_stop(0);
	} else {
		pthread_mutex_unlock(&g_workers_lock);
	}
}

static int
_get_task_data_bufs(struct ap_task *task)
{
	uint32_t align = 0;
	uint32_t i = 0;

	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
	 * we do this for all engines to keep it simple.
	 */
	if (g_workload_selection == ACCEL_DUALCAST) {
		align = ALIGN_4K;
	}

	if (g_workload_selection == ACCEL_CRC32C) {
		assert(g_crc32c_chained_count > 0);
		task->iov_cnt = g_crc32c_chained_count;
		task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
		if (!task->iovs) {
			fprintf(stderr, "cannot allocate task->iovs for task=%p\n", task);
			return -ENOMEM;
		}

		for (i = 0; i < task->iov_cnt; i++) {
			task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
			if (task->iovs[i].iov_base == NULL) {
				return -ENOMEM;
			}
			memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes);
			task->iovs[i].iov_len = g_xfer_size_bytes;
		}

	} else {
		task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
		if (task->src == NULL) {
			fprintf(stderr, "Unable to alloc src buffer\n");
			return -ENOMEM;
		}

		/* For fill, set the entire src buffer to the fill pattern so it can
		 * be compared against dst when verify is enabled.
		 */
		if (g_workload_selection == ACCEL_FILL) {
			memset(task->src, g_fill_pattern, g_xfer_size_bytes);
		} else {
			memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
		}
	}

	task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
	if (task->dst == NULL) {
		fprintf(stderr, "Unable to alloc dst buffer\n");
		return -ENOMEM;
	}

	/* For compare we want the buffers to match, otherwise not.
	 */
	if (g_workload_selection == ACCEL_COMPARE) {
		memset(task->dst, DATA_PATTERN, g_xfer_size_bytes);
	} else {
		memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
	}

	if (g_workload_selection == ACCEL_DUALCAST) {
		task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
		if (task->dst2 == NULL) {
			fprintf(stderr, "Unable to alloc dst2 buffer\n");
			return -ENOMEM;
		}
		memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes);
	}

	return 0;
}

inline static struct ap_task *
_get_task(struct worker_thread *worker)
{
	struct ap_task *task;

	if (!TAILQ_EMPTY(&worker->tasks_pool)) {
		task = TAILQ_FIRST(&worker->tasks_pool);
		TAILQ_REMOVE(&worker->tasks_pool, task, link);
	} else {
		fprintf(stderr, "Unable to get ap_task\n");
		return NULL;
	}

	return task;
}

/* Submit one operation using the same ap task that just completed. */
static void
_submit_single(struct worker_thread *worker, struct ap_task *task)
{
	int random_num;
	int rc = 0;

	assert(worker);

	switch (g_workload_selection) {
	case ACCEL_COPY:
		rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src,
					    g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_FILL:
		/* For fill use the first byte of the task->src buffer as the fill value. */
		rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src,
					    g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_CRC32C:
		rc = spdk_accel_submit_crc32cv(worker->ch, (uint32_t *)task->dst,
					       task->iovs, task->iov_cnt, g_crc32c_seed,
					       accel_done, task);
		break;
	case ACCEL_COMPARE:
		random_num = rand() % 100;
		if (random_num < g_fail_percent_goal) {
			task->expected_status = -EILSEQ;
			*(uint8_t *)task->dst = ~DATA_PATTERN;
		} else {
			task->expected_status = 0;
			*(uint8_t *)task->dst = DATA_PATTERN;
		}
		rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src,
					       g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_DUALCAST:
		rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2,
						task->src, g_xfer_size_bytes, accel_done, task);
		break;
	default:
		assert(false);
		break;

	}

	if (rc) {
		accel_done(task, rc);
	}
}

static int
_batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
		struct accel_batch *worker_batch)
{
	struct spdk_accel_batch *batch = worker_batch->batch;
	int rc = 0;

	worker_batch->cmd_count++;
	assert(worker_batch->cmd_count <= g_ops_per_batch);

	switch (g_workload_selection) {
	case ACCEL_COPY:
		rc = spdk_accel_batch_prep_copy(worker->ch, batch, task->dst,
						task->src, g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_DUALCAST:
		rc = spdk_accel_batch_prep_dualcast(worker->ch, batch, task->dst, task->dst2,
						    task->src, g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_COMPARE:
		rc = spdk_accel_batch_prep_compare(worker->ch, batch, task->dst, task->src,
						   g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_FILL:
		rc = spdk_accel_batch_prep_fill(worker->ch, batch, task->dst,
						*(uint8_t *)task->src,
						g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_CRC32C:
		rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
						   task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
		break;
	default:
		assert(false);
		break;
	}

	return rc;
}

static void
_free_task_buffers(struct ap_task *task)
{
	uint32_t i;

	if (g_workload_selection == ACCEL_CRC32C) {
		if (task->iovs) {
			for (i = 0; i < task->iov_cnt; i++) {
				if (task->iovs[i].iov_base) {
					spdk_dma_free(task->iovs[i].iov_base);
				}
			}
			free(task->iovs);
		}
	} else {
		spdk_dma_free(task->src);
	}

	spdk_dma_free(task->dst);
	if (g_workload_selection == ACCEL_DUALCAST) {
		spdk_dma_free(task->dst2);
	}
}

static void _batch_done(void *cb_arg);
static void
_build_batch(struct worker_thread *worker, struct ap_task *task)
{
	struct accel_batch *worker_batch = NULL;
	int rc;

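	/* Batching flow: the completed task is prepped onto the batch at the head
	 * of in_prep_batches; once that batch holds g_ops_per_batch commands it is
	 * moved to to_submit_batches and submitted from _batch_done().
	 */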
	assert(!TAILQ_EMPTY(&worker->in_prep_batches));

	worker_batch = TAILQ_FIRST(&worker->in_prep_batches);

	/* If an accel batch hasn't been created yet do so now. */
	if (worker_batch->batch == NULL) {
		worker_batch->batch = spdk_accel_batch_create(worker->ch);
		if (worker_batch->batch == NULL) {
			fprintf(stderr, "error unable to create new batch\n");
			return;
		}
	}

	/* Prep the command re-using the last completed command's task */
	rc = _batch_prep_cmd(worker, task, worker_batch);
	if (rc) {
		fprintf(stderr, "error prepping command for batch\n");
		goto error;
	}

	/* If this batch is full move it to the to_submit list so it gets
	 * submitted as batches complete.
	 */
	if (worker_batch->cmd_count == g_ops_per_batch) {
		TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link);
		TAILQ_INSERT_TAIL(&worker->to_submit_batches, worker_batch, link);
	}

	return;
error:
	spdk_accel_batch_cancel(worker->ch, worker_batch->batch);

}

static void batch_done(void *cb_arg, int status);
static void
_drain_batch(struct worker_thread *worker)
{
	struct accel_batch *worker_batch, *tmp;
	int rc;

	/* Submit any batches that were being built up.
	 */
	TAILQ_FOREACH_SAFE(worker_batch, &worker->in_prep_batches, link, tmp) {
		if (worker_batch->cmd_count == 0) {
			continue;
		}
		worker->current_queue_depth += worker_batch->cmd_count + 1;

		TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link);
		TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link);
		rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch);
		if (rc == 0) {
			worker_batch->cmd_count = 0;
		} else {
			fprintf(stderr, "error sending final batch\n");
			worker->current_queue_depth -= worker_batch->cmd_count + 1;
			break;
		}
	}
}

static void
_batch_done(void *cb_arg)
{
	struct accel_batch *worker_batch = (struct accel_batch *)cb_arg;
	struct worker_thread *worker = worker_batch->worker;
	int rc;

	assert(TAILQ_EMPTY(&worker->in_use_batches) == 0);

	if (worker_batch->status) {
		SPDK_ERRLOG("error %d\n", worker_batch->status);
	}

	worker->current_queue_depth--;
	TAILQ_REMOVE(&worker->in_use_batches, worker_batch, link);
	TAILQ_INSERT_TAIL(&worker->in_prep_batches, worker_batch, link);
	worker_batch->batch = NULL;
	worker_batch->cmd_count = 0;

	if (!worker->is_draining) {
		worker_batch = TAILQ_FIRST(&worker->to_submit_batches);
		if (worker_batch != NULL) {

			assert(worker_batch->cmd_count == g_ops_per_batch);

			/* Add one for the batch command itself.
			 */
			worker->current_queue_depth += g_ops_per_batch + 1;
			TAILQ_REMOVE(&worker->to_submit_batches, worker_batch, link);
			TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link);

			rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch);
			if (rc) {
				fprintf(stderr, "error submitting batch\n");
				worker->current_queue_depth -= g_ops_per_batch + 1;
				return;
			}
		}
	} else {
		_drain_batch(worker);
	}
}

static void
batch_done(void *cb_arg, int status)
{
	struct accel_batch *worker_batch = (struct accel_batch *)cb_arg;

	assert(worker_batch->worker);

	worker_batch->status = status;
	spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}

static void
_accel_done(void *arg1)
{
	struct ap_task *task = arg1;
	struct worker_thread *worker = task->worker;
	uint32_t sw_crc32c;

	assert(worker);
	assert(worker->current_queue_depth > 0);

	if (g_verify && task->status == 0) {
		switch (g_workload_selection) {
		case ACCEL_CRC32C:
			sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (*(uint32_t *)task->dst != sw_crc32c) {
				SPDK_NOTICELOG("CRC-32C miscompare\n");
				worker->xfer_failed++;
			}
			break;
		case ACCEL_COPY:
			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare\n");
				worker->xfer_failed++;
			}
			break;
		case ACCEL_DUALCAST:
			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare, first destination\n");
				worker->xfer_failed++;
			}
			if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare, second destination\n");
				worker->xfer_failed++;
			}
			break;
		case ACCEL_FILL:
			if (memcmp(task->dst, task->src, g_xfer_size_bytes)) {
				SPDK_NOTICELOG("Data miscompare\n");
				worker->xfer_failed++;
			}
			break;
		case ACCEL_COMPARE:
			break;
		default:
			assert(false);
			break;
		}
	}

	if (task->expected_status == -EILSEQ) {
		assert(task->status != 0);
		worker->injected_miscompares++;
	} else if (task->status) {
		/* Expected to pass but the accel engine reported an error (ex: COMPARE operation). */
		worker->xfer_failed++;
	}

	worker->xfer_completed++;
	worker->current_queue_depth--;

	if (!worker->is_draining) {
		TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link);
		task = _get_task(worker);
		if (g_ops_per_batch == 0) {
			_submit_single(worker, task);
			worker->current_queue_depth++;
		} else {
			_build_batch(worker, task);
		}
	} else if (g_ops_per_batch > 0) {
		_drain_batch(worker);
	} else {
		TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link);
	}
}

static int
dump_result(void)
{
	uint64_t total_completed = 0;
	uint64_t total_failed = 0;
	uint64_t total_miscompared = 0;
	uint64_t total_xfer_per_sec, total_bw_in_MiBps;
	struct worker_thread *worker = g_workers;

	printf("\nCore,Thread Transfers Bandwidth Failed Miscompares\n");
	printf("------------------------------------------------------------------------\n");
	while (worker != NULL) {

		uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec;
		uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) /
				       (g_time_in_sec * 1024 * 1024);

		total_completed += worker->xfer_completed;
		total_failed += worker->xfer_failed;
		total_miscompared += worker->injected_miscompares;

		if (xfer_per_sec) {
			printf("%u,%u%17" PRIu64 "/s%9" PRIu64 " MiB/s%7" PRIu64 " %11" PRIu64 "\n",
			       worker->display.core, worker->display.thread, xfer_per_sec,
			       bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares);
		}

		worker = worker->next;
	}

	total_xfer_per_sec = total_completed / g_time_in_sec;
	total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) /
			    (g_time_in_sec * 1024 * 1024);

	printf("=========================================================================\n");
	printf("Total:%15" PRIu64 "/s%9" PRIu64 " MiB/s%6" PRIu64 " %11" PRIu64"\n\n",
	       total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared);

	return total_failed ? 1 : 0;
}

static inline void
_free_task_buffers_in_pool(struct worker_thread *worker)
{
	struct ap_task *task;

	assert(worker);
	while ((task = TAILQ_FIRST(&worker->tasks_pool))) {
		TAILQ_REMOVE(&worker->tasks_pool, task, link);
		_free_task_buffers(task);
	}
}

static int
_check_draining(void *arg)
{
	struct worker_thread *worker = arg;

	assert(worker);

	if (worker->current_queue_depth == 0) {
		_free_task_buffers_in_pool(worker);
		spdk_poller_unregister(&worker->is_draining_poller);
		unregister_worker(worker);
	}

	return -1;
}

static int
_worker_stop(void *arg)
{
	struct worker_thread *worker = arg;

	assert(worker);

	spdk_poller_unregister(&worker->stop_poller);

	/* now let the worker drain and check its outstanding IO with a poller */
	worker->is_draining = true;
	worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0);

	return 0;
}

static void
_init_thread(void *arg1)
{
	struct worker_thread *worker;
	struct ap_task *task;
	int i, rc, num_batches;
	int max_per_batch;
	int remaining = g_queue_depth;
	int num_tasks = g_allocate_depth;
	struct accel_batch *tmp;
	struct accel_batch *worker_batch = NULL;
	struct display_info *display = arg1;
	uint64_t capabilities;

	worker = calloc(1, sizeof(*worker));
	if (worker == NULL) {
		fprintf(stderr, "Unable to allocate worker\n");
		free(display);
		return;
	}

	worker->display.core = display->core;
	worker->display.thread = display->thread;
	free(display);
	worker->core = spdk_env_get_current_core();
	worker->thread = spdk_get_thread();
	pthread_mutex_lock(&g_workers_lock);
	g_num_workers++;
	worker->next = g_workers;
	g_workers = worker;
	pthread_mutex_unlock(&g_workers_lock);
	worker->ch = spdk_accel_engine_get_io_channel();

	if (g_num_workers == 1) {
		capabilities = spdk_accel_get_capabilities(worker->ch);
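		/* Capabilities are reported for the whole engine, so only the first
		 * worker needs to check whether the selected workload will fall back
		 * to the software engine.
		 */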
		if ((capabilities & g_workload_selection) != g_workload_selection) {
			g_using_sw_engine = true;
			SPDK_WARNLOG("The selected workload is not natively supported by the current engine\n");
			SPDK_WARNLOG("The software engine will be used instead.\n\n");
		}
	}

	TAILQ_INIT(&worker->tasks_pool);

	if (g_ops_per_batch > 0) {

		max_per_batch = spdk_accel_batch_get_max(worker->ch);
		assert(max_per_batch > 0);

		if (g_ops_per_batch > max_per_batch) {
			fprintf(stderr, "Reducing requested batch amount to max supported of %d\n", max_per_batch);
			g_ops_per_batch = max_per_batch;
		}

		if (g_ops_per_batch > g_queue_depth) {
			fprintf(stderr, "Batch amount > queue depth, resetting to %d\n", g_queue_depth);
			g_ops_per_batch = g_queue_depth;
		}

		TAILQ_INIT(&worker->in_prep_batches);
		TAILQ_INIT(&worker->to_submit_batches);
		TAILQ_INIT(&worker->in_use_batches);

		/* A worker_batch will live on one of 3 lists:
		 * IN_PREP: as individual IOs complete, new ones are built up on a
		 *          worker_batch on this list until it reaches g_ops_per_batch.
		 * TO_SUBMIT: batches that fill up on IO completion are moved to this
		 *            list; it is used on batch completion to start new batches.
		 * IN_USE: the worker_batch is outstanding and will be moved back to the
		 *         in prep list when the batch completes.
		 *
		 * So we need enough batches to cover queue depth loading, one to replace
		 * each of those, plus one extra to build up while the last batch is
		 * completing its IO but has not yet completed the batch command itself.
		 */
		num_batches = (g_queue_depth / g_ops_per_batch * 2) + 1;
		worker->batch_base = calloc(num_batches, sizeof(struct accel_batch));
		if (worker->batch_base == NULL) {
			fprintf(stderr, "Could not allocate batch base.\n");
			goto error;
		}
		worker_batch = worker->batch_base;
		for (i = 0; i < num_batches; i++) {
			worker_batch->worker = worker;
			TAILQ_INSERT_TAIL(&worker->in_prep_batches, worker_batch, link);
			worker_batch++;
		}
	}

	worker->task_base = calloc(num_tasks, sizeof(struct ap_task));
	if (worker->task_base == NULL) {
		fprintf(stderr, "Could not allocate task base.\n");
		goto error;
	}

	task = worker->task_base;
	for (i = 0; i < num_tasks; i++) {
		TAILQ_INSERT_TAIL(&worker->tasks_pool, task, link);
		task->worker = worker;
		if (_get_task_data_bufs(task)) {
			fprintf(stderr, "Unable to get data bufs\n");
			goto error;
		}
		task++;
	}

	/* Register a poller that will stop the worker at time elapsed */
	worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker,
			      g_time_in_sec * 1000000ULL);

	/* If batching is enabled load up to the full Q depth before
	 * processing any completions, then ping-pong between two batches,
	 * one processing and one being built up for when the other completes.
	 */
	if (g_ops_per_batch > 0) {
		do {
			worker_batch = TAILQ_FIRST(&worker->in_prep_batches);
			if (worker_batch == NULL) {
				goto error;
			}

			worker_batch->batch = spdk_accel_batch_create(worker->ch);
			if (worker_batch->batch == NULL) {
				raise(SIGINT);
				break;
			}

			for (i = 0; i < g_ops_per_batch; i++) {
				task = _get_task(worker);
				worker->current_queue_depth++;
				if (task == NULL) {
					goto error;
				}

				rc = _batch_prep_cmd(worker, task, worker_batch);
				if (rc) {
					fprintf(stderr, "error prepping command\n");
					goto error;
				}
			}

			/* For the batch operation itself.
			 */
			task->worker->current_queue_depth++;
			TAILQ_REMOVE(&worker->in_prep_batches, worker_batch, link);
			TAILQ_INSERT_TAIL(&worker->in_use_batches, worker_batch, link);

			rc = spdk_accel_batch_submit(worker->ch, worker_batch->batch, batch_done, worker_batch);
			if (rc) {
				fprintf(stderr, "error submitting batch\n");
				goto error;
			}
			assert(remaining >= g_ops_per_batch);
			remaining -= g_ops_per_batch;
		} while (remaining > 0);
	}

	/* Submit as singles when no batching is enabled or we ran out of batches. */
	for (i = 0; i < remaining; i++) {
		task = _get_task(worker);
		worker->current_queue_depth++;
		if (task == NULL) {
			goto error;
		}

		_submit_single(worker, task);
	}
	return;
error:
	if (worker_batch && worker_batch->batch) {
		TAILQ_FOREACH_SAFE(worker_batch, &worker->in_use_batches, link, tmp) {
			spdk_accel_batch_cancel(worker->ch, worker_batch->batch);
			TAILQ_REMOVE(&worker->in_use_batches, worker_batch, link);
		}
	}

	_free_task_buffers_in_pool(worker);
	free(worker->batch_base);
	free(worker->task_base);
	free(worker);
	spdk_app_stop(-1);
}

static void
accel_done(void *cb_arg, int status)
{
	struct ap_task *task = (struct ap_task *)cb_arg;
	struct worker_thread *worker = task->worker;

	assert(worker);

	task->status = status;
	if (g_using_sw_engine == false) {
		_accel_done(task);
	} else {
		spdk_thread_send_msg(worker->thread, _accel_done, task);
	}
}

static void
accel_perf_start(void *arg1)
{
	struct spdk_cpuset tmp_cpumask = {};
	char thread_name[32];
	uint32_t i;
	int j;
	struct spdk_thread *thread;
	struct display_info *display;

	g_tsc_rate = spdk_get_ticks_hz();
	g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;

	printf("Running for %d seconds...\n", g_time_in_sec);
	fflush(stdout);

	/* Create worker threads for each core that was specified. */
	SPDK_ENV_FOREACH_CORE(i) {
		for (j = 0; j < g_threads_per_core; j++) {
			snprintf(thread_name, sizeof(thread_name), "ap_worker_%u_%u", i, j);
			spdk_cpuset_zero(&tmp_cpumask);
			spdk_cpuset_set_cpu(&tmp_cpumask, i, true);
			thread = spdk_thread_create(thread_name, &tmp_cpumask);
			display = calloc(1, sizeof(*display));
			if (display == NULL) {
				fprintf(stderr, "Unable to allocate memory\n");
				spdk_app_stop(-1);
				return;
			}
			display->core = i;
			display->thread = j;
			spdk_thread_send_msg(thread, _init_thread, display);
		}
	}
}

int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	struct worker_thread *worker, *tmp;

	pthread_mutex_init(&g_workers_lock, NULL);
	spdk_app_opts_init(&opts, sizeof(opts));
	opts.reactor_mask = "0x1";
	if (spdk_app_parse_args(argc, argv, &opts, "a:C:o:q:t:yw:P:f:b:T:", NULL, parse_args,
				usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
		g_rc = -1;
		goto cleanup;
	}

	if ((g_workload_selection != ACCEL_COPY) &&
	    (g_workload_selection != ACCEL_FILL) &&
	    (g_workload_selection != ACCEL_CRC32C) &&
	    (g_workload_selection != ACCEL_COMPARE) &&
	    (g_workload_selection != ACCEL_DUALCAST)) {
		usage();
		g_rc = -1;
		goto cleanup;
	}

	if (g_ops_per_batch > 0 && (g_queue_depth % g_ops_per_batch > 0)) {
		fprintf(stdout, "queue depth must be a multiple of batch size\n");
		usage();
		g_rc = -1;
		goto cleanup;
	}

	if (g_allocate_depth > 0 && g_queue_depth > g_allocate_depth) {
		fprintf(stdout, "allocate depth must be at least as big as queue depth\n");
		usage();
		g_rc = -1;
		goto cleanup;
	}

	if (g_allocate_depth == 0) {
		g_allocate_depth = g_queue_depth;
	}

	if (g_workload_selection == ACCEL_CRC32C &&
	    g_crc32c_chained_count == 0) {
		usage();
		g_rc = -1;
		goto cleanup;
	}

	dump_user_config(&opts);
	g_rc = spdk_app_start(&opts, accel_perf_start, NULL);
	if (g_rc) {
		SPDK_ERRLOG("ERROR starting application\n");
	}

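	/* spdk_app_start() only returns after spdk_app_stop() has been called,
	 * normally from unregister_worker() once every worker has drained, so the
	 * global worker list can now be torn down.
	 */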
	pthread_mutex_destroy(&g_workers_lock);

	worker = g_workers;
	while (worker) {
		tmp = worker->next;
		free(worker);
		worker = tmp;
	}
cleanup:
	spdk_app_fini();
	return g_rc;
}
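
/*
 * Example invocation (illustrative values; the binary name/path depends on the
 * build and is an assumption here):
 *
 *   ./accel_perf -w crc32c -q 64 -t 10 -C 4 -y
 *
 * This runs the crc32c workload at queue depth 64 for 10 seconds, chaining 4
 * iovec elements per operation and verifying each result in software.
 */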