xref: /spdk/examples/accel/perf/accel_perf.c (revision 367c980b453f48310e52d2574afe7d2774df800c)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk/thread.h"
36 #include "spdk/env.h"
37 #include "spdk/event.h"
38 #include "spdk/log.h"
39 #include "spdk/string.h"
40 #include "spdk/accel_engine.h"
41 #include "spdk/crc32.h"
42 
43 #define DATA_PATTERN 0x5a
44 #define ALIGN_4K 0x1000
45 
46 static uint64_t	g_tsc_rate;
47 static uint64_t g_tsc_us_rate;
48 static uint64_t g_tsc_end;
49 static int g_xfer_size_bytes = 4096;
50 static int g_queue_depth = 32;
51 static int g_time_in_sec = 5;
52 static uint32_t g_crc32c_seed = 0;
53 static int g_fail_percent_goal = 0;
54 static uint8_t g_fill_pattern = 255;
55 static bool g_verify = false;
56 static const char *g_workload_type = NULL;
57 static enum accel_capability g_workload_selection;
58 static struct worker_thread *g_workers = NULL;
59 static int g_num_workers = 0;
60 static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER;
61 
/* Per-SPDK-thread state for one benchmark worker. */
struct worker_thread {
	/* Accel engine channel all of this worker's operations are submitted on. */
	struct spdk_io_channel *ch;
	/* Number of operations that have completed (successfully or not). */
	uint64_t xfer_completed;
	/* Number of operations that failed or whose results did not verify. */
	uint64_t xfer_failed;
	/* Number of compare operations that failed on purpose. */
	uint64_t injected_miscompares;
	/* Operations currently submitted and not yet completed. */
	uint64_t current_queue_depth;
	/* Pool the worker's tasks are drawn from. */
	struct spdk_mempool *task_pool;
	/* Next worker in the global g_workers list. */
	struct worker_thread *next;
	/* Core this worker was created on. */
	unsigned core;
	/* SPDK thread that completions are forwarded to. */
	struct spdk_thread *thread;
	/* Set once the run time has elapsed; completed tasks are then released. */
	bool is_draining;
	/* Poller that waits for the outstanding operations to finish. */
	struct spdk_poller *is_draining_poller;
	/* Poller that fires when the run time has elapsed. */
	struct spdk_poller *stop_poller;
};
76 
/*
 * Application-side bookkeeping for one operation.  The accel engine's own
 * task structure is stored immediately after it in the same pool element.
 */
struct ap_task {
	/* Source buffer. */
	void *src;
	/* Destination buffer. */
	void *dst;
	/* Second destination buffer, only allocated for dualcast. */
	void *dst2;
	/* Worker that owns this task. */
	struct worker_thread *worker;
	/* Completion status reported for the most recent submission. */
	int status;
	/* Status the compare workload expects (-EILSEQ for an injected miscompare). */
	int expected_status;
};
85 
86 inline static struct ap_task *
87 __ap_task_from_accel_task(struct spdk_accel_task *at)
88 {
89 	return (struct ap_task *)((uintptr_t)at - sizeof(struct ap_task));
90 }
91 
92 inline static struct spdk_accel_task *
93 __accel_task_from_ap_task(struct ap_task *ap)
94 {
95 	return (struct spdk_accel_task *)((uintptr_t)ap + sizeof(struct ap_task));
96 }
97 
98 static void
99 dump_user_config(struct spdk_app_opts *opts)
100 {
101 	printf("SPDK Configuration:\n");
102 	printf("Core mask:      %s\n\n", opts->reactor_mask);
103 	printf("Accel Perf Configuration:\n");
104 	printf("Workload Type:  %s\n", g_workload_type);
105 	if (g_workload_selection == ACCEL_CRC32C) {
106 		printf("CRC-32C seed:   %u\n", g_crc32c_seed);
107 	} else if (g_workload_selection == ACCEL_FILL) {
108 		printf("Fill pattern:   0x%x\n", g_fill_pattern);
109 	} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
110 		printf("Failure inject: %u percent\n", g_fail_percent_goal);
111 	}
112 	printf("Transfer size:  %u bytes\n", g_xfer_size_bytes);
113 	printf("Queue depth:    %u\n", g_queue_depth);
114 	printf("Run time:       %u seconds\n", g_time_in_sec);
115 	printf("Verify:         %s\n\n", g_verify ? "Yes" : "No");
116 }
117 
/* Print the application-specific command line help, one option per line. */
static void
usage(void)
{
	static const char *const help_lines[] = {
		"accel_perf options:\n",
		"\t[-h help message]\n",
		"\t[-q queue depth]\n",
		"\t[-n number of channels]\n",
		"\t[-o transfer size in bytes]\n",
		"\t[-t time in seconds]\n",
		"\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n",
		"\t[-s for crc32c workload, use this seed value (default 0)\n",
		"\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n",
		"\t[-f for fill workload, use this BYTE value (default 255)\n",
		"\t[-y verify result if this switch is on]\n",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++) {
		printf("%s", help_lines[idx]);
	}
}
133 
134 static int
135 parse_args(int argc, char *argv)
136 {
137 	switch (argc) {
138 	case 'f':
139 		g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10);
140 		break;
141 	case 'o':
142 		g_xfer_size_bytes = spdk_strtol(optarg, 10);
143 		break;
144 	case 'P':
145 		g_fail_percent_goal = spdk_strtol(optarg, 10);
146 		break;
147 	case 'q':
148 		g_queue_depth = spdk_strtol(optarg, 10);
149 		break;
150 	case 's':
151 		g_crc32c_seed = spdk_strtol(optarg, 10);
152 		break;
153 	case 't':
154 		g_time_in_sec = spdk_strtol(optarg, 10);
155 		break;
156 	case 'y':
157 		g_verify = true;
158 		break;
159 	case 'w':
160 		g_workload_type = optarg;
161 		if (!strcmp(g_workload_type, "copy")) {
162 			g_workload_selection = ACCEL_COPY;
163 		} else if (!strcmp(g_workload_type, "fill")) {
164 			g_workload_selection = ACCEL_FILL;
165 		} else if (!strcmp(g_workload_type, "crc32c")) {
166 			g_workload_selection = ACCEL_CRC32C;
167 		} else if (!strcmp(g_workload_type, "compare")) {
168 			g_workload_selection = ACCEL_COMPARE;
169 		} else if (!strcmp(g_workload_type, "dualcast")) {
170 			g_workload_selection = ACCEL_DUALCAST;
171 		}
172 		break;
173 	default:
174 		usage();
175 		return 1;
176 	}
177 	return 0;
178 }
179 
180 static void
181 unregister_worker(void *arg1)
182 {
183 	struct worker_thread *worker = arg1;
184 
185 	spdk_mempool_free(worker->task_pool);
186 	spdk_put_io_channel(worker->ch);
187 	pthread_mutex_lock(&g_workers_lock);
188 	assert(g_num_workers >= 1);
189 	if (--g_num_workers == 0) {
190 		pthread_mutex_unlock(&g_workers_lock);
191 		spdk_app_stop(0);
192 	}
193 	pthread_mutex_unlock(&g_workers_lock);
194 }
195 
196 static void accel_done(void *ref, int status);
197 
198 static void
199 _submit_single(void *arg1, void *arg2)
200 {
201 	struct worker_thread *worker = arg1;
202 	struct ap_task *task = arg2;
203 	int random_num;
204 	int rc = 0;
205 
206 	assert(worker);
207 
208 	task->worker = worker;
209 	task->worker->current_queue_depth++;
210 	switch (g_workload_selection) {
211 	case ACCEL_COPY:
212 		rc = spdk_accel_submit_copy(__accel_task_from_ap_task(task),
213 					    worker->ch, task->dst,
214 					    task->src, g_xfer_size_bytes, accel_done);
215 		break;
216 	case ACCEL_FILL:
217 		/* For fill use the first byte of the task->dst buffer */
218 		rc = spdk_accel_submit_fill(__accel_task_from_ap_task(task),
219 					    worker->ch, task->dst, *(uint8_t *)task->src,
220 					    g_xfer_size_bytes, accel_done);
221 		break;
222 	case ACCEL_CRC32C:
223 		rc = spdk_accel_submit_crc32c(__accel_task_from_ap_task(task),
224 					      worker->ch, (uint32_t *)task->dst, task->src, g_crc32c_seed,
225 					      g_xfer_size_bytes, accel_done);
226 		break;
227 	case ACCEL_COMPARE:
228 		random_num = rand() % 100;
229 		if (random_num < g_fail_percent_goal) {
230 			task->expected_status = -EILSEQ;
231 			*(uint8_t *)task->dst = ~DATA_PATTERN;
232 		} else {
233 			task->expected_status = 0;
234 			*(uint8_t *)task->dst = DATA_PATTERN;
235 		}
236 		rc = spdk_accel_submit_compare(__accel_task_from_ap_task(task),
237 					       worker->ch, task->dst, task->src,
238 					       g_xfer_size_bytes, accel_done);
239 		break;
240 	case ACCEL_DUALCAST:
241 		rc = spdk_accel_submit_dualcast(__accel_task_from_ap_task(task),
242 						worker->ch, task->dst, task->dst2,
243 						task->src, g_xfer_size_bytes, accel_done);
244 		break;
245 	default:
246 		assert(false);
247 		break;
248 
249 	}
250 
251 	if (rc) {
252 		accel_done(__accel_task_from_ap_task(task), rc);
253 	}
254 }
255 
256 static void
257 _accel_done(void *arg1)
258 {
259 	struct ap_task *task = arg1;
260 	struct worker_thread *worker = task->worker;
261 	uint32_t sw_crc32c;
262 
263 	assert(worker);
264 	assert(worker->current_queue_depth > 0);
265 
266 	if (g_verify && task->status == 0) {
267 		switch (g_workload_selection) {
268 		case ACCEL_CRC32C:
269 			/* calculate sw CRC-32C and compare to sw aceel result. */
270 			sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed);
271 			if (*(uint32_t *)task->dst != sw_crc32c) {
272 				SPDK_NOTICELOG("CRC-32C miscompare\n");
273 				worker->xfer_failed++;
274 			}
275 			break;
276 		case ACCEL_COPY:
277 			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
278 				SPDK_NOTICELOG("Data miscompare\n");
279 				worker->xfer_failed++;
280 			}
281 			break;
282 		case ACCEL_DUALCAST:
283 			if (memcmp(task->src, task->dst, g_xfer_size_bytes)) {
284 				SPDK_NOTICELOG("Data miscompare, first destination\n");
285 				worker->xfer_failed++;
286 			}
287 			if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) {
288 				SPDK_NOTICELOG("Data miscompare, second destination\n");
289 				worker->xfer_failed++;
290 			}
291 			break;
292 		default:
293 			assert(false);
294 			break;
295 		}
296 	}
297 
298 	if (task->expected_status == -EILSEQ) {
299 		assert(task->status != 0);
300 		worker->injected_miscompares++;
301 	} else if (task->status) {
302 		/* Expected to pass but API reported error. */
303 		worker->xfer_failed++;
304 	}
305 
306 	worker->xfer_completed++;
307 	worker->current_queue_depth--;
308 
309 	if (!worker->is_draining) {
310 		_submit_single(worker, task);
311 	} else {
312 		spdk_free(task->src);
313 		spdk_free(task->dst);
314 		if (g_workload_selection == ACCEL_DUALCAST) {
315 			spdk_free(task->dst2);
316 		}
317 		spdk_mempool_put(worker->task_pool, task);
318 	}
319 }
320 
321 static int
322 dump_result(void)
323 {
324 	uint64_t total_completed = 0;
325 	uint64_t total_failed = 0;
326 	uint64_t total_miscompared = 0;
327 	uint64_t total_xfer_per_sec, total_bw_in_MiBps;
328 	struct worker_thread *worker = g_workers;
329 
330 	printf("\nCore           Transfers     Bandwidth     Failed     Miscompares\n");
331 	printf("-----------------------------------------------------------------\n");
332 	while (worker != NULL) {
333 
334 		uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec;
335 		uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) /
336 				       (g_time_in_sec * 1024 * 1024);
337 
338 		total_completed += worker->xfer_completed;
339 		total_failed += worker->xfer_failed;
340 		total_miscompared += worker->injected_miscompares;
341 
342 		if (xfer_per_sec) {
343 			printf("%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64 "\n",
344 			       worker->core, xfer_per_sec,
345 			       bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares);
346 		}
347 
348 		worker = worker->next;
349 	}
350 
351 	total_xfer_per_sec = total_completed / g_time_in_sec;
352 	total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) /
353 			    (g_time_in_sec * 1024 * 1024);
354 
355 	printf("==================================================================\n");
356 	printf("Total:%16" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64"\n\n",
357 	       total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared);
358 
359 	return total_failed ? 1 : 0;
360 }
361 
362 static int
363 _check_draining(void *arg)
364 {
365 	struct worker_thread *worker = arg;
366 
367 	assert(worker);
368 
369 	if (worker->current_queue_depth == 0) {
370 		spdk_poller_unregister(&worker->is_draining_poller);
371 		unregister_worker(worker);
372 	}
373 
374 	return -1;
375 }
376 
377 static int
378 _worker_stop(void *arg)
379 {
380 	struct worker_thread *worker = arg;
381 
382 	assert(worker);
383 
384 	spdk_poller_unregister(&worker->stop_poller);
385 
386 	/* now let the worker drain and check it's outstanding IO with a poller */
387 	worker->is_draining = true;
388 	worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0);
389 
390 	return 0;
391 }
392 
/*
 * Callback handed to spdk_for_each_thread() alongside _init_thread().
 * Nothing needs to happen here, so the body is intentionally empty.
 */
static void
_init_thread_done(void *ctx)
{
	/* Intentionally empty. */
}
397 
398 static void
399 _init_thread(void *arg1)
400 {
401 	struct worker_thread *worker;
402 	char task_pool_name[30];
403 	struct ap_task *task;
404 	int i;
405 	uint32_t align = 0;
406 
407 	worker = calloc(1, sizeof(*worker));
408 	if (worker == NULL) {
409 		fprintf(stderr, "Unable to allocate worker\n");
410 		return;
411 	}
412 
413 	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
414 	 * we do this for all engines to keep it simple.
415 	 */
416 	if (g_workload_selection == ACCEL_DUALCAST) {
417 		align = ALIGN_4K;
418 	}
419 
420 	worker->core = spdk_env_get_current_core();
421 	worker->thread = spdk_get_thread();
422 	worker->next = g_workers;
423 	worker->ch = spdk_accel_engine_get_io_channel();
424 
425 	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", g_num_workers);
426 	worker->task_pool = spdk_mempool_create(task_pool_name,
427 						g_queue_depth,
428 						spdk_accel_task_size() + sizeof(struct ap_task),
429 						SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
430 						SPDK_ENV_SOCKET_ID_ANY);
431 	if (!worker->task_pool) {
432 		fprintf(stderr, "Could not allocate buffer pool.\n");
433 		free(worker);
434 		return;
435 	}
436 
437 	/* Register a poller that will stop the worker at time elapsed */
438 	worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker,
439 			      g_time_in_sec * 1000000ULL);
440 
441 	g_workers = worker;
442 	pthread_mutex_lock(&g_workers_lock);
443 	g_num_workers++;
444 	pthread_mutex_unlock(&g_workers_lock);
445 
446 	for (i = 0; i < g_queue_depth; i++) {
447 		task = spdk_mempool_get(worker->task_pool);
448 		if (!task) {
449 			fprintf(stderr, "Unable to get accel_task\n");
450 			return;
451 		}
452 
453 		task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
454 		if (task->src == NULL) {
455 			fprintf(stderr, "Unable to alloc src buffer\n");
456 			return;
457 		}
458 		memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
459 
460 		task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
461 		if (task->dst == NULL) {
462 			fprintf(stderr, "Unable to alloc dst buffer\n");
463 			return;
464 		}
465 
466 		if (g_workload_selection == ACCEL_DUALCAST) {
467 			task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
468 			if (task->dst2 == NULL) {
469 				fprintf(stderr, "Unable to alloc dst buffer\n");
470 				return;
471 			}
472 			memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes);
473 		}
474 
475 		/* For compare we want the buffers to match, otherwise not. */
476 		if (g_workload_selection == ACCEL_COMPARE) {
477 			memset(task->dst, DATA_PATTERN, g_xfer_size_bytes);
478 		} else {
479 			memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
480 		}
481 
482 		_submit_single(worker, task);
483 	}
484 }
485 
486 static void
487 accel_done(void *ref, int status)
488 {
489 	struct ap_task *task = __ap_task_from_accel_task(ref);
490 	struct worker_thread *worker = task->worker;
491 
492 	assert(worker);
493 
494 	task->status = status;
495 	spdk_thread_send_msg(worker->thread, _accel_done, task);
496 }
497 
498 static void
499 accel_perf_start(void *arg1)
500 {
501 	uint64_t capabilites;
502 	struct spdk_io_channel *accel_ch;
503 
504 	accel_ch = spdk_accel_engine_get_io_channel();
505 	capabilites = spdk_accel_get_capabilities(accel_ch);
506 	spdk_put_io_channel(accel_ch);
507 
508 	if ((capabilites & g_workload_selection) != g_workload_selection) {
509 		SPDK_ERRLOG("Selected workload is not supported by the current engine\n");
510 		SPDK_NOTICELOG("Software engine is selected by default, enable a HW engine via RPC\n\n");
511 		spdk_app_stop(-1);
512 		return;
513 	}
514 
515 	g_tsc_rate = spdk_get_ticks_hz();
516 	g_tsc_us_rate = g_tsc_rate / (1000 * 1000);
517 	g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
518 
519 	printf("Running for %d seconds...\n", g_time_in_sec);
520 	fflush(stdout);
521 
522 	spdk_for_each_thread(_init_thread, NULL, _init_thread_done);
523 }
524 
525 int
526 main(int argc, char **argv)
527 {
528 	struct spdk_app_opts opts = {};
529 	struct worker_thread *worker, *tmp;
530 	int rc = 0;
531 
532 	pthread_mutex_init(&g_workers_lock, NULL);
533 	spdk_app_opts_init(&opts);
534 	opts.reactor_mask = "0x1";
535 	if ((rc = spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:", NULL, parse_args,
536 				      usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) {
537 		rc = -1;
538 		goto cleanup;
539 	}
540 
541 	if ((g_workload_selection != ACCEL_COPY) &&
542 	    (g_workload_selection != ACCEL_FILL) &&
543 	    (g_workload_selection != ACCEL_CRC32C) &&
544 	    (g_workload_selection != ACCEL_COMPARE) &&
545 	    (g_workload_selection != ACCEL_DUALCAST)) {
546 		usage();
547 		rc = -1;
548 		goto cleanup;
549 	}
550 
551 	dump_user_config(&opts);
552 	rc = spdk_app_start(&opts, accel_perf_start, NULL);
553 	if (rc) {
554 		SPDK_ERRLOG("ERROR starting application\n");
555 	} else {
556 		dump_result();
557 	}
558 
559 	pthread_mutex_destroy(&g_workers_lock);
560 
561 	worker = g_workers;
562 	while (worker) {
563 		tmp = worker->next;
564 		free(worker);
565 		worker = tmp;
566 	}
567 cleanup:
568 	spdk_app_fini();
569 	return rc;
570 }
571