xref: /dpdk/app/test-crypto-perf/cperf_test_latency.c (revision 1e1d4fb791097c9cca687c3ee3f56284e2be2c25)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
37 
38 #include "cperf_test_latency.h"
39 #include "cperf_ops.h"
40 
41 
42 struct cperf_op_result {
43 	uint64_t tsc_start;
44 	uint64_t tsc_end;
45 	enum rte_crypto_op_status status;
46 };
47 
48 struct cperf_latency_ctx {
49 	uint8_t dev_id;
50 	uint16_t qp_id;
51 	uint8_t lcore_id;
52 
53 	struct rte_mempool *pkt_mbuf_pool_in;
54 	struct rte_mempool *pkt_mbuf_pool_out;
55 	struct rte_mbuf **mbufs_in;
56 	struct rte_mbuf **mbufs_out;
57 
58 	struct rte_mempool *crypto_op_pool;
59 
60 	struct rte_cryptodev_sym_session *sess;
61 
62 	cperf_populate_ops_t populate_ops;
63 
64 	const struct cperf_options *options;
65 	const struct cperf_test_vector *test_vector;
66 	struct cperf_op_result *res;
67 };
68 
/*
 * Larger / smaller of two values, with the result widened to uint64_t.
 *
 * Every use of an argument is parenthesised so that arguments containing
 * operators of lower precedence than the comparison (for example `x & mask`)
 * expand as intended. An argument may still be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))
71 
72 static void
73 cperf_latency_test_free(struct cperf_latency_ctx *ctx, uint32_t mbuf_nb)
74 {
75 	uint32_t i;
76 
77 	if (ctx) {
78 		if (ctx->sess)
79 			rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);
80 
81 		if (ctx->mbufs_in) {
82 			for (i = 0; i < mbuf_nb; i++)
83 				rte_pktmbuf_free(ctx->mbufs_in[i]);
84 
85 			rte_free(ctx->mbufs_in);
86 		}
87 
88 		if (ctx->mbufs_out) {
89 			for (i = 0; i < mbuf_nb; i++) {
90 				if (ctx->mbufs_out[i] != NULL)
91 					rte_pktmbuf_free(ctx->mbufs_out[i]);
92 			}
93 
94 			rte_free(ctx->mbufs_out);
95 		}
96 
97 		if (ctx->pkt_mbuf_pool_in)
98 			rte_mempool_free(ctx->pkt_mbuf_pool_in);
99 
100 		if (ctx->pkt_mbuf_pool_out)
101 			rte_mempool_free(ctx->pkt_mbuf_pool_out);
102 
103 		if (ctx->crypto_op_pool)
104 			rte_mempool_free(ctx->crypto_op_pool);
105 
106 		rte_free(ctx->res);
107 		rte_free(ctx);
108 	}
109 }
110 
111 static struct rte_mbuf *
112 cperf_mbuf_create(struct rte_mempool *mempool,
113 		uint32_t segments_nb,
114 		const struct cperf_options *options,
115 		const struct cperf_test_vector *test_vector)
116 {
117 	struct rte_mbuf *mbuf;
118 	uint32_t segment_sz = options->max_buffer_size / segments_nb;
119 	uint32_t last_sz = options->max_buffer_size % segments_nb;
120 	uint8_t *mbuf_data;
121 	uint8_t *test_data =
122 			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
123 					test_vector->plaintext.data :
124 					test_vector->ciphertext.data;
125 
126 	mbuf = rte_pktmbuf_alloc(mempool);
127 	if (mbuf == NULL)
128 		goto error;
129 
130 	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
131 	if (mbuf_data == NULL)
132 		goto error;
133 
134 	memcpy(mbuf_data, test_data, segment_sz);
135 	test_data += segment_sz;
136 	segments_nb--;
137 
138 	while (segments_nb) {
139 		struct rte_mbuf *m;
140 
141 		m = rte_pktmbuf_alloc(mempool);
142 		if (m == NULL)
143 			goto error;
144 
145 		rte_pktmbuf_chain(mbuf, m);
146 
147 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
148 		if (mbuf_data == NULL)
149 			goto error;
150 
151 		memcpy(mbuf_data, test_data, segment_sz);
152 		test_data += segment_sz;
153 		segments_nb--;
154 	}
155 
156 	if (last_sz) {
157 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
158 		if (mbuf_data == NULL)
159 			goto error;
160 
161 		memcpy(mbuf_data, test_data, last_sz);
162 	}
163 
164 	if (options->op_type != CPERF_CIPHER_ONLY) {
165 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
166 			options->auth_digest_sz);
167 		if (mbuf_data == NULL)
168 			goto error;
169 	}
170 
171 	if (options->op_type == CPERF_AEAD) {
172 		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
173 			RTE_ALIGN_CEIL(options->auth_aad_sz, 16));
174 
175 		if (aead == NULL)
176 			goto error;
177 
178 		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
179 	}
180 
181 	return mbuf;
182 error:
183 	if (mbuf != NULL)
184 		rte_pktmbuf_free(mbuf);
185 
186 	return NULL;
187 }
188 
189 void *
190 cperf_latency_test_constructor(uint8_t dev_id, uint16_t qp_id,
191 		const struct cperf_options *options,
192 		const struct cperf_test_vector *test_vector,
193 		const struct cperf_op_fns *op_fns)
194 {
195 	struct cperf_latency_ctx *ctx = NULL;
196 	unsigned int mbuf_idx = 0;
197 	char pool_name[32] = "";
198 
199 	ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
200 	if (ctx == NULL)
201 		goto err;
202 
203 	ctx->dev_id = dev_id;
204 	ctx->qp_id = qp_id;
205 
206 	ctx->populate_ops = op_fns->populate_ops;
207 	ctx->options = options;
208 	ctx->test_vector = test_vector;
209 
210 	ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
211 	if (ctx->sess == NULL)
212 		goto err;
213 
214 	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
215 				dev_id);
216 
217 	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
218 			options->pool_sz * options->segments_nb, 0, 0,
219 			RTE_PKTMBUF_HEADROOM +
220 			RTE_CACHE_LINE_ROUNDUP(
221 				(options->max_buffer_size / options->segments_nb) +
222 				(options->max_buffer_size % options->segments_nb) +
223 					options->auth_digest_sz),
224 			rte_socket_id());
225 
226 	if (ctx->pkt_mbuf_pool_in == NULL)
227 		goto err;
228 
229 	/* Generate mbufs_in with plaintext populated for test */
230 	ctx->mbufs_in = rte_malloc(NULL,
231 			(sizeof(struct rte_mbuf *) *
232 			ctx->options->pool_sz), 0);
233 
234 	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
235 		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
236 				ctx->pkt_mbuf_pool_in, options->segments_nb,
237 				options, test_vector);
238 		if (ctx->mbufs_in[mbuf_idx] == NULL)
239 			goto err;
240 	}
241 
242 	if (options->out_of_place == 1)	{
243 
244 		snprintf(pool_name, sizeof(pool_name),
245 				"cperf_pool_out_cdev_%d",
246 				dev_id);
247 
248 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
249 				pool_name, options->pool_sz, 0, 0,
250 				RTE_PKTMBUF_HEADROOM +
251 				RTE_CACHE_LINE_ROUNDUP(
252 					options->max_buffer_size +
253 					options->auth_digest_sz),
254 				rte_socket_id());
255 
256 		if (ctx->pkt_mbuf_pool_out == NULL)
257 			goto err;
258 	}
259 
260 	ctx->mbufs_out = rte_malloc(NULL,
261 			(sizeof(struct rte_mbuf *) *
262 			ctx->options->pool_sz), 0);
263 
264 	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
265 		if (options->out_of_place == 1)	{
266 			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
267 					ctx->pkt_mbuf_pool_out, 1,
268 					options, test_vector);
269 			if (ctx->mbufs_out[mbuf_idx] == NULL)
270 				goto err;
271 		} else {
272 			ctx->mbufs_out[mbuf_idx] = NULL;
273 		}
274 	}
275 
276 	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
277 			dev_id);
278 
279 	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
280 			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
281 			rte_socket_id());
282 	if (ctx->crypto_op_pool == NULL)
283 		goto err;
284 
285 	ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
286 			ctx->options->total_ops, 0);
287 
288 	if (ctx->res == NULL)
289 		goto err;
290 
291 	return ctx;
292 err:
293 	cperf_latency_test_free(ctx, mbuf_idx);
294 
295 	return NULL;
296 }
297 
298 int
299 cperf_latency_test_runner(void *arg)
300 {
301 	struct cperf_latency_ctx *ctx = arg;
302 	struct cperf_op_result *pres;
303 	uint16_t test_burst_size;
304 	uint8_t burst_size_idx = 0;
305 
306 	static int only_once;
307 
308 	if (ctx == NULL)
309 		return 0;
310 
311 	struct rte_crypto_op *ops[ctx->options->max_burst_size];
312 	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
313 	uint64_t i;
314 
315 	uint32_t lcore = rte_lcore_id();
316 
317 #ifdef CPERF_LINEARIZATION_ENABLE
318 	struct rte_cryptodev_info dev_info;
319 	int linearize = 0;
320 
321 	/* Check if source mbufs require coalescing */
322 	if (ctx->options->segments_nb > 1) {
323 		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
324 		if ((dev_info.feature_flags &
325 				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
326 			linearize = 1;
327 	}
328 #endif /* CPERF_LINEARIZATION_ENABLE */
329 
330 	ctx->lcore_id = lcore;
331 
332 	/* Warm up the host CPU before starting the test */
333 	for (i = 0; i < ctx->options->total_ops; i++)
334 		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
335 
336 	/* Get first size from range or list */
337 	if (ctx->options->inc_burst_size != 0)
338 		test_burst_size = ctx->options->min_burst_size;
339 	else
340 		test_burst_size = ctx->options->burst_size_list[0];
341 
342 	while (test_burst_size <= ctx->options->max_burst_size) {
343 		uint64_t ops_enqd = 0, ops_deqd = 0;
344 		uint64_t m_idx = 0, b_idx = 0;
345 
346 		uint64_t tsc_val, tsc_end, tsc_start;
347 		uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
348 		uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
349 		uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;
350 
351 		while (enqd_tot < ctx->options->total_ops) {
352 
353 			uint16_t burst_size = ((enqd_tot + test_burst_size)
354 					<= ctx->options->total_ops) ?
355 							test_burst_size :
356 							ctx->options->total_ops -
357 							enqd_tot;
358 
359 			/* Allocate crypto ops from pool */
360 			if (burst_size != rte_crypto_op_bulk_alloc(
361 					ctx->crypto_op_pool,
362 					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
363 					ops, burst_size))
364 				return -1;
365 
366 			/* Setup crypto op, attach mbuf etc */
367 			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
368 					&ctx->mbufs_out[m_idx],
369 					burst_size, ctx->sess, ctx->options,
370 					ctx->test_vector);
371 
372 			tsc_start = rte_rdtsc_precise();
373 
374 #ifdef CPERF_LINEARIZATION_ENABLE
375 			if (linearize) {
376 				/* PMD doesn't support scatter-gather and source buffer
377 				 * is segmented.
378 				 * We need to linearize it before enqueuing.
379 				 */
380 				for (i = 0; i < burst_size; i++)
381 					rte_pktmbuf_linearize(ops[i]->sym->m_src);
382 			}
383 #endif /* CPERF_LINEARIZATION_ENABLE */
384 
385 			/* Enqueue burst of ops on crypto device */
386 			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
387 					ops, burst_size);
388 
389 			/* Dequeue processed burst of ops from crypto device */
390 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
391 					ops_processed, test_burst_size);
392 
393 			tsc_end = rte_rdtsc_precise();
394 
395 			/* Free memory for not enqueued operations */
396 			for (i = ops_enqd; i < burst_size; i++)
397 				rte_crypto_op_free(ops[i]);
398 
399 			for (i = 0; i < ops_enqd; i++) {
400 				ctx->res[tsc_idx].tsc_start = tsc_start;
401 				ops[i]->opaque_data = (void *)&ctx->res[tsc_idx];
402 				tsc_idx++;
403 			}
404 
405 			if (likely(ops_deqd))  {
406 				/*
407 				 * free crypto ops so they can be reused. We don't free
408 				 * the mbufs here as we don't want to reuse them as
409 				 * the crypto operation will change the data and cause
410 				 * failures.
411 				 */
412 				for (i = 0; i < ops_deqd; i++) {
413 					pres = (struct cperf_op_result *)
414 							(ops_processed[i]->opaque_data);
415 					pres->status = ops_processed[i]->status;
416 					pres->tsc_end = tsc_end;
417 
418 					rte_crypto_op_free(ops_processed[i]);
419 				}
420 
421 				deqd_tot += ops_deqd;
422 				deqd_max = max(ops_deqd, deqd_max);
423 				deqd_min = min(ops_deqd, deqd_min);
424 			}
425 
426 			enqd_tot += ops_enqd;
427 			enqd_max = max(ops_enqd, enqd_max);
428 			enqd_min = min(ops_enqd, enqd_min);
429 
430 			m_idx += ops_enqd;
431 			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
432 					0 : m_idx;
433 			b_idx++;
434 		}
435 
436 		/* Dequeue any operations still in the crypto device */
437 		while (deqd_tot < ctx->options->total_ops) {
438 			/* Sending 0 length burst to flush sw crypto device */
439 			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
440 
441 			/* dequeue burst */
442 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
443 					ops_processed, test_burst_size);
444 
445 			tsc_end = rte_rdtsc_precise();
446 
447 			if (ops_deqd != 0) {
448 				for (i = 0; i < ops_deqd; i++) {
449 					pres = (struct cperf_op_result *)
450 							(ops_processed[i]->opaque_data);
451 					pres->status = ops_processed[i]->status;
452 					pres->tsc_end = tsc_end;
453 
454 					rte_crypto_op_free(ops_processed[i]);
455 				}
456 
457 				deqd_tot += ops_deqd;
458 				deqd_max = max(ops_deqd, deqd_max);
459 				deqd_min = min(ops_deqd, deqd_min);
460 			}
461 		}
462 
463 		for (i = 0; i < tsc_idx; i++) {
464 			tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
465 			tsc_max = max(tsc_val, tsc_max);
466 			tsc_min = min(tsc_val, tsc_min);
467 			tsc_tot += tsc_val;
468 		}
469 
470 		double time_tot, time_avg, time_max, time_min;
471 
472 		const uint64_t tunit = 1000000; /* us */
473 		const uint64_t tsc_hz = rte_get_tsc_hz();
474 
475 		uint64_t enqd_avg = enqd_tot / b_idx;
476 		uint64_t deqd_avg = deqd_tot / b_idx;
477 		uint64_t tsc_avg = tsc_tot / tsc_idx;
478 
479 		time_tot = tunit*(double)(tsc_tot) / tsc_hz;
480 		time_avg = tunit*(double)(tsc_avg) / tsc_hz;
481 		time_max = tunit*(double)(tsc_max) / tsc_hz;
482 		time_min = tunit*(double)(tsc_min) / tsc_hz;
483 
484 		if (ctx->options->csv) {
485 			if (!only_once)
486 				printf("\n# lcore, Buffer Size, Burst Size, Pakt Seq #, "
487 						"Packet Size, cycles, time (us)");
488 
489 			for (i = 0; i < ctx->options->total_ops; i++) {
490 
491 				printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f",
492 					ctx->lcore_id, ctx->options->test_buffer_size,
493 					test_burst_size, i + 1,
494 					ctx->res[i].tsc_end - ctx->res[i].tsc_start,
495 					tunit * (double) (ctx->res[i].tsc_end
496 							- ctx->res[i].tsc_start)
497 						/ tsc_hz);
498 
499 			}
500 			only_once = 1;
501 		} else {
502 			printf("\n# Device %d on lcore %u\n", ctx->dev_id,
503 				ctx->lcore_id);
504 			printf("\n# total operations: %u", ctx->options->total_ops);
505 			printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
506 			printf("\n# Burst size: %u", test_burst_size);
507 			printf("\n#     Number of bursts: %"PRIu64,
508 					b_idx);
509 
510 			printf("\n#");
511 			printf("\n#          \t       Total\t   Average\t   "
512 					"Maximum\t   Minimum");
513 			printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
514 					"%10"PRIu64"\t%10"PRIu64, enqd_tot,
515 					enqd_avg, enqd_max, enqd_min);
516 			printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
517 					"%10"PRIu64"\t%10"PRIu64, deqd_tot,
518 					deqd_avg, deqd_max, deqd_min);
519 			printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
520 					"%10"PRIu64"\t%10"PRIu64, tsc_tot,
521 					tsc_avg, tsc_max, tsc_min);
522 			printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
523 					time_tot, time_avg, time_max, time_min);
524 			printf("\n\n");
525 
526 		}
527 
528 		/* Get next size from range or list */
529 		if (ctx->options->inc_burst_size != 0)
530 			test_burst_size += ctx->options->inc_burst_size;
531 		else {
532 			if (++burst_size_idx == ctx->options->burst_size_count)
533 				break;
534 			test_burst_size =
535 				ctx->options->burst_size_list[burst_size_idx];
536 		}
537 	}
538 
539 	return 0;
540 }
541 
542 void
543 cperf_latency_test_destructor(void *arg)
544 {
545 	struct cperf_latency_ctx *ctx = arg;
546 
547 	if (ctx == NULL)
548 		return;
549 
550 	cperf_latency_test_free(ctx, ctx->options->pool_sz);
551 
552 }
553