xref: /dpdk/app/test-crypto-perf/cperf_test_latency.c (revision 15b4beab8a300a5d74b4a0913a9f2faaa33f0ecf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
37 
38 #include "cperf_test_latency.h"
39 #include "cperf_ops.h"
40 
41 
42 struct cperf_op_result {
43 	uint64_t tsc_start;
44 	uint64_t tsc_end;
45 	enum rte_crypto_op_status status;
46 };
47 
48 struct cperf_latency_ctx {
49 	uint8_t dev_id;
50 	uint16_t qp_id;
51 	uint8_t lcore_id;
52 
53 	struct rte_mempool *pkt_mbuf_pool_in;
54 	struct rte_mempool *pkt_mbuf_pool_out;
55 	struct rte_mbuf **mbufs_in;
56 	struct rte_mbuf **mbufs_out;
57 
58 	struct rte_mempool *crypto_op_pool;
59 
60 	struct rte_cryptodev_sym_session *sess;
61 
62 	cperf_populate_ops_t populate_ops;
63 
64 	const struct cperf_options *options;
65 	const struct cperf_test_vector *test_vector;
66 	struct cperf_op_result *res;
67 };
68 
/*
 * Per-operation private data, stored immediately after the
 * rte_crypto_sym_op of each crypto operation.
 */
struct priv_op_data {
	/* Result record that receives the dequeue timestamp and status */
	struct cperf_op_result *result;
};
72 
/*
 * Larger / smaller of two values, converted to uint64_t.
 *
 * Every use of an argument is parenthesized so that expressions containing
 * operators of lower precedence than the comparison (e.g. `x & y`) are
 * compared as a whole. Each argument is still evaluated twice, so do not
 * pass expressions with side effects.
 */
#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))
75 
76 static void
77 cperf_latency_test_free(struct cperf_latency_ctx *ctx, uint32_t mbuf_nb)
78 {
79 	uint32_t i;
80 
81 	if (ctx) {
82 		if (ctx->sess) {
83 			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
84 			rte_cryptodev_sym_session_free(ctx->sess);
85 		}
86 
87 		if (ctx->mbufs_in) {
88 			for (i = 0; i < mbuf_nb; i++)
89 				rte_pktmbuf_free(ctx->mbufs_in[i]);
90 
91 			rte_free(ctx->mbufs_in);
92 		}
93 
94 		if (ctx->mbufs_out) {
95 			for (i = 0; i < mbuf_nb; i++) {
96 				if (ctx->mbufs_out[i] != NULL)
97 					rte_pktmbuf_free(ctx->mbufs_out[i]);
98 			}
99 
100 			rte_free(ctx->mbufs_out);
101 		}
102 
103 		if (ctx->pkt_mbuf_pool_in)
104 			rte_mempool_free(ctx->pkt_mbuf_pool_in);
105 
106 		if (ctx->pkt_mbuf_pool_out)
107 			rte_mempool_free(ctx->pkt_mbuf_pool_out);
108 
109 		if (ctx->crypto_op_pool)
110 			rte_mempool_free(ctx->crypto_op_pool);
111 
112 		rte_free(ctx->res);
113 		rte_free(ctx);
114 	}
115 }
116 
117 static struct rte_mbuf *
118 cperf_mbuf_create(struct rte_mempool *mempool,
119 		uint32_t segments_nb,
120 		const struct cperf_options *options,
121 		const struct cperf_test_vector *test_vector)
122 {
123 	struct rte_mbuf *mbuf;
124 	uint32_t segment_sz = options->max_buffer_size / segments_nb;
125 	uint32_t last_sz = options->max_buffer_size % segments_nb;
126 	uint8_t *mbuf_data;
127 	uint8_t *test_data =
128 			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
129 					test_vector->plaintext.data :
130 					test_vector->ciphertext.data;
131 
132 	mbuf = rte_pktmbuf_alloc(mempool);
133 	if (mbuf == NULL)
134 		goto error;
135 
136 	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
137 	if (mbuf_data == NULL)
138 		goto error;
139 
140 	memcpy(mbuf_data, test_data, segment_sz);
141 	test_data += segment_sz;
142 	segments_nb--;
143 
144 	while (segments_nb) {
145 		struct rte_mbuf *m;
146 
147 		m = rte_pktmbuf_alloc(mempool);
148 		if (m == NULL)
149 			goto error;
150 
151 		rte_pktmbuf_chain(mbuf, m);
152 
153 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
154 		if (mbuf_data == NULL)
155 			goto error;
156 
157 		memcpy(mbuf_data, test_data, segment_sz);
158 		test_data += segment_sz;
159 		segments_nb--;
160 	}
161 
162 	if (last_sz) {
163 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
164 		if (mbuf_data == NULL)
165 			goto error;
166 
167 		memcpy(mbuf_data, test_data, last_sz);
168 	}
169 
170 	if (options->op_type != CPERF_CIPHER_ONLY) {
171 		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
172 			options->digest_sz);
173 		if (mbuf_data == NULL)
174 			goto error;
175 	}
176 
177 	if (options->op_type == CPERF_AEAD) {
178 		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
179 			RTE_ALIGN_CEIL(options->aead_aad_sz, 16));
180 
181 		if (aead == NULL)
182 			goto error;
183 
184 		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
185 	}
186 
187 	return mbuf;
188 error:
189 	if (mbuf != NULL)
190 		rte_pktmbuf_free(mbuf);
191 
192 	return NULL;
193 }
194 
195 void *
196 cperf_latency_test_constructor(struct rte_mempool *sess_mp,
197 		uint8_t dev_id, uint16_t qp_id,
198 		const struct cperf_options *options,
199 		const struct cperf_test_vector *test_vector,
200 		const struct cperf_op_fns *op_fns)
201 {
202 	struct cperf_latency_ctx *ctx = NULL;
203 	unsigned int mbuf_idx = 0;
204 	char pool_name[32] = "";
205 
206 	ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
207 	if (ctx == NULL)
208 		goto err;
209 
210 	ctx->dev_id = dev_id;
211 	ctx->qp_id = qp_id;
212 
213 	ctx->populate_ops = op_fns->populate_ops;
214 	ctx->options = options;
215 	ctx->test_vector = test_vector;
216 
217 	/* IV goes at the end of the crypto operation */
218 	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
219 		sizeof(struct rte_crypto_sym_op) +
220 		sizeof(struct cperf_op_result *);
221 
222 	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
223 			iv_offset);
224 	if (ctx->sess == NULL)
225 		goto err;
226 
227 	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
228 				dev_id);
229 
230 	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
231 			options->pool_sz * options->segments_nb, 0, 0,
232 			RTE_PKTMBUF_HEADROOM +
233 			RTE_CACHE_LINE_ROUNDUP(
234 				(options->max_buffer_size / options->segments_nb) +
235 				(options->max_buffer_size % options->segments_nb) +
236 					options->digest_sz),
237 			rte_socket_id());
238 
239 	if (ctx->pkt_mbuf_pool_in == NULL)
240 		goto err;
241 
242 	/* Generate mbufs_in with plaintext populated for test */
243 	ctx->mbufs_in = rte_malloc(NULL,
244 			(sizeof(struct rte_mbuf *) *
245 			ctx->options->pool_sz), 0);
246 
247 	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
248 		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
249 				ctx->pkt_mbuf_pool_in, options->segments_nb,
250 				options, test_vector);
251 		if (ctx->mbufs_in[mbuf_idx] == NULL)
252 			goto err;
253 	}
254 
255 	if (options->out_of_place == 1)	{
256 
257 		snprintf(pool_name, sizeof(pool_name),
258 				"cperf_pool_out_cdev_%d",
259 				dev_id);
260 
261 		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
262 				pool_name, options->pool_sz, 0, 0,
263 				RTE_PKTMBUF_HEADROOM +
264 				RTE_CACHE_LINE_ROUNDUP(
265 					options->max_buffer_size +
266 					options->digest_sz),
267 				rte_socket_id());
268 
269 		if (ctx->pkt_mbuf_pool_out == NULL)
270 			goto err;
271 	}
272 
273 	ctx->mbufs_out = rte_malloc(NULL,
274 			(sizeof(struct rte_mbuf *) *
275 			ctx->options->pool_sz), 0);
276 
277 	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
278 		if (options->out_of_place == 1)	{
279 			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
280 					ctx->pkt_mbuf_pool_out, 1,
281 					options, test_vector);
282 			if (ctx->mbufs_out[mbuf_idx] == NULL)
283 				goto err;
284 		} else {
285 			ctx->mbufs_out[mbuf_idx] = NULL;
286 		}
287 	}
288 
289 	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
290 			dev_id);
291 
292 	uint16_t priv_size = sizeof(struct priv_op_data) +
293 			test_vector->cipher_iv.length +
294 			test_vector->auth_iv.length;
295 	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
296 			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz,
297 			512, priv_size, rte_socket_id());
298 
299 	if (ctx->crypto_op_pool == NULL)
300 		goto err;
301 
302 	ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
303 			ctx->options->total_ops, 0);
304 
305 	if (ctx->res == NULL)
306 		goto err;
307 
308 	return ctx;
309 err:
310 	cperf_latency_test_free(ctx, mbuf_idx);
311 
312 	return NULL;
313 }
314 
315 static inline void
316 store_timestamp(struct rte_crypto_op *op, uint64_t timestamp)
317 {
318 	struct priv_op_data *priv_data;
319 
320 	priv_data = (struct priv_op_data *) (op->sym + 1);
321 	priv_data->result->status = op->status;
322 	priv_data->result->tsc_end = timestamp;
323 }
324 
325 int
326 cperf_latency_test_runner(void *arg)
327 {
328 	struct cperf_latency_ctx *ctx = arg;
329 	uint16_t test_burst_size;
330 	uint8_t burst_size_idx = 0;
331 
332 	static int only_once;
333 
334 	if (ctx == NULL)
335 		return 0;
336 
337 	struct rte_crypto_op *ops[ctx->options->max_burst_size];
338 	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
339 	uint64_t i;
340 	struct priv_op_data *priv_data;
341 
342 	uint32_t lcore = rte_lcore_id();
343 
344 #ifdef CPERF_LINEARIZATION_ENABLE
345 	struct rte_cryptodev_info dev_info;
346 	int linearize = 0;
347 
348 	/* Check if source mbufs require coalescing */
349 	if (ctx->options->segments_nb > 1) {
350 		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
351 		if ((dev_info.feature_flags &
352 				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
353 			linearize = 1;
354 	}
355 #endif /* CPERF_LINEARIZATION_ENABLE */
356 
357 	ctx->lcore_id = lcore;
358 
359 	/* Warm up the host CPU before starting the test */
360 	for (i = 0; i < ctx->options->total_ops; i++)
361 		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
362 
363 	/* Get first size from range or list */
364 	if (ctx->options->inc_burst_size != 0)
365 		test_burst_size = ctx->options->min_burst_size;
366 	else
367 		test_burst_size = ctx->options->burst_size_list[0];
368 
369 	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
370 		sizeof(struct rte_crypto_sym_op) +
371 		sizeof(struct cperf_op_result *);
372 
373 	while (test_burst_size <= ctx->options->max_burst_size) {
374 		uint64_t ops_enqd = 0, ops_deqd = 0;
375 		uint64_t m_idx = 0, b_idx = 0;
376 
377 		uint64_t tsc_val, tsc_end, tsc_start;
378 		uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
379 		uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
380 		uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;
381 
382 		while (enqd_tot < ctx->options->total_ops) {
383 
384 			uint16_t burst_size = ((enqd_tot + test_burst_size)
385 					<= ctx->options->total_ops) ?
386 							test_burst_size :
387 							ctx->options->total_ops -
388 							enqd_tot;
389 
390 			/* Allocate crypto ops from pool */
391 			if (burst_size != rte_crypto_op_bulk_alloc(
392 					ctx->crypto_op_pool,
393 					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
394 					ops, burst_size)) {
395 				RTE_LOG(ERR, USER1,
396 					"Failed to allocate more crypto operations "
397 					"from the the crypto operation pool.\n"
398 					"Consider increasing the pool size "
399 					"with --pool-sz\n");
400 				return -1;
401 			}
402 
403 			/* Setup crypto op, attach mbuf etc */
404 			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
405 					&ctx->mbufs_out[m_idx],
406 					burst_size, ctx->sess, ctx->options,
407 					ctx->test_vector, iv_offset);
408 
409 			tsc_start = rte_rdtsc_precise();
410 
411 #ifdef CPERF_LINEARIZATION_ENABLE
412 			if (linearize) {
413 				/* PMD doesn't support scatter-gather and source buffer
414 				 * is segmented.
415 				 * We need to linearize it before enqueuing.
416 				 */
417 				for (i = 0; i < burst_size; i++)
418 					rte_pktmbuf_linearize(ops[i]->sym->m_src);
419 			}
420 #endif /* CPERF_LINEARIZATION_ENABLE */
421 
422 			/* Enqueue burst of ops on crypto device */
423 			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
424 					ops, burst_size);
425 
426 			/* Dequeue processed burst of ops from crypto device */
427 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
428 					ops_processed, test_burst_size);
429 
430 			tsc_end = rte_rdtsc_precise();
431 
432 			/* Free memory for not enqueued operations */
433 			if (ops_enqd != burst_size)
434 				rte_mempool_put_bulk(ctx->crypto_op_pool,
435 						(void **)&ops_processed[ops_enqd],
436 						burst_size - ops_enqd);
437 
438 			for (i = 0; i < ops_enqd; i++) {
439 				ctx->res[tsc_idx].tsc_start = tsc_start;
440 				/*
441 				 * Private data structure starts after the end of the
442 				 * rte_crypto_sym_op structure.
443 				 */
444 				priv_data = (struct priv_op_data *) (ops[i]->sym + 1);
445 				priv_data->result = (void *)&ctx->res[tsc_idx];
446 				tsc_idx++;
447 			}
448 
449 			if (likely(ops_deqd))  {
450 				/*
451 				 * free crypto ops so they can be reused. We don't free
452 				 * the mbufs here as we don't want to reuse them as
453 				 * the crypto operation will change the data and cause
454 				 * failures.
455 				 */
456 				for (i = 0; i < ops_deqd; i++)
457 					store_timestamp(ops_processed[i], tsc_end);
458 
459 				rte_mempool_put_bulk(ctx->crypto_op_pool,
460 						(void **)ops_processed, ops_deqd);
461 
462 				deqd_tot += ops_deqd;
463 				deqd_max = max(ops_deqd, deqd_max);
464 				deqd_min = min(ops_deqd, deqd_min);
465 			}
466 
467 			enqd_tot += ops_enqd;
468 			enqd_max = max(ops_enqd, enqd_max);
469 			enqd_min = min(ops_enqd, enqd_min);
470 
471 			m_idx += ops_enqd;
472 			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
473 					0 : m_idx;
474 			b_idx++;
475 		}
476 
477 		/* Dequeue any operations still in the crypto device */
478 		while (deqd_tot < ctx->options->total_ops) {
479 			/* Sending 0 length burst to flush sw crypto device */
480 			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
481 
482 			/* dequeue burst */
483 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
484 					ops_processed, test_burst_size);
485 
486 			tsc_end = rte_rdtsc_precise();
487 
488 			if (ops_deqd != 0) {
489 				for (i = 0; i < ops_deqd; i++)
490 					store_timestamp(ops_processed[i], tsc_end);
491 
492 				rte_mempool_put_bulk(ctx->crypto_op_pool,
493 						(void **)ops_processed, ops_deqd);
494 
495 				deqd_tot += ops_deqd;
496 				deqd_max = max(ops_deqd, deqd_max);
497 				deqd_min = min(ops_deqd, deqd_min);
498 			}
499 		}
500 
501 		for (i = 0; i < tsc_idx; i++) {
502 			tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
503 			tsc_max = max(tsc_val, tsc_max);
504 			tsc_min = min(tsc_val, tsc_min);
505 			tsc_tot += tsc_val;
506 		}
507 
508 		double time_tot, time_avg, time_max, time_min;
509 
510 		const uint64_t tunit = 1000000; /* us */
511 		const uint64_t tsc_hz = rte_get_tsc_hz();
512 
513 		uint64_t enqd_avg = enqd_tot / b_idx;
514 		uint64_t deqd_avg = deqd_tot / b_idx;
515 		uint64_t tsc_avg = tsc_tot / tsc_idx;
516 
517 		time_tot = tunit*(double)(tsc_tot) / tsc_hz;
518 		time_avg = tunit*(double)(tsc_avg) / tsc_hz;
519 		time_max = tunit*(double)(tsc_max) / tsc_hz;
520 		time_min = tunit*(double)(tsc_min) / tsc_hz;
521 
522 		if (ctx->options->csv) {
523 			if (!only_once)
524 				printf("\n# lcore, Buffer Size, Burst Size, Pakt Seq #, "
525 						"Packet Size, cycles, time (us)");
526 
527 			for (i = 0; i < ctx->options->total_ops; i++) {
528 
529 				printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f",
530 					ctx->lcore_id, ctx->options->test_buffer_size,
531 					test_burst_size, i + 1,
532 					ctx->res[i].tsc_end - ctx->res[i].tsc_start,
533 					tunit * (double) (ctx->res[i].tsc_end
534 							- ctx->res[i].tsc_start)
535 						/ tsc_hz);
536 
537 			}
538 			only_once = 1;
539 		} else {
540 			printf("\n# Device %d on lcore %u\n", ctx->dev_id,
541 				ctx->lcore_id);
542 			printf("\n# total operations: %u", ctx->options->total_ops);
543 			printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
544 			printf("\n# Burst size: %u", test_burst_size);
545 			printf("\n#     Number of bursts: %"PRIu64,
546 					b_idx);
547 
548 			printf("\n#");
549 			printf("\n#          \t       Total\t   Average\t   "
550 					"Maximum\t   Minimum");
551 			printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
552 					"%10"PRIu64"\t%10"PRIu64, enqd_tot,
553 					enqd_avg, enqd_max, enqd_min);
554 			printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
555 					"%10"PRIu64"\t%10"PRIu64, deqd_tot,
556 					deqd_avg, deqd_max, deqd_min);
557 			printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
558 					"%10"PRIu64"\t%10"PRIu64, tsc_tot,
559 					tsc_avg, tsc_max, tsc_min);
560 			printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
561 					time_tot, time_avg, time_max, time_min);
562 			printf("\n\n");
563 
564 		}
565 
566 		/* Get next size from range or list */
567 		if (ctx->options->inc_burst_size != 0)
568 			test_burst_size += ctx->options->inc_burst_size;
569 		else {
570 			if (++burst_size_idx == ctx->options->burst_size_count)
571 				break;
572 			test_burst_size =
573 				ctx->options->burst_size_list[burst_size_idx];
574 		}
575 	}
576 
577 	return 0;
578 }
579 
580 void
581 cperf_latency_test_destructor(void *arg)
582 {
583 	struct cperf_latency_ctx *ctx = arg;
584 
585 	if (ctx == NULL)
586 		return;
587 
588 	rte_cryptodev_stop(ctx->dev_id);
589 
590 	cperf_latency_test_free(ctx, ctx->options->pool_sz);
591 }
592