/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_latency.h"
#include "cperf_ops.h"

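/*
 * Per-operation latency sample: tsc_start is taken right before the
 * burst holding this operation is enqueued, tsc_end right after the
 * dequeue that returned it.
 */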
struct cperf_op_result {
	uint64_t tsc_start;
	uint64_t tsc_end;
	enum rte_crypto_op_status status;
};

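/*
 * State for one (device, queue pair) latency run: the pre-built
 * source/destination mbufs, the crypto op pool, the session and one
 * result slot per operation of the test.
 */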
struct cperf_latency_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
	struct cperf_op_result *res;
};

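/*
 * Each crypto op carries a back-pointer to its result slot in the op's
 * private area, which sits right after the rte_crypto_sym_op:
 *
 *   | rte_crypto_op | rte_crypto_sym_op | priv_op_data | IV |
 *
 * This lets a dequeued op be matched to its result without any lookup.
 */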
struct priv_op_data {
	struct cperf_op_result *result;
};

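/* For the uint64_t statistics below; both arguments are evaluated twice */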
#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))

static void
cperf_latency_test_free(struct cperf_latency_ctx *ctx, uint32_t mbuf_nb)
{
	uint32_t i;

	if (ctx) {
		if (ctx->sess)
			rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);

		if (ctx->mbufs_in) {
			for (i = 0; i < mbuf_nb; i++)
				rte_pktmbuf_free(ctx->mbufs_in[i]);

			rte_free(ctx->mbufs_in);
		}

		if (ctx->mbufs_out) {
			for (i = 0; i < mbuf_nb; i++) {
				if (ctx->mbufs_out[i] != NULL)
					rte_pktmbuf_free(ctx->mbufs_out[i]);
			}

			rte_free(ctx->mbufs_out);
		}

		if (ctx->pkt_mbuf_pool_in)
			rte_mempool_free(ctx->pkt_mbuf_pool_in);

		if (ctx->pkt_mbuf_pool_out)
			rte_mempool_free(ctx->pkt_mbuf_pool_out);

		if (ctx->crypto_op_pool)
			rte_mempool_free(ctx->crypto_op_pool);

		rte_free(ctx->res);
		rte_free(ctx);
	}
}

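/*
 * Build a source buffer as a chain of segments_nb mbufs and fill it
 * with the test vector's plaintext (encrypt) or ciphertext (decrypt).
 * Room for the digest is appended unless the test is cipher-only, and
 * for AEAD tests the AAD is prepended in front of the data.
 */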
static struct rte_mbuf *
cperf_mbuf_create(struct rte_mempool *mempool,
		uint32_t segments_nb,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector)
{
	struct rte_mbuf *mbuf;
	uint32_t segment_sz = options->max_buffer_size / segments_nb;
	uint32_t last_sz = options->max_buffer_size % segments_nb;
	uint8_t *mbuf_data;
	uint8_t *test_data =
			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
					test_vector->plaintext.data :
					test_vector->ciphertext.data;

	mbuf = rte_pktmbuf_alloc(mempool);
	if (mbuf == NULL)
		goto error;

	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
	if (mbuf_data == NULL)
		goto error;

	memcpy(mbuf_data, test_data, segment_sz);
	test_data += segment_sz;
	segments_nb--;

	while (segments_nb) {
		struct rte_mbuf *m;

		m = rte_pktmbuf_alloc(mempool);
		if (m == NULL)
			goto error;
		if (rte_pktmbuf_chain(mbuf, m) != 0) {
			/* Chain failed, "m" is not yet linked to "mbuf" */
			rte_pktmbuf_free(m);
			goto error;
		}

		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, segment_sz);
		test_data += segment_sz;
		segments_nb--;
	}

	if (last_sz) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, last_sz);
	}

	if (options->op_type != CPERF_CIPHER_ONLY) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
			options->auth_digest_sz);
		if (mbuf_data == NULL)
			goto error;
	}

	if (options->op_type == CPERF_AEAD) {
		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
			RTE_ALIGN_CEIL(options->auth_aad_sz, 16));

		if (aead == NULL)
			goto error;

		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
	}

	return mbuf;
error:
	if (mbuf != NULL)
		rte_pktmbuf_free(mbuf);

	return NULL;
}

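/*
 * Constructor hook called by the test framework once per (device,
 * queue pair) before the runners start. Any allocation failure tears
 * down the partially built context via cperf_latency_test_free().
 */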
void *
cperf_latency_test_constructor(uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_latency_ctx *ctx = NULL;
	unsigned int mbuf_idx = 0;
	char pool_name[32] = "";

	ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/* IV goes at the end of the crypto operation, after the private data */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

	ctx->sess = op_fns->sess_create(dev_id, options, test_vector,
			iv_offset);
	if (ctx->sess == NULL)
		goto err;

	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
				dev_id);

	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
			options->pool_sz * options->segments_nb, 0, 0,
			RTE_PKTMBUF_HEADROOM +
			RTE_CACHE_LINE_ROUNDUP(
				(options->max_buffer_size / options->segments_nb) +
				(options->max_buffer_size % options->segments_nb) +
					options->auth_digest_sz),
			rte_socket_id());

	if (ctx->pkt_mbuf_pool_in == NULL)
		goto err;

	/* Generate mbufs_in with plaintext populated for test */
	ctx->mbufs_in = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) *
			ctx->options->pool_sz), 0);
	if (ctx->mbufs_in == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
				ctx->pkt_mbuf_pool_in, options->segments_nb,
				options, test_vector);
		if (ctx->mbufs_in[mbuf_idx] == NULL)
			goto err;
	}

	if (options->out_of_place == 1) {

		snprintf(pool_name, sizeof(pool_name),
				"cperf_pool_out_cdev_%d",
				dev_id);

		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
				pool_name, options->pool_sz, 0, 0,
				RTE_PKTMBUF_HEADROOM +
				RTE_CACHE_LINE_ROUNDUP(
					options->max_buffer_size +
					options->auth_digest_sz),
				rte_socket_id());

		if (ctx->pkt_mbuf_pool_out == NULL)
			goto err;
	}

	ctx->mbufs_out = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) *
			ctx->options->pool_sz), 0);
	if (ctx->mbufs_out == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		if (options->out_of_place == 1) {
			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
					ctx->pkt_mbuf_pool_out, 1,
					options, test_vector);
			if (ctx->mbufs_out[mbuf_idx] == NULL)
				goto err;
		} else {
			ctx->mbufs_out[mbuf_idx] = NULL;
		}
	}

	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
			dev_id);

	uint16_t priv_size = sizeof(struct priv_op_data) +
			test_vector->iv.length;
	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz,
			512, priv_size, rte_socket_id());

	if (ctx->crypto_op_pool == NULL)
		goto err;

	ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
			ctx->options->total_ops, 0);

	if (ctx->res == NULL)
		goto err;

	return ctx;
err:
	cperf_latency_test_free(ctx, mbuf_idx);

	return NULL;
}

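/*
 * Store the completion timestamp and status in the result slot the
 * operation points back to through its private data.
 */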
static inline void
store_timestamp(struct rte_crypto_op *op, uint64_t timestamp)
{
	struct priv_op_data *priv_data;

	priv_data = (struct priv_op_data *) (op->sym + 1);
	priv_data->result->status = op->status;
	priv_data->result->tsc_end = timestamp;
}

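/*
 * Runner hook executed on each worker lcore: for every burst size under
 * test, push total_ops operations through the queue pair, bracket each
 * burst with TSC reads and report per-operation latency statistics.
 */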
int
cperf_latency_test_runner(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

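	/* Shared by all runners so the CSV header is printed only once */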
	static int only_once;

	if (ctx == NULL)
		return 0;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
	uint64_t i;
	struct priv_op_data *priv_data;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	/* Get first size from range or list */
	if (ctx->options->inc_burst_size != 0)
		test_burst_size = ctx->options->min_burst_size;
	else
		test_burst_size = ctx->options->burst_size_list[0];

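	/*
	 * Offset of the IV in the op's private area; must match the value
	 * the constructor passed to sess_create().
	 */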
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

	while (test_burst_size <= ctx->options->max_burst_size) {
		uint64_t ops_enqd = 0, ops_deqd = 0;
		uint64_t m_idx = 0, b_idx = 0;

		uint64_t tsc_val, tsc_end, tsc_start;
		uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
		uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
		uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;

		while (enqd_tot < ctx->options->total_ops) {

			uint16_t burst_size = ((enqd_tot + test_burst_size)
					<= ctx->options->total_ops) ?
							test_burst_size :
							ctx->options->total_ops -
							enqd_tot;

			/* Allocate crypto ops from pool */
			if (burst_size != rte_crypto_op_bulk_alloc(
					ctx->crypto_op_pool,
					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
					ops, burst_size))
				return -1;

			/* Setup crypto op, attach mbuf etc */
			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
					&ctx->mbufs_out[m_idx],
					burst_size, ctx->sess, ctx->options,
					ctx->test_vector, iv_offset);

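			/*
			 * A single timestamp pair brackets the whole burst:
			 * every op enqueued here shares tsc_start, and every
			 * op dequeued below shares tsc_end, so latency is
			 * measured at burst granularity.
			 */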
			tsc_start = rte_rdtsc_precise();

#ifdef CPERF_LINEARIZATION_ENABLE
			if (linearize) {
				/* PMD doesn't support scatter-gather and source buffer
				 * is segmented.
				 * We need to linearize it before enqueuing.
				 */
				for (i = 0; i < burst_size; i++)
					rte_pktmbuf_linearize(ops[i]->sym->m_src);
			}
#endif /* CPERF_LINEARIZATION_ENABLE */

			/* Enqueue burst of ops on crypto device */
			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
					ops, burst_size);

			/* Dequeue processed burst of ops from crypto device */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			/* Return any ops not enqueued to the pool; they still
			 * sit in "ops", not in "ops_processed".
			 */
			if (ops_enqd != burst_size)
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)&ops[ops_enqd],
						burst_size - ops_enqd);

			for (i = 0; i < ops_enqd; i++) {
				ctx->res[tsc_idx].tsc_start = tsc_start;
				/*
				 * Private data structure starts after the end of the
				 * rte_crypto_sym_op structure.
				 */
				priv_data = (struct priv_op_data *) (ops[i]->sym + 1);
				priv_data->result = (void *)&ctx->res[tsc_idx];
				tsc_idx++;
			}

			if (likely(ops_deqd)) {
				/*
				 * Free crypto ops so they can be reused. We don't free
				 * the mbufs here as we don't want to reuse them as
				 * the crypto operation will change the data and cause
				 * failures.
				 */
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}

			enqd_tot += ops_enqd;
			enqd_max = max(ops_enqd, enqd_max);
			enqd_min = min(ops_enqd, enqd_min);

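			/* Walk the pre-built mbuf array, wrapping before the
			 * next burst could run past pool_sz entries.
			 */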
			m_idx += ops_enqd;
			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
					0 : m_idx;
			b_idx++;
		}

		/* Dequeue any operations still in the crypto device */
		while (deqd_tot < ctx->options->total_ops) {
			/* Sending 0 length burst to flush sw crypto device */
			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

			/* dequeue burst */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			if (ops_deqd != 0) {
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}
		}

		for (i = 0; i < tsc_idx; i++) {
			tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
			tsc_max = max(tsc_val, tsc_max);
			tsc_min = min(tsc_val, tsc_min);
			tsc_tot += tsc_val;
		}

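		/* Aggregate the counters and convert cycle counts to
		 * microseconds using the TSC frequency.
		 */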
		double time_tot, time_avg, time_max, time_min;

		const uint64_t tunit = 1000000; /* us */
		const uint64_t tsc_hz = rte_get_tsc_hz();

		uint64_t enqd_avg = enqd_tot / b_idx;
		uint64_t deqd_avg = deqd_tot / b_idx;
		uint64_t tsc_avg = tsc_tot / tsc_idx;

		time_tot = tunit * (double)(tsc_tot) / tsc_hz;
		time_avg = tunit * (double)(tsc_avg) / tsc_hz;
		time_max = tunit * (double)(tsc_max) / tsc_hz;
		time_min = tunit * (double)(tsc_min) / tsc_hz;

		if (ctx->options->csv) {
			if (!only_once)
				printf("\n# lcore; Buffer Size; Burst Size; "
						"Packet Seq #; cycles; time (us)");

			for (i = 0; i < ctx->options->total_ops; i++) {

				printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f",
					ctx->lcore_id, ctx->options->test_buffer_size,
					test_burst_size, i + 1,
					ctx->res[i].tsc_end - ctx->res[i].tsc_start,
					tunit * (double) (ctx->res[i].tsc_end
							- ctx->res[i].tsc_start)
						/ tsc_hz);

			}
			only_once = 1;
		} else {
			printf("\n# Device %d on lcore %u\n", ctx->dev_id,
				ctx->lcore_id);
			printf("\n# total operations: %u", ctx->options->total_ops);
			printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
			printf("\n# Burst size: %u", test_burst_size);
			printf("\n#     Number of bursts: %"PRIu64,
					b_idx);

			printf("\n#");
			printf("\n#          \t       Total\t   Average\t   "
					"Maximum\t   Minimum");
			printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, enqd_tot,
					enqd_avg, enqd_max, enqd_min);
			printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, deqd_tot,
					deqd_avg, deqd_max, deqd_min);
			printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, tsc_tot,
					tsc_avg, tsc_max, tsc_min);
			printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
					time_tot, time_avg, time_max, time_min);
			printf("\n\n");
		}

		/* Get next size from range or list */
		if (ctx->options->inc_burst_size != 0)
			test_burst_size += ctx->options->inc_burst_size;
		else {
			if (++burst_size_idx == ctx->options->burst_size_count)
				break;
			test_burst_size =
				ctx->options->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}

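/*
 * Destructor hook called by the test framework after the runners
 * finish; the context owns all pool_sz mbufs, so everything is freed
 * here.
 */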
void
cperf_latency_test_destructor(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	cperf_latency_test_free(ctx, ctx->options->pool_sz);
}
579