xref: /dpdk/app/test-crypto-perf/cperf_test_latency.c (revision 253624f46c9d34e6970ffa0dd709bb30399547fd)
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_latency.h"
#include "cperf_ops.h"
#include "cperf_test_common.h"

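/*
 * Per-operation latency record. Both timestamps are raw TSC samples:
 * tsc_start is taken once per enqueued burst and tsc_end once per
 * dequeued burst, so latency resolution is burst-granular on both ends.
 */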
struct cperf_op_result {
	uint64_t tsc_start;
	uint64_t tsc_end;
	enum rte_crypto_op_status status;
};

struct cperf_latency_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
	struct cperf_op_result *res;
};

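/*
 * Stored in each crypto op's private data area (the bytes immediately
 * after the rte_crypto_sym_op structure); links the op back to its
 * result slot in ctx->res.
 */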
struct priv_op_data {
	struct cperf_op_result *result;
};

#define max(a, b) ((a) > (b) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) ((a) < (b) ? (uint64_t)(a) : (uint64_t)(b))

static void
cperf_latency_test_free(struct cperf_latency_ctx *ctx)
{
	if (ctx) {
		if (ctx->sess) {
			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
			rte_cryptodev_sym_session_free(ctx->sess);
		}

		cperf_free_common_memory(ctx->options,
				ctx->pkt_mbuf_pool_in,
				ctx->pkt_mbuf_pool_out,
				ctx->mbufs_in, ctx->mbufs_out,
				ctx->crypto_op_pool);

		rte_free(ctx->res);
		rte_free(ctx);
	}
}

void *
cperf_latency_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_latency_ctx *ctx = NULL;
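	/* Reserve per-op private space for the result back-pointer */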
	size_t extra_op_priv_size = sizeof(struct priv_op_data);

	ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

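	/*
	 * Per-op memory layout (a sketch; priv_op_data is the extra
	 * private area requested from the op pool below):
	 *
	 *   rte_crypto_op | rte_crypto_sym_op | priv_op_data | IV
	 */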
	/* IV goes at the end of the crypto operation */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
			iv_offset);
	if (ctx->sess == NULL)
		goto err;

	if (cperf_alloc_common_memory(options, test_vector, dev_id,
			extra_op_priv_size,
			&ctx->pkt_mbuf_pool_in, &ctx->pkt_mbuf_pool_out,
			&ctx->mbufs_in, &ctx->mbufs_out,
			&ctx->crypto_op_pool) < 0)
		goto err;

	ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
			ctx->options->total_ops, 0);

	if (ctx->res == NULL)
		goto err;

	return ctx;
err:
	cperf_latency_test_free(ctx);

	return NULL;
}

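/*
 * Record completion status and the dequeue-side timestamp in the result
 * slot that was linked to this op at enqueue time.
 */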
static inline void
store_timestamp(struct rte_crypto_op *op, uint64_t timestamp)
{
	struct priv_op_data *priv_data;

	priv_data = (struct priv_op_data *) (op->sym + 1);
	priv_data->result->status = op->status;
	priv_data->result->tsc_end = timestamp;
}

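/*
 * Runner: iterates over the configured burst sizes; for each size it
 * enqueues/dequeues total_ops operations, then reports per-operation
 * latency statistics derived from the recorded TSC samples.
 */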
int
cperf_latency_test_runner(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

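	/* Shared across runner lcores so the CSV header prints only once */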
	static int only_once;

	if (ctx == NULL)
		return 0;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
	uint64_t i;
	struct priv_op_data *priv_data;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	/* Get first size from range or list */
	if (ctx->options->inc_burst_size != 0)
		test_burst_size = ctx->options->min_burst_size;
	else
		test_burst_size = ctx->options->burst_size_list[0];

	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

	while (test_burst_size <= ctx->options->max_burst_size) {
		uint64_t ops_enqd = 0, ops_deqd = 0;
		uint64_t m_idx = 0, b_idx = 0;

		uint64_t tsc_val, tsc_end, tsc_start;
		uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
		uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
		uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;

		while (enqd_tot < ctx->options->total_ops) {

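			/* Clamp the final burst so exactly total_ops are enqueued */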
			uint16_t burst_size = ((enqd_tot + test_burst_size)
					<= ctx->options->total_ops) ?
							test_burst_size :
							ctx->options->total_ops -
							enqd_tot;

			/* Allocate crypto ops from pool */
			if (burst_size != rte_crypto_op_bulk_alloc(
					ctx->crypto_op_pool,
					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
					ops, burst_size)) {
				RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
				return -1;
			}

			/* Set up crypto ops, attach mbufs, etc. */
			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
					&ctx->mbufs_out[m_idx],
					burst_size, ctx->sess, ctx->options,
					ctx->test_vector, iv_offset);

			tsc_start = rte_rdtsc_precise();

#ifdef CPERF_LINEARIZATION_ENABLE
			if (linearize) {
				/* PMD doesn't support scatter-gather and source buffer
				 * is segmented.
				 * We need to linearize it before enqueuing.
				 */
				for (i = 0; i < burst_size; i++)
					rte_pktmbuf_linearize(ops[i]->sym->m_src);
			}
#endif /* CPERF_LINEARIZATION_ENABLE */

			/* Enqueue burst of ops on crypto device */
			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
					ops, burst_size);

			/* Dequeue processed burst of ops from crypto device */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			/* Free memory for not enqueued operations */
			if (ops_enqd != burst_size)
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)&ops[ops_enqd],
						burst_size - ops_enqd);

			for (i = 0; i < ops_enqd; i++) {
				ctx->res[tsc_idx].tsc_start = tsc_start;
				/*
				 * Private data structure starts after the end of the
				 * rte_crypto_sym_op structure.
				 */
				priv_data = (struct priv_op_data *) (ops[i]->sym + 1);
				priv_data->result = (void *)&ctx->res[tsc_idx];
				tsc_idx++;
			}

			if (likely(ops_deqd)) {
				/*
				 * Free crypto ops so they can be reused. We don't free
				 * the mbufs here, as we don't want to reuse them:
				 * the crypto operation will change the data and cause
				 * failures.
				 */
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}

			enqd_tot += ops_enqd;
			enqd_max = max(ops_enqd, enqd_max);
			enqd_min = min(ops_enqd, enqd_min);

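			/* Advance the mbuf window; wrap when the next burst
			 * would run past the end of the pool.
			 */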
			m_idx += ops_enqd;
			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
					0 : m_idx;
			b_idx++;
		}

		/* Dequeue any operations still in the crypto device */
		while (deqd_tot < ctx->options->total_ops) {
			/* Send a zero-length burst to flush the SW crypto device */
			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

			/* Dequeue burst */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			if (ops_deqd != 0) {
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}
		}

		for (i = 0; i < tsc_idx; i++) {
			tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
			tsc_max = max(tsc_val, tsc_max);
			tsc_min = min(tsc_val, tsc_min);
			tsc_tot += tsc_val;
		}

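		/* Convert cycle counts to wall-clock time using the TSC rate */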
		double time_tot, time_avg, time_max, time_min;

		const uint64_t tunit = 1000000; /* us */
		const uint64_t tsc_hz = rte_get_tsc_hz();

		uint64_t enqd_avg = enqd_tot / b_idx;
		uint64_t deqd_avg = deqd_tot / b_idx;
		uint64_t tsc_avg = tsc_tot / tsc_idx;

		time_tot = tunit*(double)(tsc_tot) / tsc_hz;
		time_avg = tunit*(double)(tsc_avg) / tsc_hz;
		time_max = tunit*(double)(tsc_max) / tsc_hz;
		time_min = tunit*(double)(tsc_min) / tsc_hz;

		if (ctx->options->csv) {
			if (!only_once)
				printf("\n# lcore, Buffer Size, Burst Size, Packet Seq #, "
						"cycles, time (us)");

			for (i = 0; i < ctx->options->total_ops; i++) {

				printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f",
					ctx->lcore_id, ctx->options->test_buffer_size,
					test_burst_size, i + 1,
					ctx->res[i].tsc_end - ctx->res[i].tsc_start,
					tunit * (double) (ctx->res[i].tsc_end
							- ctx->res[i].tsc_start)
						/ tsc_hz);

			}
			only_once = 1;
		} else {
			printf("\n# Device %d on lcore %u\n", ctx->dev_id,
				ctx->lcore_id);
			printf("\n# Total operations: %u", ctx->options->total_ops);
			printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
			printf("\n# Burst size: %u", test_burst_size);
			printf("\n#     Number of bursts: %"PRIu64,
					b_idx);

			printf("\n#");
			printf("\n#          \t       Total\t   Average\t   "
					"Maximum\t   Minimum");
			printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, enqd_tot,
					enqd_avg, enqd_max, enqd_min);
			printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, deqd_tot,
					deqd_avg, deqd_max, deqd_min);
			printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, tsc_tot,
					tsc_avg, tsc_max, tsc_min);
			printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
					time_tot, time_avg, time_max, time_min);
			printf("\n\n");

		}

		/* Get next size from range or list */
		if (ctx->options->inc_burst_size != 0)
			test_burst_size += ctx->options->inc_burst_size;
		else {
			if (++burst_size_idx == ctx->options->burst_size_count)
				break;
			test_burst_size =
				ctx->options->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}

void
cperf_latency_test_destructor(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	rte_cryptodev_stop(ctx->dev_id);

	cperf_latency_test_free(ctx);
}