/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_throughput.h"
#include "cperf_ops.h"
#include "cperf_test_common.h"

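/*
 * Per-(device, queue pair) context for one throughput test run. An
 * instance is built by the constructor below and handed to the runner
 * executing on a worker lcore.
 */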
struct cperf_throughput_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
};

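/* Release the crypto session and all memory owned by the context. */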
static void
cperf_throughput_test_free(struct cperf_throughput_ctx *ctx)
{
	if (ctx) {
		if (ctx->sess) {
			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
			rte_cryptodev_sym_session_free(ctx->sess);
		}

		cperf_free_common_memory(ctx->options,
				ctx->pkt_mbuf_pool_in,
				ctx->pkt_mbuf_pool_out,
				ctx->mbufs_in, ctx->mbufs_out,
				ctx->crypto_op_pool);

		rte_free(ctx);
	}
}

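/*
 * Build the per-queue-pair test context: create the crypto session and
 * allocate the mbuf and crypto-op pools via the common helpers. On any
 * failure the partially built context is freed and NULL is returned.
 */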
void *
cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_throughput_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/* IV goes at the end of the crypto operation */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op);
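	/*
	 * Implied layout of each op within its mempool element:
	 *
	 *     [ rte_crypto_op | rte_crypto_sym_op | IV ... ]
	 *                                          ^ iv_offset
	 */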

	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
					iv_offset);
	if (ctx->sess == NULL)
		goto err;

	if (cperf_alloc_common_memory(options, test_vector, dev_id, 0,
			&ctx->pkt_mbuf_pool_in, &ctx->pkt_mbuf_pool_out,
			&ctx->mbufs_in, &ctx->mbufs_out,
			&ctx->crypto_op_pool) < 0)
		goto err;

	return ctx;
err:
	cperf_throughput_test_free(ctx);

	return NULL;
}

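/*
 * Test body, run on a worker lcore. For each burst size in the
 * configured range or list, enqueue ctx->options->total_ops operations,
 * drain the device, and print per-burst-size throughput figures.
 * Returns 0 on success, -1 if crypto op allocation fails.
 */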
int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_throughput_ctx *ctx = test_ctx;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

	static int only_once;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
	uint64_t i;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	/* Get first size from range or list */
	if (ctx->options->inc_burst_size != 0)
		test_burst_size = ctx->options->min_burst_size;
	else
		test_burst_size = ctx->options->burst_size_list[0];

	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op);

	while (test_burst_size <= ctx->options->max_burst_size) {
		uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
		uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;

		uint64_t m_idx = 0, tsc_start, tsc_end, tsc_duration;

		uint16_t ops_unused = 0;

		tsc_start = rte_rdtsc_precise();

		while (ops_enqd_total < ctx->options->total_ops) {

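			/*
			 * Cap the final burst so the enqueued total never
			 * exceeds the requested number of operations.
			 */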
			uint16_t burst_size = ((ops_enqd_total + test_burst_size)
					<= ctx->options->total_ops) ?
							test_burst_size :
							ctx->options->total_ops -
							ops_enqd_total;

			uint16_t ops_needed = burst_size - ops_unused;

			/* Allocate crypto ops from pool */
			if (ops_needed != rte_crypto_op_bulk_alloc(
					ctx->crypto_op_pool,
					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
					ops, ops_needed)) {
				RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
				return -1;
			}

			/* Set up crypto ops: attach mbufs, session, IV, etc. */
			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
					&ctx->mbufs_out[m_idx],
					ops_needed, ctx->sess, ctx->options,
					ctx->test_vector, iv_offset);

			/**
			 * When ops_needed is smaller than ops_enqd from the
			 * previous round, the unused ops still sit at
			 * ops[ops_enqd..]; move them down to ops[ops_needed..]
			 * so they directly follow the freshly allocated ops
			 * for the next enqueue.
			 */
			if (unlikely(ops_enqd > ops_needed)) {
				size_t nb_b_to_mov = ops_unused * sizeof(
						struct rte_crypto_op *);

				memmove(&ops[ops_needed], &ops[ops_enqd],
					nb_b_to_mov);
			}

#ifdef CPERF_LINEARIZATION_ENABLE
			if (linearize) {
				/* The PMD doesn't support scatter-gather and
				 * the source buffer is segmented, so linearize
				 * it before enqueuing.
				 */
				for (i = 0; i < burst_size; i++)
					rte_pktmbuf_linearize(ops[i]->sym->m_src);
			}
#endif /* CPERF_LINEARIZATION_ENABLE */

			/* Enqueue burst of ops on crypto device */
			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
					ops, burst_size);
			if (ops_enqd < burst_size)
				ops_enqd_failed++;

			/**
			 * Calculate number of ops not enqueued (mainly for hw
			 * accelerators whose ingress queue can fill up).
			 */
			ops_unused = burst_size - ops_enqd;
			ops_enqd_total += ops_enqd;

			/* Dequeue processed burst of ops from crypto device */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			if (likely(ops_deqd)) {
				/* Free crypto ops so they can be reused. The
				 * mbufs are not freed here: the crypto
				 * operations have already modified their data,
				 * so reusing them would cause failures.
				 */
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				ops_deqd_total += ops_deqd;
			} else {
				/**
				 * Count dequeue polls which didn't return any
				 * processed operations. This statistic is mainly
				 * relevant to hw accelerators.
				 */
				ops_deqd_failed++;
			}

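			/*
			 * Advance the mbuf index, wrapping back to the start
			 * of the preallocated set when the next burst would
			 * run past pool_sz.
			 */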
			m_idx += ops_needed;
			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
					0 : m_idx;
		}

		/* Dequeue any operations still in the crypto device */

		while (ops_deqd_total < ctx->options->total_ops) {
			/* Send a 0-length burst to flush a sw crypto device */
			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

			/* dequeue burst */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);
			if (ops_deqd == 0)
				ops_deqd_failed++;
			else {
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				ops_deqd_total += ops_deqd;
			}
		}

		tsc_end = rte_rdtsc_precise();
		tsc_duration = (tsc_end - tsc_start);

		/* Calculate average operations processed per second */
		double ops_per_second = ((double)ctx->options->total_ops /
				tsc_duration) * rte_get_tsc_hz();

		/* Calculate average throughput in Gbps (10^9 bits/second) */
		double throughput_gbps = ((ops_per_second *
				ctx->options->test_buffer_size * 8) / 1000000000);

		/* Calculate average cycles per packet */
		double cycles_per_packet = ((double)tsc_duration /
				ctx->options->total_ops);
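		/*
		 * Worked example with assumed numbers: 1e6 total ops over
		 * 5e8 TSC cycles at a 2 GHz TSC rate gives
		 * (1e6 / 5e8) * 2e9 = 4e6 ops/s; with 1024-byte buffers,
		 * 4e6 * 1024 * 8 / 1e9 ~= 32.8 Gbps, and 5e8 / 1e6 = 500
		 * cycles per buffer.
		 */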

		if (!ctx->options->csv) {
			if (!only_once)
				printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
					"lcore id", "Buf Size", "Burst Size",
					"Enqueued", "Dequeued", "Failed Enq",
					"Failed Deq", "MOps", "Gbps",
					"Cycles/Buf");
			only_once = 1;

			printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
					"%12"PRIu64"%12.4f%12.4f%12.2f\n",
					ctx->lcore_id,
					ctx->options->test_buffer_size,
					test_burst_size,
					ops_enqd_total,
					ops_deqd_total,
					ops_enqd_failed,
					ops_deqd_failed,
					ops_per_second/1000000,
					throughput_gbps,
					cycles_per_packet);
		} else {
			if (!only_once)
				printf("#lcore id,Buffer Size(B),"
					"Burst Size,Enqueued,Dequeued,Failed Enq,"
					"Failed Deq,Ops(Millions),Throughput(Gbps),"
					"Cycles/Buf\n\n");
			only_once = 1;

			/* Use commas so the delimiter matches the CSV header */
			printf("%u,%u,%u,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64","
					"%.3f,%.3f,%.3f\n",
					ctx->lcore_id,
					ctx->options->test_buffer_size,
					test_burst_size,
					ops_enqd_total,
					ops_deqd_total,
					ops_enqd_failed,
					ops_deqd_failed,
					ops_per_second/1000000,
					throughput_gbps,
					cycles_per_packet);
		}

		/* Get next size from range or list */
		if (ctx->options->inc_burst_size != 0)
			test_burst_size += ctx->options->inc_burst_size;
		else {
			if (++burst_size_idx == ctx->options->burst_size_count)
				break;
			test_burst_size = ctx->options->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}

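/* Stop the crypto device and release the test context. */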
void
cperf_throughput_test_destructor(void *arg)
{
	struct cperf_throughput_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	rte_cryptodev_stop(ctx->dev_id);

	cperf_throughput_test_free(ctx);
}