xref: /dpdk/app/test-crypto-perf/cperf_test_throughput.c (revision 1a4998dc4d9446c58e1813bb05b92b572edb381e)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <rte_malloc.h>
34 #include <rte_cycles.h>
35 #include <rte_crypto.h>
36 #include <rte_cryptodev.h>
37 
38 #include "cperf_test_throughput.h"
39 #include "cperf_ops.h"
40 #include "cperf_test_common.h"
41 
/* Per-(device, queue pair) state for one throughput test run. */
struct cperf_throughput_ctx {
	uint8_t dev_id;		/* crypto device under test */
	uint16_t qp_id;		/* queue pair on dev_id used by this context */
	uint8_t lcore_id;	/* lcore executing the runner; set in the runner */

	/* pool of combined crypto-op + mbuf objects, created by
	 * cperf_alloc_common_memory() */
	struct rte_mempool *pool;

	struct rte_cryptodev_sym_session *sess;	/* symmetric session for all ops */

	cperf_populate_ops_t populate_ops;	/* op-setup callback from cperf_op_fns */

	/* byte offsets of the source/destination mbufs inside each pool
	 * object, filled in by cperf_alloc_common_memory() */
	uint32_t src_buf_offset;
	uint32_t dst_buf_offset;

	const struct cperf_options *options;		/* test parameters (not owned) */
	const struct cperf_test_vector *test_vector;	/* reference data (not owned) */
};
59 
60 static void
61 cperf_throughput_test_free(struct cperf_throughput_ctx *ctx)
62 {
63 	if (ctx) {
64 		if (ctx->sess) {
65 			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
66 			rte_cryptodev_sym_session_free(ctx->sess);
67 		}
68 
69 		if (ctx->pool)
70 			rte_mempool_free(ctx->pool);
71 
72 		rte_free(ctx);
73 	}
74 }
75 
76 void *
77 cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
78 		uint8_t dev_id, uint16_t qp_id,
79 		const struct cperf_options *options,
80 		const struct cperf_test_vector *test_vector,
81 		const struct cperf_op_fns *op_fns)
82 {
83 	struct cperf_throughput_ctx *ctx = NULL;
84 
85 	ctx = rte_malloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
86 	if (ctx == NULL)
87 		goto err;
88 
89 	ctx->dev_id = dev_id;
90 	ctx->qp_id = qp_id;
91 
92 	ctx->populate_ops = op_fns->populate_ops;
93 	ctx->options = options;
94 	ctx->test_vector = test_vector;
95 
96 	/* IV goes at the end of the crypto operation */
97 	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
98 		sizeof(struct rte_crypto_sym_op);
99 
100 	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
101 					iv_offset);
102 	if (ctx->sess == NULL)
103 		goto err;
104 
105 	if (cperf_alloc_common_memory(options, test_vector, dev_id, qp_id, 0,
106 			&ctx->src_buf_offset, &ctx->dst_buf_offset,
107 			&ctx->pool) < 0)
108 		goto err;
109 
110 	return ctx;
111 err:
112 	cperf_throughput_test_free(ctx);
113 
114 	return NULL;
115 }
116 
117 int
118 cperf_throughput_test_runner(void *test_ctx)
119 {
120 	struct cperf_throughput_ctx *ctx = test_ctx;
121 	uint16_t test_burst_size;
122 	uint8_t burst_size_idx = 0;
123 
124 	static int only_once;
125 
126 	struct rte_crypto_op *ops[ctx->options->max_burst_size];
127 	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
128 	uint64_t i;
129 
130 	uint32_t lcore = rte_lcore_id();
131 
132 #ifdef CPERF_LINEARIZATION_ENABLE
133 	struct rte_cryptodev_info dev_info;
134 	int linearize = 0;
135 
136 	/* Check if source mbufs require coalescing */
137 	if (ctx->options->segment_sz < ctx->options->max_buffer_size) {
138 		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
139 		if ((dev_info.feature_flags &
140 				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
141 			linearize = 1;
142 	}
143 #endif /* CPERF_LINEARIZATION_ENABLE */
144 
145 	ctx->lcore_id = lcore;
146 
147 	/* Warm up the host CPU before starting the test */
148 	for (i = 0; i < ctx->options->total_ops; i++)
149 		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
150 
151 	/* Get first size from range or list */
152 	if (ctx->options->inc_burst_size != 0)
153 		test_burst_size = ctx->options->min_burst_size;
154 	else
155 		test_burst_size = ctx->options->burst_size_list[0];
156 
157 	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
158 		sizeof(struct rte_crypto_sym_op);
159 
160 	while (test_burst_size <= ctx->options->max_burst_size) {
161 		uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
162 		uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
163 
164 		uint64_t tsc_start, tsc_end, tsc_duration;
165 
166 		uint16_t ops_unused = 0;
167 
168 		tsc_start = rte_rdtsc_precise();
169 
170 		while (ops_enqd_total < ctx->options->total_ops) {
171 
172 			uint16_t burst_size = ((ops_enqd_total + test_burst_size)
173 					<= ctx->options->total_ops) ?
174 							test_burst_size :
175 							ctx->options->total_ops -
176 							ops_enqd_total;
177 
178 			uint16_t ops_needed = burst_size - ops_unused;
179 
180 			/* Allocate objects containing crypto operations and mbufs */
181 			if (rte_mempool_get_bulk(ctx->pool, (void **)ops,
182 						ops_needed) != 0) {
183 				RTE_LOG(ERR, USER1,
184 					"Failed to allocate more crypto operations "
185 					"from the the crypto operation pool.\n"
186 					"Consider increasing the pool size "
187 					"with --pool-sz\n");
188 				return -1;
189 			}
190 
191 			/* Setup crypto op, attach mbuf etc */
192 			(ctx->populate_ops)(ops, ctx->src_buf_offset,
193 					ctx->dst_buf_offset,
194 					ops_needed, ctx->sess,
195 					ctx->options, ctx->test_vector,
196 					iv_offset);
197 
198 			/**
199 			 * When ops_needed is smaller than ops_enqd, the
200 			 * unused ops need to be moved to the front for
201 			 * next round use.
202 			 */
203 			if (unlikely(ops_enqd > ops_needed)) {
204 				size_t nb_b_to_mov = ops_unused * sizeof(
205 						struct rte_crypto_op *);
206 
207 				memmove(&ops[ops_needed], &ops[ops_enqd],
208 					nb_b_to_mov);
209 			}
210 
211 #ifdef CPERF_LINEARIZATION_ENABLE
212 			if (linearize) {
213 				/* PMD doesn't support scatter-gather and source buffer
214 				 * is segmented.
215 				 * We need to linearize it before enqueuing.
216 				 */
217 				for (i = 0; i < burst_size; i++)
218 					rte_pktmbuf_linearize(ops[i]->sym->m_src);
219 			}
220 #endif /* CPERF_LINEARIZATION_ENABLE */
221 
222 			/* Enqueue burst of ops on crypto device */
223 			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
224 					ops, burst_size);
225 			if (ops_enqd < burst_size)
226 				ops_enqd_failed++;
227 
228 			/**
229 			 * Calculate number of ops not enqueued (mainly for hw
230 			 * accelerators whose ingress queue can fill up).
231 			 */
232 			ops_unused = burst_size - ops_enqd;
233 			ops_enqd_total += ops_enqd;
234 
235 
236 			/* Dequeue processed burst of ops from crypto device */
237 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
238 					ops_processed, test_burst_size);
239 
240 			if (likely(ops_deqd))  {
241 				/* Free crypto ops so they can be reused. */
242 				rte_mempool_put_bulk(ctx->pool,
243 						(void **)ops_processed, ops_deqd);
244 
245 				ops_deqd_total += ops_deqd;
246 			} else {
247 				/**
248 				 * Count dequeue polls which didn't return any
249 				 * processed operations. This statistic is mainly
250 				 * relevant to hw accelerators.
251 				 */
252 				ops_deqd_failed++;
253 			}
254 
255 		}
256 
257 		/* Dequeue any operations still in the crypto device */
258 
259 		while (ops_deqd_total < ctx->options->total_ops) {
260 			/* Sending 0 length burst to flush sw crypto device */
261 			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
262 
263 			/* dequeue burst */
264 			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
265 					ops_processed, test_burst_size);
266 			if (ops_deqd == 0)
267 				ops_deqd_failed++;
268 			else {
269 				rte_mempool_put_bulk(ctx->pool,
270 						(void **)ops_processed, ops_deqd);
271 				ops_deqd_total += ops_deqd;
272 			}
273 		}
274 
275 		tsc_end = rte_rdtsc_precise();
276 		tsc_duration = (tsc_end - tsc_start);
277 
278 		/* Calculate average operations processed per second */
279 		double ops_per_second = ((double)ctx->options->total_ops /
280 				tsc_duration) * rte_get_tsc_hz();
281 
282 		/* Calculate average throughput (Gbps) in bits per second */
283 		double throughput_gbps = ((ops_per_second *
284 				ctx->options->test_buffer_size * 8) / 1000000000);
285 
286 		/* Calculate average cycles per packet */
287 		double cycles_per_packet = ((double)tsc_duration /
288 				ctx->options->total_ops);
289 
290 		if (!ctx->options->csv) {
291 			if (!only_once)
292 				printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
293 					"lcore id", "Buf Size", "Burst Size",
294 					"Enqueued", "Dequeued", "Failed Enq",
295 					"Failed Deq", "MOps", "Gbps",
296 					"Cycles/Buf");
297 			only_once = 1;
298 
299 			printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
300 					"%12"PRIu64"%12.4f%12.4f%12.2f\n",
301 					ctx->lcore_id,
302 					ctx->options->test_buffer_size,
303 					test_burst_size,
304 					ops_enqd_total,
305 					ops_deqd_total,
306 					ops_enqd_failed,
307 					ops_deqd_failed,
308 					ops_per_second/1000000,
309 					throughput_gbps,
310 					cycles_per_packet);
311 		} else {
312 			if (!only_once)
313 				printf("#lcore id,Buffer Size(B),"
314 					"Burst Size,Enqueued,Dequeued,Failed Enq,"
315 					"Failed Deq,Ops(Millions),Throughput(Gbps),"
316 					"Cycles/Buf\n\n");
317 			only_once = 1;
318 
319 			printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
320 					"%.3f;%.3f;%.3f\n",
321 					ctx->lcore_id,
322 					ctx->options->test_buffer_size,
323 					test_burst_size,
324 					ops_enqd_total,
325 					ops_deqd_total,
326 					ops_enqd_failed,
327 					ops_deqd_failed,
328 					ops_per_second/1000000,
329 					throughput_gbps,
330 					cycles_per_packet);
331 		}
332 
333 		/* Get next size from range or list */
334 		if (ctx->options->inc_burst_size != 0)
335 			test_burst_size += ctx->options->inc_burst_size;
336 		else {
337 			if (++burst_size_idx == ctx->options->burst_size_count)
338 				break;
339 			test_burst_size = ctx->options->burst_size_list[burst_size_idx];
340 		}
341 
342 	}
343 
344 	return 0;
345 }
346 
347 
348 void
349 cperf_throughput_test_destructor(void *arg)
350 {
351 	struct cperf_throughput_ctx *ctx = arg;
352 
353 	if (ctx == NULL)
354 		return;
355 
356 	cperf_throughput_test_free(ctx);
357 }
358