/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_compressdev.h>

#include "comp_perf_test_throughput.h"

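/*
 * Free the memory held by the embedded verification context, then free
 * the context itself.
 */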
void
cperf_throughput_test_destructor(void *arg)
{
	struct cperf_benchmark_ctx *ctx = arg;

	if (ctx != NULL) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(ctx);
	}
}

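/*
 * Allocate a benchmark context for one device/queue pair and prepare its
 * buffers. On failure the partially built context is torn down and NULL
 * is returned.
 */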
void *
cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_benchmark_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* the verification part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_throughput_test_destructor(ctx);
	return NULL;
}

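/*
 * Run one direction (compress or decompress) of the throughput test:
 * enqueue and dequeue all buffers num_iter times and record the average
 * number of TSC cycles per iteration.
 */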
static int
main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1,
			"Unknown burst size\n");
		return -1;
	}

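	/*
	 * A single allocation backs both op arrays: the first half holds the
	 * ops being enqueued, the second half (deq_ops) receives dequeued ops.
	 */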
	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

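	/*
	 * Pick the xform and the buffer direction: compression reads the
	 * plain (decomp) buffers and writes the compressed ones;
	 * decompression goes the other way.
	 */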
	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

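	/*
	 * Time the whole enqueue/dequeue loop with the TSC; the per-iteration
	 * average is stored per compression level once all iterations finish.
	 */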
	uint64_t tsc_start, tsc_end, tsc_duration;

	num_iter = test_data->num_iter;
	tsc_start = tsc_end = tsc_duration = 0;
	tsc_start = rte_rdtsc_precise();

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
						   test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
				      ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && !rte_comp_op_bulk_alloc(
						mem->op_pool,
						&ops[ops_unused],
						ops_needed)) {
				RTE_LOG(ERR, USER1,
				      "Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
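				/*
				 * The checksum field is repurposed to carry
				 * the buffer index; it is not read back in
				 * this loop.
				 */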
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_enq = rte_compressdev_enqueue_burst(dev_id,
								mem->qp_id, ops,
								num_ops);
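			/*
			 * A zero enqueue usually just means the queue pair is
			 * full; treat it as fatal only if the device reports
			 * enqueue errors.
			 */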
			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
							   mem->qp_id,
							   deq_ops,
							   test_data->burst_sz);
			total_deq_ops += num_deq;

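			/*
			 * On the last iteration, trim the destination mbuf
			 * chain to the bytes actually produced; the compressed
			 * buffers, in particular, serve as input for the
			 * decompression run.
			 */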
			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
				       "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
							     out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the remaining in-flight operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
							   mem->qp_id,
							   deq_ops,
							   test_data->burst_sz);
			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}

			total_deq_ops += num_deq;

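			/* Same final-iteration length fix-up as above */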
			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
				       "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
						RTE_MIN(remaining_data,
							out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}

	tsc_end = rte_rdtsc_precise();
	tsc_duration = tsc_end - tsc_start;

	if (type == RTE_COMP_COMPRESS)
		ctx->comp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;
	else
		ctx->decomp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;

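	/*
	 * Cleanup: return any ops still held (allocated tracks outstanding
	 * ops), then free the private xform and the ops array.
	 */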
end:
	rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
		      "lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_benchmark_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	int i, ret = EXIT_SUCCESS;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/* Print information about the current compression thread */
	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
				1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf("    lcore: %u,"
				" driver name: %s,"
				" device name: %s,"
				" device id: %u,"
				" socket id: %u,"
				" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * The verification part must run (and pass) first
	 */
	if (cperf_verify_test_runner(&ctx->ver)) {
		ret = EXIT_FAILURE;
		goto end;
	}

	/*
	 * Run each direction twice, discarding the first result, which is
	 * measured before the cache has warmed up
	 */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

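	/*
	 * Convert the averaged cycle counts to throughput:
	 * cycles/byte = avg cycles / input size, and
	 * Gbps = tsc_hz / (cycles/byte) * 8 / 1e9.
	 */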
	ctx->comp_tsc_byte =
			(double)(ctx->comp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;

	ctx->decomp_tsc_byte =
			(double)(ctx->decomp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;

	ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
			1000000000;

	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
			1000000000;

	exp = 0;
	if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		printf("\n%12s%6s%12s%17s%15s%16s\n",
			"lcore id", "Level", "Comp size", "Comp ratio [%]",
			"Comp [Gbps]", "Decomp [Gbps]");
	}

	printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
		ctx->ver.mem.lcore_id,
		test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
		ctx->comp_gbps,
		ctx->decomp_gbps);

end:
	return ret;
}