1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #ifdef RTE_LIBRTE_PMD_FPGA_LTE_FEC
22 #include <fpga_lte_fec.h>
23 #endif
24 
25 #include "main.h"
26 #include "test_bbdev_vector.h"
27 
28 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
29 
30 #define MAX_QUEUES RTE_MAX_LCORE
31 #define TEST_REPETITIONS 1000
32 
33 #ifdef RTE_LIBRTE_PMD_FPGA_LTE_FEC
34 #define FPGA_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
35 #define FPGA_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
36 #define VF_UL_QUEUE_VALUE 4
37 #define VF_DL_QUEUE_VALUE 4
38 #define UL_BANDWIDTH 3
39 #define DL_BANDWIDTH 3
40 #define UL_LOAD_BALANCE 128
41 #define DL_LOAD_BALANCE 128
42 #define FLR_TIMEOUT 610
43 #endif
44 
45 #define OPS_CACHE_SIZE 256U
46 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
47 
48 #define SYNC_WAIT 0
49 #define SYNC_START 1
50 
51 #define INVALID_QUEUE_ID -1
52 
53 static struct test_bbdev_vector test_vector;
54 
55 /* Switch between PMD and Interrupt for throughput TC */
56 static bool intr_enabled;
57 
58 /* Represents tested active devices */
59 static struct active_device {
60 	const char *driver_name;
61 	uint8_t dev_id;
62 	uint16_t supported_ops;
63 	uint16_t queue_ids[MAX_QUEUES];
64 	uint16_t nb_queues;
65 	struct rte_mempool *ops_mempool;
66 	struct rte_mempool *in_mbuf_pool;
67 	struct rte_mempool *hard_out_mbuf_pool;
68 	struct rte_mempool *soft_out_mbuf_pool;
69 	struct rte_mempool *harq_in_mbuf_pool;
70 	struct rte_mempool *harq_out_mbuf_pool;
71 } active_devs[RTE_BBDEV_MAX_DEVS];
72 
73 static uint8_t nb_active_devs;
74 
75 /* Data buffers used by BBDEV ops */
76 struct test_buffers {
77 	struct rte_bbdev_op_data *inputs;
78 	struct rte_bbdev_op_data *hard_outputs;
79 	struct rte_bbdev_op_data *soft_outputs;
80 	struct rte_bbdev_op_data *harq_inputs;
81 	struct rte_bbdev_op_data *harq_outputs;
82 };
83 
84 /* Operation parameters specific to a given test case */
85 struct test_op_params {
86 	struct rte_mempool *mp;
87 	struct rte_bbdev_dec_op *ref_dec_op;
88 	struct rte_bbdev_enc_op *ref_enc_op;
89 	uint16_t burst_sz;
90 	uint16_t num_to_process;
91 	uint16_t num_lcores;
92 	int vector_mask;
93 	rte_atomic16_t sync;
94 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
95 };
96 
97 /* Contains per lcore params */
98 struct thread_params {
99 	uint8_t dev_id;
100 	uint16_t queue_id;
101 	uint32_t lcore_id;
102 	uint64_t start_time;
103 	double ops_per_sec;
104 	double mbps;
105 	uint8_t iter_count;
106 	rte_atomic16_t nb_dequeued;
107 	rte_atomic16_t processing_status;
108 	rte_atomic16_t burst_sz;
109 	struct test_op_params *op_params;
110 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
111 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
112 };
113 
114 #ifdef RTE_BBDEV_OFFLOAD_COST
115 /* Stores time statistics */
116 struct test_time_stats {
117 	/* Stores software enqueue total working time */
118 	uint64_t enq_sw_total_time;
119 	/* Stores minimum value of software enqueue working time */
120 	uint64_t enq_sw_min_time;
121 	/* Stores maximum value of software enqueue working time */
122 	uint64_t enq_sw_max_time;
123 	/* Stores accelerator enqueue total working time */
124 	uint64_t enq_acc_total_time;
125 	/* Stores minimum value of accelerator enqueue working time */
126 	uint64_t enq_acc_min_time;
127 	/* Stores maximum value of accelerator enqueue working time */
128 	uint64_t enq_acc_max_time;
129 	/* Stores dequeue total working time */
130 	uint64_t deq_total_time;
131 	/* Stores minimum value of dequeue working time */
132 	uint64_t deq_min_time;
133 	/* Stores maximum value of dequeue working time */
134 	uint64_t deq_max_time;
135 };
136 #endif
137 
138 typedef int (test_case_function)(struct active_device *ad,
139 		struct test_op_params *op_params);
140 
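/* Reset a possibly chained mbuf: clear pkt_len on the head and data_len
 * on every segment so the buffer can be reused.
 */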
141 static inline void
142 mbuf_reset(struct rte_mbuf *m)
143 {
144 	m->pkt_len = 0;
145 
146 	do {
147 		m->data_len = 0;
148 		m = m->next;
149 	} while (m != NULL);
150 }
151 
152 /* Read flag value 0/1 from bitmap */
153 static inline bool
154 check_bit(uint32_t bitmap, uint32_t bitmask)
155 {
156 	return bitmap & bitmask;
157 }
158 
159 static inline void
160 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
161 {
162 	ad->supported_ops |= (1 << op_type);
163 }
164 
165 static inline bool
166 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
167 {
168 	return ad->supported_ops & (1 << op_type);
169 }
170 
171 static inline bool
172 flags_match(uint32_t flags_req, uint32_t flags_present)
173 {
174 	return (flags_req & flags_present) == flags_req;
175 }
176 
177 static void
178 clear_soft_out_cap(uint32_t *op_flags)
179 {
180 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
181 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
182 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
183 }
184 
185 static int
186 check_dev_cap(const struct rte_bbdev_info *dev_info)
187 {
188 	unsigned int i;
189 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
190 		nb_harq_inputs, nb_harq_outputs;
191 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
192 
193 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
194 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
195 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
196 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
197 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
198 
199 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
200 		if (op_cap->type != test_vector.op_type)
201 			continue;
202 
203 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
204 			const struct rte_bbdev_op_cap_turbo_dec *cap =
205 					&op_cap->cap.turbo_dec;
206 			/* If the device lacks soft output capability, do not
207 			 * fail; just skip checking whether soft output is valid.
208 			 */
209 			if ((test_vector.turbo_dec.op_flags &
210 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
211 					!(cap->capability_flags &
212 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
213 				printf(
214 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
215 					dev_info->dev_name);
216 				clear_soft_out_cap(
217 					&test_vector.turbo_dec.op_flags);
218 			}
219 
220 			if (!flags_match(test_vector.turbo_dec.op_flags,
221 					cap->capability_flags))
222 				return TEST_FAILED;
223 			if (nb_inputs > cap->num_buffers_src) {
224 				printf("Too many inputs defined: %u, max: %u\n",
225 					nb_inputs, cap->num_buffers_src);
226 				return TEST_FAILED;
227 			}
228 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
229 					(test_vector.turbo_dec.op_flags &
230 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
231 				printf(
232 					"Too many soft outputs defined: %u, max: %u\n",
233 						nb_soft_outputs,
234 						cap->num_buffers_soft_out);
235 				return TEST_FAILED;
236 			}
237 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
238 				printf(
239 					"Too many hard outputs defined: %u, max: %u\n",
240 						nb_hard_outputs,
241 						cap->num_buffers_hard_out);
242 				return TEST_FAILED;
243 			}
244 			if (intr_enabled && !(cap->capability_flags &
245 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
246 				printf(
247 					"Dequeue interrupts are not supported!\n");
248 				return TEST_FAILED;
249 			}
250 
251 			return TEST_SUCCESS;
252 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
253 			const struct rte_bbdev_op_cap_turbo_enc *cap =
254 					&op_cap->cap.turbo_enc;
255 
256 			if (!flags_match(test_vector.turbo_enc.op_flags,
257 					cap->capability_flags))
258 				return TEST_FAILED;
259 			if (nb_inputs > cap->num_buffers_src) {
260 				printf("Too many inputs defined: %u, max: %u\n",
261 					nb_inputs, cap->num_buffers_src);
262 				return TEST_FAILED;
263 			}
264 			if (nb_hard_outputs > cap->num_buffers_dst) {
265 				printf(
266 					"Too many hard outputs defined: %u, max: %u\n",
267 					nb_hard_outputs, cap->num_buffers_dst);
268 				return TEST_FAILED;
269 			}
270 			if (intr_enabled && !(cap->capability_flags &
271 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
272 				printf(
273 					"Dequeue interrupts are not supported!\n");
274 				return TEST_FAILED;
275 			}
276 
277 			return TEST_SUCCESS;
278 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
279 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
280 					&op_cap->cap.ldpc_enc;
281 
282 			if (!flags_match(test_vector.ldpc_enc.op_flags,
283 					cap->capability_flags)) {
284 				printf("Flag Mismatch\n");
285 				return TEST_FAILED;
286 			}
287 			if (nb_inputs > cap->num_buffers_src) {
288 				printf("Too many inputs defined: %u, max: %u\n",
289 					nb_inputs, cap->num_buffers_src);
290 				return TEST_FAILED;
291 			}
292 			if (nb_hard_outputs > cap->num_buffers_dst) {
293 				printf(
294 					"Too many hard outputs defined: %u, max: %u\n",
295 					nb_hard_outputs, cap->num_buffers_dst);
296 				return TEST_FAILED;
297 			}
298 			if (intr_enabled && !(cap->capability_flags &
299 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
300 				printf(
301 					"Dequeue interrupts are not supported!\n");
302 				return TEST_FAILED;
303 			}
304 
305 			return TEST_SUCCESS;
306 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
307 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
308 					&op_cap->cap.ldpc_dec;
309 
310 			if (!flags_match(test_vector.ldpc_dec.op_flags,
311 					cap->capability_flags)) {
312 				printf("Flag Mismatch\n");
313 				return TEST_FAILED;
314 			}
315 			if (nb_inputs > cap->num_buffers_src) {
316 				printf("Too many inputs defined: %u, max: %u\n",
317 					nb_inputs, cap->num_buffers_src);
318 				return TEST_FAILED;
319 			}
320 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
321 				printf(
322 					"Too many hard outputs defined: %u, max: %u\n",
323 					nb_hard_outputs,
324 					cap->num_buffers_hard_out);
325 				return TEST_FAILED;
326 			}
327 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
328 				printf(
329 					"Too many HARQ inputs defined: %u, max: %u\n",
330 					nb_harq_inputs,
331 					cap->num_buffers_hard_out);
332 				return TEST_FAILED;
333 			}
334 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
335 				printf(
336 					"Too many HARQ outputs defined: %u, max: %u\n",
337 					nb_harq_outputs,
338 					cap->num_buffers_hard_out);
339 				return TEST_FAILED;
340 			}
341 			if (intr_enabled && !(cap->capability_flags &
342 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
343 				printf(
344 					"Dequeue interrupts are not supported!\n");
345 				return TEST_FAILED;
346 			}
347 
348 			return TEST_SUCCESS;
349 		}
350 	}
351 
352 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
353 		return TEST_SUCCESS; /* Special case for NULL device */
354 
355 	return TEST_FAILED;
356 }
357 
358 /* Calculates an optimal mempool size (2^n - 1) that is not smaller than val */
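/* e.g. optimal_mempool_size(600) = rte_align32pow2(601) - 1 = 1023 */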
359 static unsigned int
360 optimal_mempool_size(unsigned int val)
361 {
362 	return rte_align32pow2(val + 1) - 1;
363 }
364 
365 /* allocates mbuf mempool for inputs and outputs */
366 static struct rte_mempool *
367 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
368 		int socket_id, unsigned int mbuf_pool_size,
369 		const char *op_type_str)
370 {
371 	unsigned int i;
372 	uint32_t max_seg_sz = 0;
373 	char pool_name[RTE_MEMPOOL_NAMESIZE];
374 
375 	/* find max input segment size */
376 	for (i = 0; i < entries->nb_segments; ++i)
377 		if (entries->segments[i].length > max_seg_sz)
378 			max_seg_sz = entries->segments[i].length;
379 
380 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
381 			dev_id);
382 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
383 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
384 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
385 }
386 
387 static int
388 create_mempools(struct active_device *ad, int socket_id,
389 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
390 {
391 	struct rte_mempool *mp;
392 	unsigned int ops_pool_size, mbuf_pool_size = 0;
393 	char pool_name[RTE_MEMPOOL_NAMESIZE];
394 	const char *op_type_str;
395 	enum rte_bbdev_op_type op_type = org_op_type;
396 
397 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
398 	struct op_data_entries *hard_out =
399 			&test_vector.entries[DATA_HARD_OUTPUT];
400 	struct op_data_entries *soft_out =
401 			&test_vector.entries[DATA_SOFT_OUTPUT];
402 	struct op_data_entries *harq_in =
403 			&test_vector.entries[DATA_HARQ_INPUT];
404 	struct op_data_entries *harq_out =
405 			&test_vector.entries[DATA_HARQ_OUTPUT];
406 
407 	/* allocate ops mempool */
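	/* e.g. with 2 queues, 2047 ops and 4 lcores (example values):
	 * max(2 * 2047 + 1, 1.5 * 4 * 256 + 1, 511) = 4095, already 2^12 - 1
	 */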
408 	ops_pool_size = optimal_mempool_size(RTE_MAX(
409 			/* Ops used plus 1 reference op */
410 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
411 			/* Minimal cache size plus 1 reference op */
412 			(unsigned int)(1.5 * rte_lcore_count() *
413 					OPS_CACHE_SIZE + 1)),
414 			OPS_POOL_SIZE_MIN));
415 
416 	if (org_op_type == RTE_BBDEV_OP_NONE)
417 		op_type = RTE_BBDEV_OP_TURBO_ENC;
418 
419 	op_type_str = rte_bbdev_op_type_str(op_type);
420 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
421 
422 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
423 			ad->dev_id);
424 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
425 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
426 	TEST_ASSERT_NOT_NULL(mp,
427 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
428 			ops_pool_size,
429 			ad->dev_id,
430 			socket_id);
431 	ad->ops_mempool = mp;
432 
433 	/* Do not create input and output mbufs for the BaseBand Null device */
434 	if (org_op_type == RTE_BBDEV_OP_NONE)
435 		return TEST_SUCCESS;
436 
437 	/* Inputs */
438 	mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
439 	mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
440 	TEST_ASSERT_NOT_NULL(mp,
441 			"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
442 			mbuf_pool_size,
443 			ad->dev_id,
444 			socket_id);
445 	ad->in_mbuf_pool = mp;
446 
447 	/* Hard outputs */
448 	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
449 			hard_out->nb_segments);
450 	mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
451 			"hard_out");
452 	TEST_ASSERT_NOT_NULL(mp,
453 			"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
454 			mbuf_pool_size,
455 			ad->dev_id,
456 			socket_id);
457 	ad->hard_out_mbuf_pool = mp;
458 
459 
460 	/* Soft outputs */
461 	if (soft_out->nb_segments > 0) {
462 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
463 				soft_out->nb_segments);
464 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
465 				mbuf_pool_size,
466 				"soft_out");
467 		TEST_ASSERT_NOT_NULL(mp,
468 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
469 				mbuf_pool_size,
470 				ad->dev_id,
471 				socket_id);
472 		ad->soft_out_mbuf_pool = mp;
473 	}
474 
475 	/* HARQ inputs */
476 	if (harq_in->nb_segments > 0) {
477 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
478 				harq_in->nb_segments);
479 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
480 				mbuf_pool_size,
481 				"harq_in");
482 		TEST_ASSERT_NOT_NULL(mp,
483 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
484 				mbuf_pool_size,
485 				ad->dev_id,
486 				socket_id);
487 		ad->harq_in_mbuf_pool = mp;
488 	}
489 
490 	/* HARQ outputs */
491 	if (harq_out->nb_segments > 0) {
492 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
493 				harq_out->nb_segments);
494 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
495 				mbuf_pool_size,
496 				"harq_out");
497 		TEST_ASSERT_NOT_NULL(mp,
498 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
499 				mbuf_pool_size,
500 				ad->dev_id,
501 				socket_id);
502 		ad->harq_out_mbuf_pool = mp;
503 	}
504 
505 	return TEST_SUCCESS;
506 }
507 
508 static int
509 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
510 		struct test_bbdev_vector *vector)
511 {
512 	int ret;
513 	unsigned int queue_id;
514 	struct rte_bbdev_queue_conf qconf;
515 	struct active_device *ad = &active_devs[nb_active_devs];
516 	unsigned int nb_queues;
517 	enum rte_bbdev_op_type op_type = vector->op_type;
518 
519 /* Configure the FPGA LTE FEC device with PF & VF values
520  * if the '-i' flag is set and an FPGA device is used.
521  */
522 #ifndef RTE_BUILD_SHARED_LIB
523 #ifdef RTE_LIBRTE_PMD_FPGA_LTE_FEC
524 	if ((get_init_device() == true) &&
525 		(!strcmp(info->drv.driver_name, FPGA_PF_DRIVER_NAME))) {
526 		struct fpga_lte_fec_conf conf;
527 		unsigned int i;
528 
529 		printf("Configure FPGA FEC Driver %s with default values\n",
530 				info->drv.driver_name);
531 
532 		/* clear default configuration before initialization */
533 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
534 
535 		/* Set PF mode:
536 		 * true if PF is used for data plane
537 		 * false for VFs
538 		 */
539 		conf.pf_mode_en = true;
540 
541 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
542 			/* Number of UL queues per VF (fpga supports 8 VFs) */
543 			conf.vf_ul_queues_number[i] = VF_UL_QUEUE_VALUE;
544 			/* Number of DL queues per VF (fpga supports 8 VFs) */
545 			conf.vf_dl_queues_number[i] = VF_DL_QUEUE_VALUE;
546 		}
547 
548 		/* UL bandwidth. Needed for the scheduling algorithm */
549 		conf.ul_bandwidth = UL_BANDWIDTH;
550 		/* DL bandwidth */
551 		conf.dl_bandwidth = DL_BANDWIDTH;
552 
553 		/* UL & DL load balance factor set to 128 */
554 		conf.ul_load_balance = UL_LOAD_BALANCE;
555 		conf.dl_load_balance = DL_LOAD_BALANCE;
556 
557 		/* FLR timeout value */
558 		conf.flr_time_out = FLR_TIMEOUT;
559 
560 		/* setup FPGA PF with configuration information */
561 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
562 		TEST_ASSERT_SUCCESS(ret,
563 				"Failed to configure 4G FPGA PF for bbdev %s",
564 				info->dev_name);
565 	}
566 #endif
567 #endif
568 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
569 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
570 
571 	/* setup device */
572 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
573 	if (ret < 0) {
574 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
575 				dev_id, nb_queues, info->socket_id, ret);
576 		return TEST_FAILED;
577 	}
578 
579 	/* configure interrupts if needed */
580 	if (intr_enabled) {
581 		ret = rte_bbdev_intr_enable(dev_id);
582 		if (ret < 0) {
583 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
584 					ret);
585 			return TEST_FAILED;
586 		}
587 	}
588 
589 	/* setup device queues */
590 	qconf.socket = info->socket_id;
591 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
592 	qconf.priority = 0;
593 	qconf.deferred_start = 0;
594 	qconf.op_type = op_type;
595 
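	/* Configure as many queues as the device allows; when a queue fails
	 * to configure, retry it once at the next priority level and stop if
	 * that attempt also fails.
	 */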
596 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
597 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
598 		if (ret != 0) {
599 			printf(
600 					"Allocated all queues (id=%u) at prio %u on dev %u\n",
601 					queue_id, qconf.priority, dev_id);
602 			qconf.priority++;
603 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
604 					&qconf);
605 		}
606 		if (ret != 0) {
607 			printf("All queues on dev %u allocated: %u\n",
608 					dev_id, queue_id);
609 			break;
610 		}
611 		ad->queue_ids[queue_id] = queue_id;
612 	}
613 	TEST_ASSERT(queue_id != 0,
614 			"ERROR Failed to configure any queues on dev %u",
615 			dev_id);
616 	ad->nb_queues = queue_id;
617 
618 	set_avail_op(ad, op_type);
619 
620 	return TEST_SUCCESS;
621 }
622 
623 static int
624 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
625 		struct test_bbdev_vector *vector)
626 {
627 	int ret;
628 
629 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
630 	active_devs[nb_active_devs].dev_id = dev_id;
631 
632 	ret = add_bbdev_dev(dev_id, info, vector);
633 	if (ret == TEST_SUCCESS)
634 		++nb_active_devs;
635 	return ret;
636 }
637 
638 static uint8_t
639 populate_active_devices(void)
640 {
641 	int ret;
642 	uint8_t dev_id;
643 	uint8_t nb_devs_added = 0;
644 	struct rte_bbdev_info info;
645 
646 	RTE_BBDEV_FOREACH(dev_id) {
647 		rte_bbdev_info_get(dev_id, &info);
648 
649 		if (check_dev_cap(&info)) {
650 			printf(
651 				"Device %d (%s) does not support specified capabilities\n",
652 					dev_id, info.dev_name);
653 			continue;
654 		}
655 
656 		ret = add_active_device(dev_id, &info, &test_vector);
657 		if (ret != 0) {
658 			printf("Adding active bbdev %s skipped\n",
659 					info.dev_name);
660 			continue;
661 		}
662 		nb_devs_added++;
663 	}
664 
665 	return nb_devs_added;
666 }
667 
668 static int
669 read_test_vector(void)
670 {
671 	int ret;
672 
673 	memset(&test_vector, 0, sizeof(test_vector));
674 	printf("Test vector file = %s\n", get_vector_filename());
675 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
676 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
677 			get_vector_filename());
678 
679 	return TEST_SUCCESS;
680 }
681 
682 static int
683 testsuite_setup(void)
684 {
685 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
686 
687 	if (populate_active_devices() == 0) {
688 		printf("No suitable devices found!\n");
689 		return TEST_SKIPPED;
690 	}
691 
692 	return TEST_SUCCESS;
693 }
694 
695 static int
696 interrupt_testsuite_setup(void)
697 {
698 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
699 
700 	/* Enable interrupts */
701 	intr_enabled = true;
702 
703 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
704 	if (populate_active_devices() == 0 ||
705 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
706 		intr_enabled = false;
707 		printf("No suitable devices found!\n");
708 		return TEST_SKIPPED;
709 	}
710 
711 	return TEST_SUCCESS;
712 }
713 
714 static void
715 testsuite_teardown(void)
716 {
717 	uint8_t dev_id;
718 
719 	/* Unconfigure devices */
720 	RTE_BBDEV_FOREACH(dev_id)
721 		rte_bbdev_close(dev_id);
722 
723 	/* Clear active devices structs. */
724 	memset(active_devs, 0, sizeof(active_devs));
725 	nb_active_devs = 0;
726 }
727 
728 static int
729 ut_setup(void)
730 {
731 	uint8_t i, dev_id;
732 
733 	for (i = 0; i < nb_active_devs; i++) {
734 		dev_id = active_devs[i].dev_id;
735 		/* reset bbdev stats */
736 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
737 				"Failed to reset stats of bbdev %u", dev_id);
738 		/* start the device */
739 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
740 				"Failed to start bbdev %u", dev_id);
741 	}
742 
743 	return TEST_SUCCESS;
744 }
745 
746 static void
747 ut_teardown(void)
748 {
749 	uint8_t i, dev_id;
750 	struct rte_bbdev_stats stats;
751 
752 	for (i = 0; i < nb_active_devs; i++) {
753 		dev_id = active_devs[i].dev_id;
754 		/* read stats and print */
755 		rte_bbdev_stats_get(dev_id, &stats);
756 		/* Stop the device */
757 		rte_bbdev_stop(dev_id);
758 	}
759 }
760 
761 static int
762 init_op_data_objs(struct rte_bbdev_op_data *bufs,
763 		struct op_data_entries *ref_entries,
764 		struct rte_mempool *mbuf_pool, const uint16_t n,
765 		enum op_data_type op_type, uint16_t min_alignment)
766 {
767 	int ret;
768 	unsigned int i, j;
769 
770 	for (i = 0; i < n; ++i) {
771 		char *data;
772 		struct op_data_buf *seg = &ref_entries->segments[0];
773 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
774 		TEST_ASSERT_NOT_NULL(m_head,
775 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
776 				op_type, n * ref_entries->nb_segments,
777 				mbuf_pool->size);
778 
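		/* TEST_ASSERT_SUCCESS() expects 0, so this asserts that the
		 * segment plus headroom fits in the 16-bit mbuf data_len field.
		 */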
779 		TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
780 				(uint32_t)UINT16_MAX),
781 				"Given data is bigger than allowed mbuf segment size");
782 
783 		bufs[i].data = m_head;
784 		bufs[i].offset = 0;
785 		bufs[i].length = 0;
786 
787 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
788 			data = rte_pktmbuf_append(m_head, seg->length);
789 			TEST_ASSERT_NOT_NULL(data,
790 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
791 					seg->length, op_type);
792 
793 			TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
794 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
795 					data, min_alignment);
796 			rte_memcpy(data, seg->addr, seg->length);
797 			bufs[i].length += seg->length;
798 
799 			for (j = 1; j < ref_entries->nb_segments; ++j) {
800 				struct rte_mbuf *m_tail =
801 						rte_pktmbuf_alloc(mbuf_pool);
802 				TEST_ASSERT_NOT_NULL(m_tail,
803 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
804 						op_type,
805 						n * ref_entries->nb_segments,
806 						mbuf_pool->size);
807 				seg += 1;
808 
809 				data = rte_pktmbuf_append(m_tail, seg->length);
810 				TEST_ASSERT_NOT_NULL(data,
811 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
812 						seg->length, op_type);
813 
814 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
815 						min_alignment),
816 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
817 						data, min_alignment);
818 				rte_memcpy(data, seg->addr, seg->length);
819 				bufs[i].length += seg->length;
820 
821 				ret = rte_pktmbuf_chain(m_head, m_tail);
822 				TEST_ASSERT_SUCCESS(ret,
823 						"Couldn't chain mbufs from %d data type mbuf pool",
824 						op_type);
825 			}
826 		} else {
827 
828 			/* allocate chained-mbuf for output buffer */
829 			for (j = 1; j < ref_entries->nb_segments; ++j) {
830 				struct rte_mbuf *m_tail =
831 						rte_pktmbuf_alloc(mbuf_pool);
832 				TEST_ASSERT_NOT_NULL(m_tail,
833 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
834 						op_type,
835 						n * ref_entries->nb_segments,
836 						mbuf_pool->size);
837 
838 				ret = rte_pktmbuf_chain(m_head, m_tail);
839 				TEST_ASSERT_SUCCESS(ret,
840 						"Couldn't chain mbufs from %d data type mbuf pool",
841 						op_type);
842 			}
843 		}
844 	}
845 
846 	return 0;
847 }
848 
849 static int
850 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
851 		const int socket)
852 {
853 	int i;
854 
855 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
856 	if (*buffers == NULL) {
857 		printf("WARNING: Failed to allocate op_data on socket %d\n",
858 				socket);
859 		/* try to allocate memory on other detected sockets */
860 		for (i = 0; i < socket; i++) {
861 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
862 			if (*buffers != NULL)
863 				break;
864 		}
865 	}
866 
867 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
868 }
869 
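/* Rescale 8-bit reference LLRs so their range matches the device's
 * max_llr_modulus, e.g. an input of 127 (INT8_MAX) becomes max_llr_modulus.
 */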
870 static void
871 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
872 		const uint16_t n, const int8_t max_llr_modulus)
873 {
874 	uint16_t i, byte_idx;
875 
876 	for (i = 0; i < n; ++i) {
877 		struct rte_mbuf *m = input_ops[i].data;
878 		while (m != NULL) {
879 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
880 					input_ops[i].offset);
881 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
882 					++byte_idx)
883 				llr[byte_idx] = round((double)max_llr_modulus *
884 						llr[byte_idx] / INT8_MAX);
885 
886 			m = m->next;
887 		}
888 	}
889 }
890 
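/* Convert reference LLRs (assumed to carry 1 fractional bit) to the device's
 * fixed-point format: shift to llr_decimals fractional bits and saturate to
 * the signed llr_size-bit range.
 */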
891 static void
892 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
893 		const uint16_t n, const int8_t llr_size,
894 		const int8_t llr_decimals)
895 {
896 	if (input_ops == NULL)
897 		return;
898 
899 	uint16_t i, byte_idx;
900 
901 	int16_t llr_max, llr_min, llr_tmp;
902 	llr_max = (1 << (llr_size - 1)) - 1;
903 	llr_min = -llr_max;
904 	for (i = 0; i < n; ++i) {
905 		struct rte_mbuf *m = input_ops[i].data;
906 		while (m != NULL) {
907 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
908 					input_ops[i].offset);
909 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
910 					++byte_idx) {
911 
912 				llr_tmp = llr[byte_idx];
913 				if (llr_decimals == 2)
914 					llr_tmp *= 2;
915 				else if (llr_decimals == 0)
916 					llr_tmp /= 2;
917 				llr_tmp = RTE_MIN(llr_max,
918 						RTE_MAX(llr_min, llr_tmp));
919 				llr[byte_idx] = (int8_t) llr_tmp;
920 			}
921 
922 			m = m->next;
923 		}
924 	}
925 }
926 
927 
928 
929 static int
930 fill_queue_buffers(struct test_op_params *op_params,
931 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
932 		struct rte_mempool *soft_out_mp,
933 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
934 		uint16_t queue_id,
935 		const struct rte_bbdev_op_cap *capabilities,
936 		uint16_t min_alignment, const int socket_id)
937 {
938 	int ret;
939 	enum op_data_type type;
940 	const uint16_t n = op_params->num_to_process;
941 
942 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
943 		in_mp,
944 		soft_out_mp,
945 		hard_out_mp,
946 		harq_in_mp,
947 		harq_out_mp,
948 	};
949 
950 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
951 		&op_params->q_bufs[socket_id][queue_id].inputs,
952 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
953 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
954 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
955 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
956 	};
957 
958 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
959 		struct op_data_entries *ref_entries =
960 				&test_vector.entries[type];
961 		if (ref_entries->nb_segments == 0)
962 			continue;
963 
964 		ret = allocate_buffers_on_socket(queue_ops[type],
965 				n * sizeof(struct rte_bbdev_op_data),
966 				socket_id);
967 		TEST_ASSERT_SUCCESS(ret,
968 				"Couldn't allocate memory for rte_bbdev_op_data structs");
969 
970 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
971 				mbuf_pools[type], n, type, min_alignment);
972 		TEST_ASSERT_SUCCESS(ret,
973 				"Couldn't init rte_bbdev_op_data structs");
974 	}
975 
976 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
977 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
978 			capabilities->cap.turbo_dec.max_llr_modulus);
979 
980 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
981 		ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
982 			capabilities->cap.ldpc_dec.llr_size,
983 			capabilities->cap.ldpc_dec.llr_decimals);
984 		ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
985 				capabilities->cap.ldpc_dec.llr_size,
986 				capabilities->cap.ldpc_dec.llr_decimals);
987 	}
988 
989 	return 0;
990 }
991 
992 static void
993 free_buffers(struct active_device *ad, struct test_op_params *op_params)
994 {
995 	unsigned int i, j;
996 
997 	rte_mempool_free(ad->ops_mempool);
998 	rte_mempool_free(ad->in_mbuf_pool);
999 	rte_mempool_free(ad->hard_out_mbuf_pool);
1000 	rte_mempool_free(ad->soft_out_mbuf_pool);
1001 	rte_mempool_free(ad->harq_in_mbuf_pool);
1002 	rte_mempool_free(ad->harq_out_mbuf_pool);
1003 
1004 	for (i = 0; i < rte_lcore_count(); ++i) {
1005 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1006 			rte_free(op_params->q_bufs[j][i].inputs);
1007 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1008 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1009 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1010 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1011 		}
1012 	}
1013 }
1014 
1015 static void
1016 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1017 		unsigned int start_idx,
1018 		struct rte_bbdev_op_data *inputs,
1019 		struct rte_bbdev_op_data *hard_outputs,
1020 		struct rte_bbdev_op_data *soft_outputs,
1021 		struct rte_bbdev_dec_op *ref_op)
1022 {
1023 	unsigned int i;
1024 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1025 
1026 	for (i = 0; i < n; ++i) {
1027 		if (turbo_dec->code_block_mode == 0) {
1028 			ops[i]->turbo_dec.tb_params.ea =
1029 					turbo_dec->tb_params.ea;
1030 			ops[i]->turbo_dec.tb_params.eb =
1031 					turbo_dec->tb_params.eb;
1032 			ops[i]->turbo_dec.tb_params.k_pos =
1033 					turbo_dec->tb_params.k_pos;
1034 			ops[i]->turbo_dec.tb_params.k_neg =
1035 					turbo_dec->tb_params.k_neg;
1036 			ops[i]->turbo_dec.tb_params.c =
1037 					turbo_dec->tb_params.c;
1038 			ops[i]->turbo_dec.tb_params.c_neg =
1039 					turbo_dec->tb_params.c_neg;
1040 			ops[i]->turbo_dec.tb_params.cab =
1041 					turbo_dec->tb_params.cab;
1042 			ops[i]->turbo_dec.tb_params.r =
1043 					turbo_dec->tb_params.r;
1044 		} else {
1045 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1046 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1047 		}
1048 
1049 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1050 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1051 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1052 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1053 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1054 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1055 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1056 
1057 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1058 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1059 		if (soft_outputs != NULL)
1060 			ops[i]->turbo_dec.soft_output =
1061 				soft_outputs[start_idx + i];
1062 	}
1063 }
1064 
1065 static void
1066 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1067 		unsigned int start_idx,
1068 		struct rte_bbdev_op_data *inputs,
1069 		struct rte_bbdev_op_data *outputs,
1070 		struct rte_bbdev_enc_op *ref_op)
1071 {
1072 	unsigned int i;
1073 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1074 	for (i = 0; i < n; ++i) {
1075 		if (turbo_enc->code_block_mode == 0) {
1076 			ops[i]->turbo_enc.tb_params.ea =
1077 					turbo_enc->tb_params.ea;
1078 			ops[i]->turbo_enc.tb_params.eb =
1079 					turbo_enc->tb_params.eb;
1080 			ops[i]->turbo_enc.tb_params.k_pos =
1081 					turbo_enc->tb_params.k_pos;
1082 			ops[i]->turbo_enc.tb_params.k_neg =
1083 					turbo_enc->tb_params.k_neg;
1084 			ops[i]->turbo_enc.tb_params.c =
1085 					turbo_enc->tb_params.c;
1086 			ops[i]->turbo_enc.tb_params.c_neg =
1087 					turbo_enc->tb_params.c_neg;
1088 			ops[i]->turbo_enc.tb_params.cab =
1089 					turbo_enc->tb_params.cab;
1090 			ops[i]->turbo_enc.tb_params.ncb_pos =
1091 					turbo_enc->tb_params.ncb_pos;
1092 			ops[i]->turbo_enc.tb_params.ncb_neg =
1093 					turbo_enc->tb_params.ncb_neg;
1094 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1095 		} else {
1096 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1097 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1098 			ops[i]->turbo_enc.cb_params.ncb =
1099 					turbo_enc->cb_params.ncb;
1100 		}
1101 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1102 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1103 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1104 
1105 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1106 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1107 	}
1108 }
1109 
1110 static void
1111 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1112 		unsigned int start_idx,
1113 		struct rte_bbdev_op_data *inputs,
1114 		struct rte_bbdev_op_data *hard_outputs,
1115 		struct rte_bbdev_op_data *soft_outputs,
1116 		struct rte_bbdev_op_data *harq_inputs,
1117 		struct rte_bbdev_op_data *harq_outputs,
1118 		struct rte_bbdev_dec_op *ref_op)
1119 {
1120 	unsigned int i;
1121 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1122 
1123 	for (i = 0; i < n; ++i) {
1124 		if (ldpc_dec->code_block_mode == 0) {
1125 			ops[i]->ldpc_dec.tb_params.ea =
1126 					ldpc_dec->tb_params.ea;
1127 			ops[i]->ldpc_dec.tb_params.eb =
1128 					ldpc_dec->tb_params.eb;
1129 			ops[i]->ldpc_dec.tb_params.c =
1130 					ldpc_dec->tb_params.c;
1131 			ops[i]->ldpc_dec.tb_params.cab =
1132 					ldpc_dec->tb_params.cab;
1133 			ops[i]->ldpc_dec.tb_params.r =
1134 					ldpc_dec->tb_params.r;
1135 		} else {
1136 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1137 		}
1138 
1139 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1140 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1141 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1142 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1143 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1144 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1145 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1146 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1147 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1148 
1149 		ops[i]->ldpc_dec.hard_output = hard_outputs[start_idx + i];
1150 		ops[i]->ldpc_dec.input = inputs[start_idx + i];
1151 		if (soft_outputs != NULL)
1152 			ops[i]->ldpc_dec.soft_output =
1153 				soft_outputs[start_idx + i];
1154 		if (harq_inputs != NULL)
1155 			ops[i]->ldpc_dec.harq_combined_input =
1156 					harq_inputs[start_idx + i];
1157 		if (harq_outputs != NULL)
1158 			ops[i]->ldpc_dec.harq_combined_output =
1159 				harq_outputs[start_idx + i];
1160 	}
1161 }
1162 
1163 
1164 static void
1165 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1166 		unsigned int start_idx,
1167 		struct rte_bbdev_op_data *inputs,
1168 		struct rte_bbdev_op_data *outputs,
1169 		struct rte_bbdev_enc_op *ref_op)
1170 {
1171 	unsigned int i;
1172 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1173 	for (i = 0; i < n; ++i) {
1174 		if (ldpc_enc->code_block_mode == 0) {
1175 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1176 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1177 			ops[i]->ldpc_enc.tb_params.cab =
1178 					ldpc_enc->tb_params.cab;
1179 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1180 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1181 		} else {
1182 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1183 		}
1184 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1185 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1186 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1187 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1188 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1189 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1190 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1191 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1192 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1193 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1194 	}
1195 }
1196 
1197 static int
1198 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1199 		unsigned int order_idx, const int expected_status)
1200 {
1201 	TEST_ASSERT(op->status == expected_status,
1202 			"op_status (%d) != expected_status (%d)",
1203 			op->status, expected_status);
1204 
1205 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1206 			"Ordering error, expected %p, got %p",
1207 			(void *)(uintptr_t)order_idx, op->opaque_data);
1208 
1209 	return TEST_SUCCESS;
1210 }
1211 
1212 static int
1213 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1214 		unsigned int order_idx, const int expected_status)
1215 {
1216 	TEST_ASSERT(op->status == expected_status,
1217 			"op_status (%d) != expected_status (%d)",
1218 			op->status, expected_status);
1219 
1220 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1221 			"Ordering error, expected %p, got %p",
1222 			(void *)(uintptr_t)order_idx, op->opaque_data);
1223 
1224 	return TEST_SUCCESS;
1225 }
1226 
1227 static inline int
1228 validate_op_chain(struct rte_bbdev_op_data *op,
1229 		struct op_data_entries *orig_op)
1230 {
1231 	uint8_t i;
1232 	struct rte_mbuf *m = op->data;
1233 	uint8_t nb_dst_segments = orig_op->nb_segments;
1234 	uint32_t total_data_size = 0;
1235 
1236 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1237 			"Number of segments differs in original (%u) and filled (%u) op",
1238 			nb_dst_segments, m->nb_segs);
1239 
1240 	/* Validate each mbuf segment length */
1241 	for (i = 0; i < nb_dst_segments; ++i) {
1242 		/* Apply offset to the first mbuf segment */
1243 		uint16_t offset = (i == 0) ? op->offset : 0;
1244 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1245 		total_data_size += orig_op->segments[i].length;
1246 
1247 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1248 				"Length of segment differs in original (%u) and filled (%u) op",
1249 				orig_op->segments[i].length, data_len);
1250 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1251 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1252 				data_len,
1253 				"Output buffers (CB=%u) are not equal", i);
1254 		m = m->next;
1255 	}
1256 
1257 	/* Validate total mbuf pkt length */
1258 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1259 	TEST_ASSERT(total_data_size == pkt_len,
1260 			"Length of data differs in original (%u) and filled (%u) op",
1261 			total_data_size, pkt_len);
1262 
1263 	return TEST_SUCCESS;
1264 }
1265 
1266 static int
1267 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1268 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1269 {
1270 	unsigned int i;
1271 	int ret;
1272 	struct op_data_entries *hard_data_orig =
1273 			&test_vector.entries[DATA_HARD_OUTPUT];
1274 	struct op_data_entries *soft_data_orig =
1275 			&test_vector.entries[DATA_SOFT_OUTPUT];
1276 	struct rte_bbdev_op_turbo_dec *ops_td;
1277 	struct rte_bbdev_op_data *hard_output;
1278 	struct rte_bbdev_op_data *soft_output;
1279 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1280 
1281 	for (i = 0; i < n; ++i) {
1282 		ops_td = &ops[i]->turbo_dec;
1283 		hard_output = &ops_td->hard_output;
1284 		soft_output = &ops_td->soft_output;
1285 
1286 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1287 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1288 					"Returned iter_count (%d) > expected iter_count (%d)",
1289 					ops_td->iter_count, ref_td->iter_count);
1290 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1291 		TEST_ASSERT_SUCCESS(ret,
1292 				"Checking status and ordering for decoder failed");
1293 
1294 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1295 				hard_data_orig),
1296 				"Hard output buffers (CB=%u) are not equal",
1297 				i);
1298 
1299 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1300 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1301 					soft_data_orig),
1302 					"Soft output buffers (CB=%u) are not equal",
1303 					i);
1304 	}
1305 
1306 	return TEST_SUCCESS;
1307 }
1308 
1309 
1310 static int
1311 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1312 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1313 {
1314 	unsigned int i;
1315 	int ret;
1316 	struct op_data_entries *hard_data_orig =
1317 			&test_vector.entries[DATA_HARD_OUTPUT];
1318 	struct op_data_entries *soft_data_orig =
1319 			&test_vector.entries[DATA_SOFT_OUTPUT];
1320 	struct op_data_entries *harq_data_orig =
1321 				&test_vector.entries[DATA_HARQ_OUTPUT];
1322 	struct rte_bbdev_op_ldpc_dec *ops_td;
1323 	struct rte_bbdev_op_data *hard_output;
1324 	struct rte_bbdev_op_data *harq_output;
1325 	struct rte_bbdev_op_data *soft_output;
1326 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1327 
1328 	for (i = 0; i < n; ++i) {
1329 		ops_td = &ops[i]->ldpc_dec;
1330 		hard_output = &ops_td->hard_output;
1331 		harq_output = &ops_td->harq_combined_output;
1332 		soft_output = &ops_td->soft_output;
1333 
1334 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1335 		TEST_ASSERT_SUCCESS(ret,
1336 				"Checking status and ordering for decoder failed");
1337 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1338 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1339 					"Returned iter_count (%d) > expected iter_count (%d)",
1340 					ops_td->iter_count, ref_td->iter_count);
1341 		/* We can ignore data when the decoding failed to converge */
1342 		if ((ops[i]->status &  (1 << RTE_BBDEV_SYNDROME_ERROR)) == 0)
1343 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1344 					hard_data_orig),
1345 					"Hard output buffers (CB=%u) are not equal",
1346 					i);
1347 
1348 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1349 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1350 					soft_data_orig),
1351 					"Soft output buffers (CB=%u) are not equal",
1352 					i);
1353 		if (ref_op->ldpc_dec.op_flags &
1354 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1355 			ldpc_input_llr_scaling(harq_output, 1, 8, 0);
1356 			TEST_ASSERT_SUCCESS(validate_op_chain(harq_output,
1357 					harq_data_orig),
1358 					"HARQ output buffers (CB=%u) are not equal",
1359 					i);
1360 		}
1361 	}
1362 
1363 	return TEST_SUCCESS;
1364 }
1365 
1366 
1367 static int
1368 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1369 		struct rte_bbdev_enc_op *ref_op)
1370 {
1371 	unsigned int i;
1372 	int ret;
1373 	struct op_data_entries *hard_data_orig =
1374 			&test_vector.entries[DATA_HARD_OUTPUT];
1375 
1376 	for (i = 0; i < n; ++i) {
1377 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1378 		TEST_ASSERT_SUCCESS(ret,
1379 				"Checking status and ordering for encoder failed");
1380 		TEST_ASSERT_SUCCESS(validate_op_chain(
1381 				&ops[i]->turbo_enc.output,
1382 				hard_data_orig),
1383 				"Output buffers (CB=%u) are not equal",
1384 				i);
1385 	}
1386 
1387 	return TEST_SUCCESS;
1388 }
1389 
1390 static int
1391 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1392 		struct rte_bbdev_enc_op *ref_op)
1393 {
1394 	unsigned int i;
1395 	int ret;
1396 	struct op_data_entries *hard_data_orig =
1397 			&test_vector.entries[DATA_HARD_OUTPUT];
1398 
1399 	for (i = 0; i < n; ++i) {
1400 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1401 		TEST_ASSERT_SUCCESS(ret,
1402 				"Checking status and ordering for encoder failed");
1403 		TEST_ASSERT_SUCCESS(validate_op_chain(
1404 				&ops[i]->ldpc_enc.output,
1405 				hard_data_orig),
1406 				"Output buffers (CB=%u) are not equal",
1407 				i);
1408 	}
1409 
1410 	return TEST_SUCCESS;
1411 }
1412 
1413 static void
1414 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1415 {
1416 	unsigned int i;
1417 	struct op_data_entries *entry;
1418 
1419 	op->turbo_dec = test_vector.turbo_dec;
1420 	entry = &test_vector.entries[DATA_INPUT];
1421 	for (i = 0; i < entry->nb_segments; ++i)
1422 		op->turbo_dec.input.length +=
1423 				entry->segments[i].length;
1424 }
1425 
1426 static void
1427 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
1428 {
1429 	unsigned int i;
1430 	struct op_data_entries *entry;
1431 
1432 	op->ldpc_dec = test_vector.ldpc_dec;
1433 	entry = &test_vector.entries[DATA_INPUT];
1434 	for (i = 0; i < entry->nb_segments; ++i)
1435 		op->ldpc_dec.input.length +=
1436 				entry->segments[i].length;
1437 	if (test_vector.ldpc_dec.op_flags &
1438 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
1439 		entry = &test_vector.entries[DATA_HARQ_INPUT];
1440 		for (i = 0; i < entry->nb_segments; ++i)
1441 			op->ldpc_dec.harq_combined_input.length +=
1442 				entry->segments[i].length;
1443 	}
1444 }
1445 
1446 
1447 static void
1448 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1449 {
1450 	unsigned int i;
1451 	struct op_data_entries *entry;
1452 
1453 	op->turbo_enc = test_vector.turbo_enc;
1454 	entry = &test_vector.entries[DATA_INPUT];
1455 	for (i = 0; i < entry->nb_segments; ++i)
1456 		op->turbo_enc.input.length +=
1457 				entry->segments[i].length;
1458 }
1459 
1460 static void
1461 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
1462 {
1463 	unsigned int i;
1464 	struct op_data_entries *entry;
1465 
1466 	op->ldpc_enc = test_vector.ldpc_enc;
1467 	entry = &test_vector.entries[DATA_INPUT];
1468 	for (i = 0; i < entry->nb_segments; ++i)
1469 		op->ldpc_enc.input.length +=
1470 				entry->segments[i].length;
1471 }
1472 
1473 static uint32_t
1474 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1475 {
1476 	uint8_t i;
1477 	uint32_t c, r, tb_size = 0;
1478 
1479 	if (op->turbo_dec.code_block_mode) {
1480 		tb_size = op->turbo_dec.tb_params.k_neg;
1481 	} else {
1482 		c = op->turbo_dec.tb_params.c;
1483 		r = op->turbo_dec.tb_params.r;
1484 		for (i = 0; i < c-r; i++)
1485 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1486 				op->turbo_dec.tb_params.k_neg :
1487 				op->turbo_dec.tb_params.k_pos;
1488 	}
1489 	return tb_size;
1490 }
1491 
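/* LDPC TB size in bits: each code block carries sys_cols * Z_c - n_filler
 * information bits, with 22 systematic columns for basegraph 1 and 10 for
 * basegraph 2 (e.g. BG1, Z_c = 384, no filler: 22 * 384 = 8448 bits per CB).
 */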
1492 static uint32_t
1493 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
1494 {
1495 	uint8_t i;
1496 	uint32_t c, r, tb_size = 0;
1497 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
1498 
1499 	if (op->ldpc_dec.code_block_mode) {
1500 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1501 	} else {
1502 		c = op->ldpc_dec.tb_params.c;
1503 		r = op->ldpc_dec.tb_params.r;
1504 		for (i = 0; i < c-r; i++)
1505 			tb_size += sys_cols * op->ldpc_dec.z_c
1506 					- op->ldpc_dec.n_filler;
1507 	}
1508 	return tb_size;
1509 }
1510 
1511 static uint32_t
1512 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1513 {
1514 	uint8_t i;
1515 	uint32_t c, r, tb_size = 0;
1516 
1517 	if (op->turbo_enc.code_block_mode) {
1518 		tb_size = op->turbo_enc.tb_params.k_neg;
1519 	} else {
1520 		c = op->turbo_enc.tb_params.c;
1521 		r = op->turbo_enc.tb_params.r;
1522 		for (i = 0; i < c-r; i++)
1523 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1524 				op->turbo_enc.tb_params.k_neg :
1525 				op->turbo_enc.tb_params.k_pos;
1526 	}
1527 	return tb_size;
1528 }
1529 
1530 static uint32_t
1531 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
1532 {
1533 	uint8_t i;
1534 	uint32_t c, r, tb_size = 0;
1535 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
1536 
1537 	if (op->ldpc_enc.code_block_mode) {
1538 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
1539 	} else {
1540 		c = op->ldpc_enc.tb_params.c;
1541 		r = op->ldpc_enc.tb_params.r;
1542 		for (i = 0; i < c-r; i++)
1543 			tb_size += sys_cols * op->ldpc_enc.z_c
1544 					- op->ldpc_enc.n_filler;
1545 	}
1546 	return tb_size;
1547 }
1548 
1549 
1550 static int
1551 init_test_op_params(struct test_op_params *op_params,
1552 		enum rte_bbdev_op_type op_type, const int expected_status,
1553 		const int vector_mask, struct rte_mempool *ops_mp,
1554 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1555 {
1556 	int ret = 0;
1557 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1558 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1559 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1560 				&op_params->ref_dec_op, 1);
1561 	else
1562 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1563 				&op_params->ref_enc_op, 1);
1564 
1565 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1566 
1567 	op_params->mp = ops_mp;
1568 	op_params->burst_sz = burst_sz;
1569 	op_params->num_to_process = num_to_process;
1570 	op_params->num_lcores = num_lcores;
1571 	op_params->vector_mask = vector_mask;
1572 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1573 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1574 		op_params->ref_dec_op->status = expected_status;
1575 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
1576 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
1577 		op_params->ref_enc_op->status = expected_status;
1578 	return 0;
1579 }
1580 
1581 static int
1582 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1583 		struct test_op_params *op_params)
1584 {
1585 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1586 	unsigned int i;
1587 	struct active_device *ad;
1588 	unsigned int burst_sz = get_burst_sz();
1589 	enum rte_bbdev_op_type op_type = test_vector.op_type;
1590 	const struct rte_bbdev_op_cap *capabilities = NULL;
1591 
1592 	ad = &active_devs[dev_id];
1593 
1594 	/* Check if device supports op_type */
1595 	if (!is_avail_op(ad, test_vector.op_type))
1596 		return TEST_SUCCESS;
1597 
1598 	struct rte_bbdev_info info;
1599 	rte_bbdev_info_get(ad->dev_id, &info);
1600 	socket_id = GET_SOCKET(info.socket_id);
1601 
1602 	f_ret = create_mempools(ad, socket_id, op_type,
1603 			get_num_ops());
1604 	if (f_ret != TEST_SUCCESS) {
1605 		printf("Couldn't create mempools\n");
1606 		goto fail;
1607 	}
1608 	if (op_type == RTE_BBDEV_OP_NONE)
1609 		op_type = RTE_BBDEV_OP_TURBO_ENC;
1610 
1611 	f_ret = init_test_op_params(op_params, test_vector.op_type,
1612 			test_vector.expected_status,
1613 			test_vector.mask,
1614 			ad->ops_mempool,
1615 			burst_sz,
1616 			get_num_ops(),
1617 			get_num_lcores());
1618 	if (f_ret != TEST_SUCCESS) {
1619 		printf("Couldn't init test op params\n");
1620 		goto fail;
1621 	}
1622 
1623 
1624 	/* Find capabilities */
1625 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1626 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
1627 		if (cap->type == test_vector.op_type) {
1628 			capabilities = cap;
1629 			break;
1630 		}
1631 		cap++;
1632 	}
1633 	TEST_ASSERT_NOT_NULL(capabilities,
1634 			"Couldn't find capabilities");
1635 
1636 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1637 		create_reference_dec_op(op_params->ref_dec_op);
1638 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1639 		create_reference_enc_op(op_params->ref_enc_op);
1640 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
1641 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
1642 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1643 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
1644 
1645 	for (i = 0; i < ad->nb_queues; ++i) {
1646 		f_ret = fill_queue_buffers(op_params,
1647 				ad->in_mbuf_pool,
1648 				ad->hard_out_mbuf_pool,
1649 				ad->soft_out_mbuf_pool,
1650 				ad->harq_in_mbuf_pool,
1651 				ad->harq_out_mbuf_pool,
1652 				ad->queue_ids[i],
1653 				capabilities,
1654 				info.drv.min_alignment,
1655 				socket_id);
1656 		if (f_ret != TEST_SUCCESS) {
1657 			printf("Couldn't init queue buffers\n");
1658 			goto fail;
1659 		}
1660 	}
1661 
1662 	/* Run test case function */
1663 	t_ret = test_case_func(ad, op_params);
1664 
1665 	/* Free active device resources and return */
1666 	free_buffers(ad, op_params);
1667 	return t_ret;
1668 
1669 fail:
1670 	free_buffers(ad, op_params);
1671 	return TEST_FAILED;
1672 }
1673 
1674 /* Run given test function per active device per supported op type
1675  * per burst size.
1676  */
1677 static int
1678 run_test_case(test_case_function *test_case_func)
1679 {
1680 	int ret = 0;
1681 	uint8_t dev;
1682 
1683 	/* Alloc op_params */
1684 	struct test_op_params *op_params = rte_zmalloc(NULL,
1685 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1686 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1687 			RTE_ALIGN(sizeof(struct test_op_params),
1688 				RTE_CACHE_LINE_SIZE));
1689 
1690 	/* For each device run test case function */
1691 	for (dev = 0; dev < nb_active_devs; ++dev)
1692 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
1693 
1694 	rte_free(op_params);
1695 
1696 	return ret;
1697 }
1698 
1699 static void
1700 dequeue_event_callback(uint16_t dev_id,
1701 		enum rte_bbdev_event_type event, void *cb_arg,
1702 		void *ret_param)
1703 {
1704 	int ret;
1705 	uint16_t i;
1706 	uint64_t total_time;
1707 	uint16_t deq, burst_sz, num_ops;
1708 	uint16_t queue_id = *(uint16_t *) ret_param;
1709 	struct rte_bbdev_info info;
1710 	double tb_len_bits;
1711 	struct thread_params *tp = cb_arg;
1712 
1713 	/* Find matching thread params using queue_id */
1714 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1715 		if (tp->queue_id == queue_id)
1716 			break;
1717 
1718 	if (i == MAX_QUEUES) {
1719 		printf("%s: Queue_id from interrupt details was not found!\n",
1720 				__func__);
1721 		return;
1722 	}
1723 
1724 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1725 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1726 		printf(
1727 			"Dequeue interrupt handler called for incorrect event!\n");
1728 		return;
1729 	}
1730 
1731 	burst_sz = rte_atomic16_read(&tp->burst_sz);
1732 	num_ops = tp->op_params->num_to_process;
1733 
1734 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
1735 			test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1736 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1737 				&tp->dec_ops[
1738 					rte_atomic16_read(&tp->nb_dequeued)],
1739 				burst_sz);
1740 	else
1741 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1742 				&tp->enc_ops[
1743 					rte_atomic16_read(&tp->nb_dequeued)],
1744 				burst_sz);
1745 
1746 	if (deq < burst_sz) {
1747 		printf(
1748 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1749 			burst_sz, deq);
1750 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1751 		return;
1752 	}
1753 
1754 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1755 		rte_atomic16_add(&tp->nb_dequeued, deq);
1756 		return;
1757 	}
1758 
1759 	total_time = rte_rdtsc_precise() - tp->start_time;
1760 
1761 	rte_bbdev_info_get(dev_id, &info);
1762 
1763 	ret = TEST_SUCCESS;
1764 
1765 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1766 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1767 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
1768 				tp->op_params->vector_mask);
1769 		/* get the max of iter_count for all dequeued ops */
1770 		for (i = 0; i < num_ops; ++i)
1771 			tp->iter_count = RTE_MAX(
1772 					tp->dec_ops[i]->turbo_dec.iter_count,
1773 					tp->iter_count);
1774 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1775 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1776 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1777 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
1778 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1779 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
1780 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1781 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
1782 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1783 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1784 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1785 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
1786 				tp->op_params->vector_mask);
1787 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1788 	}
1789 
1790 	if (ret) {
1791 		printf("Buffers validation failed\n");
1792 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1793 	}
1794 
1795 	switch (test_vector.op_type) {
1796 	case RTE_BBDEV_OP_TURBO_DEC:
1797 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1798 		break;
1799 	case RTE_BBDEV_OP_TURBO_ENC:
1800 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1801 		break;
1802 	case RTE_BBDEV_OP_LDPC_DEC:
1803 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
1804 		break;
1805 	case RTE_BBDEV_OP_LDPC_ENC:
1806 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
1807 		break;
1808 	case RTE_BBDEV_OP_NONE:
1809 		tb_len_bits = 0.0;
1810 		break;
1811 	default:
1812 		printf("Unknown op type: %d\n", test_vector.op_type);
1813 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1814 		return;
1815 	}
1816 
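	/*
	 * Convert the measured cycle count to seconds via the TSC frequency
	 * and accumulate across callback invocations; throughput_test()
	 * later divides these sums by TEST_REPETITIONS to report
	 * per-repetition averages.
	 */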
1817 	tp->ops_per_sec += ((double)num_ops) /
1818 			((double)total_time / (double)rte_get_tsc_hz());
1819 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1820 			((double)total_time / (double)rte_get_tsc_hz());
1821 
1822 	rte_atomic16_add(&tp->nb_dequeued, deq);
1823 }
1824 
1825 static int
1826 throughput_intr_lcore_dec(void *arg)
1827 {
1828 	struct thread_params *tp = arg;
1829 	unsigned int enqueued;
1830 	const uint16_t queue_id = tp->queue_id;
1831 	const uint16_t burst_sz = tp->op_params->burst_sz;
1832 	const uint16_t num_to_process = tp->op_params->num_to_process;
1833 	struct rte_bbdev_dec_op *ops[num_to_process];
1834 	struct test_buffers *bufs = NULL;
1835 	struct rte_bbdev_info info;
1836 	int ret, i, j;
1837 	uint16_t num_to_enq, enq;
1838 
1839 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1840 			"BURST_SIZE should be <= %u", MAX_BURST);
1841 
1842 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1843 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1844 			tp->dev_id, queue_id);
1845 
1846 	rte_bbdev_info_get(tp->dev_id, &info);
1847 
1848 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1849 			"NUM_OPS cannot exceed %u for this device",
1850 			info.drv.queue_size_lim);
1851 
1852 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1853 
1854 	rte_atomic16_clear(&tp->processing_status);
1855 	rte_atomic16_clear(&tp->nb_dequeued);
1856 
1857 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1858 		rte_pause();
1859 
1860 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1861 				num_to_process);
1862 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1863 			num_to_process);
1864 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1865 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1866 				bufs->hard_outputs, bufs->soft_outputs,
1867 				tp->op_params->ref_dec_op);
1868 
1869 	/* Set counter to validate the ordering */
1870 	for (j = 0; j < num_to_process; ++j)
1871 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1872 
1873 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1874 		for (i = 0; i < num_to_process; ++i)
1875 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
1876 
1877 		tp->start_time = rte_rdtsc_precise();
1878 		for (enqueued = 0; enqueued < num_to_process;) {
1879 			num_to_enq = burst_sz;
1880 
1881 			if (unlikely(num_to_process - enqueued < num_to_enq))
1882 				num_to_enq = num_to_process - enqueued;
1883 
1884 			enq = 0;
1885 			do {
1886 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1887 						queue_id, &ops[enqueued],
1888 						num_to_enq);
1889 			} while (unlikely(num_to_enq != enq));
1890 			enqueued += enq;
1891 
1892 			/* Write to thread burst_sz current number of enqueued
1893 			 * descriptors. It ensures that proper number of
1894 			 * descriptors will be dequeued in callback
1895 			 * function - needed for last batch in case where
1896 			 * the number of operations is not a multiple of
1897 			 * burst size.
1898 			 */
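			/*
			 * Illustrative example (hypothetical numbers): with
			 * num_to_process = 500 and burst_sz = 64, the first
			 * seven batches enqueue 64 ops each and the last one
			 * enqueues 500 - 7 * 64 = 52, so the callback must
			 * dequeue 52 rather than a full burst.
			 */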
1899 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1900 
1901 			/* Wait until processing of previous batch is
1902 			 * completed
1903 			 */
1904 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1905 					(int16_t) enqueued)
1906 				rte_pause();
1907 		}
1908 		if (j != TEST_REPETITIONS - 1)
1909 			rte_atomic16_clear(&tp->nb_dequeued);
1910 	}
1911 
1912 	return TEST_SUCCESS;
1913 }
1914 
1915 static int
1916 throughput_intr_lcore_enc(void *arg)
1917 {
1918 	struct thread_params *tp = arg;
1919 	unsigned int enqueued;
1920 	const uint16_t queue_id = tp->queue_id;
1921 	const uint16_t burst_sz = tp->op_params->burst_sz;
1922 	const uint16_t num_to_process = tp->op_params->num_to_process;
1923 	struct rte_bbdev_enc_op *ops[num_to_process];
1924 	struct test_buffers *bufs = NULL;
1925 	struct rte_bbdev_info info;
1926 	int ret, i, j;
1927 	uint16_t num_to_enq, enq;
1928 
1929 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1930 			"BURST_SIZE should be <= %u", MAX_BURST);
1931 
1932 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1933 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1934 			tp->dev_id, queue_id);
1935 
1936 	rte_bbdev_info_get(tp->dev_id, &info);
1937 
1938 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1939 			"NUM_OPS cannot exceed %u for this device",
1940 			info.drv.queue_size_lim);
1941 
1942 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1943 
1944 	rte_atomic16_clear(&tp->processing_status);
1945 	rte_atomic16_clear(&tp->nb_dequeued);
1946 
1947 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1948 		rte_pause();
1949 
1950 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1951 			num_to_process);
1952 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1953 			num_to_process);
1954 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1955 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1956 				bufs->hard_outputs, tp->op_params->ref_enc_op);
1957 
1958 	/* Set counter to validate the ordering */
1959 	for (j = 0; j < num_to_process; ++j)
1960 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1961 
1962 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1963 		for (i = 0; i < num_to_process; ++i)
1964 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
1965 
1966 		tp->start_time = rte_rdtsc_precise();
1967 		for (enqueued = 0; enqueued < num_to_process;) {
1968 			num_to_enq = burst_sz;
1969 
1970 			if (unlikely(num_to_process - enqueued < num_to_enq))
1971 				num_to_enq = num_to_process - enqueued;
1972 
1973 			enq = 0;
1974 			do {
1975 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1976 						queue_id, &ops[enqueued],
1977 						num_to_enq);
1978 			} while (unlikely(enq != num_to_enq));
1979 			enqueued += enq;
1980 
1981 			/* Write to thread burst_sz current number of enqueued
1982 			 * descriptors. It ensures that proper number of
1983 			 * descriptors will be dequeued in callback
1984 			 * function - needed for last batch in case where
1985 			 * the number of operations is not a multiple of
1986 			 * burst size.
1987 			 */
1988 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1989 
1990 			/* Wait until processing of previous batch is
1991 			 * completed
1992 			 */
1993 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1994 					(int16_t) enqueued)
1995 				rte_pause();
1996 		}
1997 		if (j != TEST_REPETITIONS - 1)
1998 			rte_atomic16_clear(&tp->nb_dequeued);
1999 	}
2000 
2001 	return TEST_SUCCESS;
2002 }
2003 
2004 static int
2005 throughput_pmd_lcore_dec(void *arg)
2006 {
2007 	struct thread_params *tp = arg;
2008 	uint16_t enq, deq;
2009 	uint64_t total_time = 0, start_time;
2010 	const uint16_t queue_id = tp->queue_id;
2011 	const uint16_t burst_sz = tp->op_params->burst_sz;
2012 	const uint16_t num_ops = tp->op_params->num_to_process;
2013 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2014 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2015 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2016 	struct test_buffers *bufs = NULL;
2017 	int i, j, ret;
2018 	struct rte_bbdev_info info;
2019 	uint16_t num_to_enq;
2020 
2021 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2022 			"BURST_SIZE should be <= %u", MAX_BURST);
2023 
2024 	rte_bbdev_info_get(tp->dev_id, &info);
2025 
2026 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2027 			"NUM_OPS cannot exceed %u for this device",
2028 			info.drv.queue_size_lim);
2029 
2030 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2031 
2032 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2033 		rte_pause();
2034 
2035 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2036 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2037 
2038 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2039 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2040 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
2041 
2042 	/* Set counter to validate the ordering */
2043 	for (j = 0; j < num_ops; ++j)
2044 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2045 
2046 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2047 
2048 		for (j = 0; j < num_ops; ++j)
2049 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2050 
2051 		start_time = rte_rdtsc_precise();
2052 
2053 		for (enq = 0, deq = 0; enq < num_ops;) {
2054 			num_to_enq = burst_sz;
2055 
2056 			if (unlikely(num_ops - enq < num_to_enq))
2057 				num_to_enq = num_ops - enq;
2058 
2059 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2060 					queue_id, &ops_enq[enq], num_to_enq);
2061 
2062 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2063 					queue_id, &ops_deq[deq], enq - deq);
2064 		}
2065 
2066 		/* dequeue the remaining */
2067 		while (deq < enq) {
2068 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2069 					queue_id, &ops_deq[deq], enq - deq);
2070 		}
2071 
2072 		total_time += rte_rdtsc_precise() - start_time;
2073 	}
2074 
2075 	tp->iter_count = 0;
2076 	/* get the max of iter_count for all dequeued ops */
2077 	for (i = 0; i < num_ops; ++i) {
2078 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2079 				tp->iter_count);
2080 	}
2081 
2082 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2083 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
2084 				tp->op_params->vector_mask);
2085 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2086 	}
2087 
2088 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2089 
2090 	double tb_len_bits = calc_dec_TB_size(ref_op);
2091 
2092 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2093 			((double)total_time / (double)rte_get_tsc_hz());
2094 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2095 			1000000.0) / ((double)total_time /
2096 			(double)rte_get_tsc_hz());
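	/*
	 * Worked example (hypothetical figures): num_ops = 512 ops of a
	 * 6144-bit TB over TEST_REPETITIONS = 1000 runs finishing in 2.0e9
	 * cycles on a 2 GHz TSC gives 512 * 1000 / 1.0 s = 512000 ops/s and
	 * 512 * 1000 * 6144 / 1e6 / 1.0 s ~= 3146 Mbps.
	 */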
2097 
2098 	return TEST_SUCCESS;
2099 }
2100 
2101 static int
2102 throughput_pmd_lcore_ldpc_dec(void *arg)
2103 {
2104 	struct thread_params *tp = arg;
2105 	uint16_t enq, deq;
2106 	uint64_t total_time = 0, start_time;
2107 	const uint16_t queue_id = tp->queue_id;
2108 	const uint16_t burst_sz = tp->op_params->burst_sz;
2109 	const uint16_t num_ops = tp->op_params->num_to_process;
2110 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2111 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2112 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2113 	struct test_buffers *bufs = NULL;
2114 	int i, j, ret;
2115 	struct rte_bbdev_info info;
2116 	uint16_t num_to_enq;
2117 
2118 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2119 			"BURST_SIZE should be <= %u", MAX_BURST);
2120 
2121 	rte_bbdev_info_get(tp->dev_id, &info);
2122 
2123 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2124 			"NUM_OPS cannot exceed %u for this device",
2125 			info.drv.queue_size_lim);
2126 
2127 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2128 
2129 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2130 		rte_pause();
2131 
2132 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2133 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2134 
2135 	/* For throughput tests we need to disable early termination */
2136 	if (check_bit(ref_op->ldpc_dec.op_flags,
2137 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2138 		ref_op->ldpc_dec.op_flags &=
2139 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2140 	ref_op->ldpc_dec.iter_max = 6;
2141 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
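	/*
	 * Rationale: with early termination the decoder may stop after fewer
	 * iterations depending on the input data, which would make the
	 * measured cycle count data dependent. Clearing the flag and pinning
	 * iter_count to iter_max keeps the per-op workload constant across
	 * all TEST_REPETITIONS runs.
	 */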
2142 
2143 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2144 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2145 				bufs->hard_outputs, bufs->soft_outputs,
2146 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2147 
2148 	/* Set counter to validate the ordering */
2149 	for (j = 0; j < num_ops; ++j)
2150 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2151 
2152 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2153 		for (j = 0; j < num_ops; ++j) {
2154 			mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
2155 			if (check_bit(ref_op->ldpc_dec.op_flags,
2156 					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE))
2157 				mbuf_reset(
2158 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
2159 		}
2160 
2161 		start_time = rte_rdtsc_precise();
2162 
2163 		for (enq = 0, deq = 0; enq < num_ops;) {
2164 			num_to_enq = burst_sz;
2165 
2166 			if (unlikely(num_ops - enq < num_to_enq))
2167 				num_to_enq = num_ops - enq;
2168 
2169 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2170 					queue_id, &ops_enq[enq], num_to_enq);
2171 
2172 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2173 					queue_id, &ops_deq[deq], enq - deq);
2174 		}
2175 
2176 		/* dequeue the remaining */
2177 		while (deq < enq) {
2178 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2179 					queue_id, &ops_deq[deq], enq - deq);
2180 		}
2181 
2182 		total_time += rte_rdtsc_precise() - start_time;
2183 	}
2184 
2185 	tp->iter_count = 0;
2186 	/* get the max of iter_count for all dequeued ops */
2187 	for (i = 0; i < num_ops; ++i) {
2188 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2189 				tp->iter_count);
2190 	}
2191 
2192 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2193 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2194 				tp->op_params->vector_mask);
2195 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2196 	}
2197 
2198 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2199 
2200 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2201 
2202 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2203 			((double)total_time / (double)rte_get_tsc_hz());
2204 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2205 			1000000.0) / ((double)total_time /
2206 			(double)rte_get_tsc_hz());
2207 
2208 	return TEST_SUCCESS;
2209 }
2210 
2211 static int
2212 throughput_pmd_lcore_enc(void *arg)
2213 {
2214 	struct thread_params *tp = arg;
2215 	uint16_t enq, deq;
2216 	uint64_t total_time = 0, start_time;
2217 	const uint16_t queue_id = tp->queue_id;
2218 	const uint16_t burst_sz = tp->op_params->burst_sz;
2219 	const uint16_t num_ops = tp->op_params->num_to_process;
2220 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2221 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2222 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2223 	struct test_buffers *bufs = NULL;
2224 	int i, j, ret;
2225 	struct rte_bbdev_info info;
2226 	uint16_t num_to_enq;
2227 
2228 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2229 			"BURST_SIZE should be <= %u", MAX_BURST);
2230 
2231 	rte_bbdev_info_get(tp->dev_id, &info);
2232 
2233 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2234 			"NUM_OPS cannot exceed %u for this device",
2235 			info.drv.queue_size_lim);
2236 
2237 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2238 
2239 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2240 		rte_pause();
2241 
2242 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2243 			num_ops);
2244 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2245 			num_ops);
2246 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2247 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2248 				bufs->hard_outputs, ref_op);
2249 
2250 	/* Set counter to validate the ordering */
2251 	for (j = 0; j < num_ops; ++j)
2252 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2253 
2254 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2255 
2256 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2257 			for (j = 0; j < num_ops; ++j)
2258 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2259 
2260 		start_time = rte_rdtsc_precise();
2261 
2262 		for (enq = 0, deq = 0; enq < num_ops;) {
2263 			num_to_enq = burst_sz;
2264 
2265 			if (unlikely(num_ops - enq < num_to_enq))
2266 				num_to_enq = num_ops - enq;
2267 
2268 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2269 					queue_id, &ops_enq[enq], num_to_enq);
2270 
2271 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2272 					queue_id, &ops_deq[deq], enq - deq);
2273 		}
2274 
2275 		/* dequeue the remaining */
2276 		while (deq < enq) {
2277 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2278 					queue_id, &ops_deq[deq], enq - deq);
2279 		}
2280 
2281 		total_time += rte_rdtsc_precise() - start_time;
2282 	}
2283 
2284 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2285 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
2286 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2287 	}
2288 
2289 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2290 
2291 	double tb_len_bits = calc_enc_TB_size(ref_op);
2292 
2293 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2294 			((double)total_time / (double)rte_get_tsc_hz());
2295 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2296 			/ 1000000.0) / ((double)total_time /
2297 			(double)rte_get_tsc_hz());
2298 
2299 	return TEST_SUCCESS;
2300 }
2301 
2302 static int
2303 throughput_pmd_lcore_ldpc_enc(void *arg)
2304 {
2305 	struct thread_params *tp = arg;
2306 	uint16_t enq, deq;
2307 	uint64_t total_time = 0, start_time;
2308 	const uint16_t queue_id = tp->queue_id;
2309 	const uint16_t burst_sz = tp->op_params->burst_sz;
2310 	const uint16_t num_ops = tp->op_params->num_to_process;
2311 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2312 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2313 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2314 	struct test_buffers *bufs = NULL;
2315 	int i, j, ret;
2316 	struct rte_bbdev_info info;
2317 	uint16_t num_to_enq;
2318 
2319 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2320 			"BURST_SIZE should be <= %u", MAX_BURST);
2321 
2322 	rte_bbdev_info_get(tp->dev_id, &info);
2323 
2324 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2325 			"NUM_OPS cannot exceed %u for this device",
2326 			info.drv.queue_size_lim);
2327 
2328 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2329 
2330 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2331 		rte_pause();
2332 
2333 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2334 			num_ops);
2335 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2336 			num_ops);
2337 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2338 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2339 				bufs->hard_outputs, ref_op);
2340 
2341 	/* Set counter to validate the ordering */
2342 	for (j = 0; j < num_ops; ++j)
2343 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2344 
2345 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2346 
2347 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2348 			for (j = 0; j < num_ops; ++j)
2349 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
2350 
2351 		start_time = rte_rdtsc_precise();
2352 
2353 		for (enq = 0, deq = 0; enq < num_ops;) {
2354 			num_to_enq = burst_sz;
2355 
2356 			if (unlikely(num_ops - enq < num_to_enq))
2357 				num_to_enq = num_ops - enq;
2358 
2359 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
2360 					queue_id, &ops_enq[enq], num_to_enq);
2361 
2362 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2363 					queue_id, &ops_deq[deq], enq - deq);
2364 		}
2365 
2366 		/* dequeue the remaining */
2367 		while (deq < enq) {
2368 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2369 					queue_id, &ops_deq[deq], enq - deq);
2370 		}
2371 
2372 		total_time += rte_rdtsc_precise() - start_time;
2373 	}
2374 
2375 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2376 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
2377 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2378 	}
2379 
2380 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2381 
2382 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
2383 
2384 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2385 			((double)total_time / (double)rte_get_tsc_hz());
2386 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2387 			/ 1000000.0) / ((double)total_time /
2388 			(double)rte_get_tsc_hz());
2389 
2390 	return TEST_SUCCESS;
2391 }
2392 
2393 static void
2394 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
2395 {
2396 	unsigned int iter = 0;
2397 	double total_mops = 0, total_mbps = 0;
2398 
2399 	for (iter = 0; iter < used_cores; iter++) {
2400 		printf(
2401 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
2402 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2403 			t_params[iter].mbps);
2404 		total_mops += t_params[iter].ops_per_sec;
2405 		total_mbps += t_params[iter].mbps;
2406 	}
2407 	printf(
2408 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
2409 		used_cores, total_mops, total_mbps);
2410 }
2411 
2412 static void
2413 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
2414 {
2415 	unsigned int iter = 0;
2416 	double total_mops = 0, total_mbps = 0;
2417 	uint8_t iter_count = 0;
2418 
2419 	for (iter = 0; iter < used_cores; iter++) {
2420 		printf(
2421 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2422 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2423 			t_params[iter].mbps, t_params[iter].iter_count);
2424 		total_mops += t_params[iter].ops_per_sec;
2425 		total_mbps += t_params[iter].mbps;
2426 		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
2427 	}
2428 	printf(
2429 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
2430 		used_cores, total_mops, total_mbps, iter_count);
2431 }
2432 
2433 /*
2434  * Test function that determines how long an enqueue + dequeue of a burst
2435  * takes on available lcores.
2436  */
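/*
 * Illustrative invocation (flag names assumed from the test-bbdev script;
 * see the test-bbdev documentation):
 *   ./test-bbdev.py -c throughput -v <test_vector.data> -l 2 -b 64 -n 64
 * where -l, -b and -n map to num_lcores, burst_sz and num_to_process.
 */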
2437 static int
2438 throughput_test(struct active_device *ad,
2439 		struct test_op_params *op_params)
2440 {
2441 	int ret;
2442 	unsigned int lcore_id, used_cores = 0;
2443 	struct thread_params *t_params, *tp;
2444 	struct rte_bbdev_info info;
2445 	lcore_function_t *throughput_function;
2446 	uint16_t num_lcores;
2447 	const char *op_type_str;
2448 
2449 	rte_bbdev_info_get(ad->dev_id, &info);
2450 
2451 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
2452 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
2453 			test_vector.op_type);
2454 
2455 	printf("+ ------------------------------------------------------- +\n");
2456 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
2457 			info.dev_name, ad->nb_queues, op_params->burst_sz,
2458 			op_params->num_to_process, op_params->num_lcores,
2459 			op_type_str,
2460 			intr_enabled ? "Interrupt mode" : "PMD mode",
2461 			(double)rte_get_tsc_hz() / 1000000000.0);
2462 
2463 	/* Set number of lcores */
2464 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
2465 			? ad->nb_queues
2466 			: op_params->num_lcores;
2467 
2468 	/* Allocate memory for thread parameters structure */
2469 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
2470 			RTE_CACHE_LINE_SIZE);
2471 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
2472 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
2473 				RTE_CACHE_LINE_SIZE));
2474 
2475 	if (intr_enabled) {
2476 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2477 			throughput_function = throughput_intr_lcore_dec;
2478 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2479 			throughput_function = throughput_intr_lcore_dec;
2480 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2481 			throughput_function = throughput_intr_lcore_enc;
2482 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2483 			throughput_function = throughput_intr_lcore_enc;
2484 		else
2485 			throughput_function = throughput_intr_lcore_enc;
2486 
2487 		/* Dequeue interrupt callback registration */
2488 		ret = rte_bbdev_callback_register(ad->dev_id,
2489 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
2490 				t_params);
2491 		if (ret < 0) {
2492 			rte_free(t_params);
2493 			return ret;
2494 		}
2495 	} else {
2496 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2497 			throughput_function = throughput_pmd_lcore_dec;
2498 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2499 			throughput_function = throughput_pmd_lcore_ldpc_dec;
2500 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2501 			throughput_function = throughput_pmd_lcore_enc;
2502 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2503 			throughput_function = throughput_pmd_lcore_ldpc_enc;
2504 		else
2505 			throughput_function = throughput_pmd_lcore_enc;
2506 	}
2507 
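	/*
	 * Simple start barrier: worker lcores spin on sync == SYNC_WAIT in
	 * their throughput functions and are released together once the
	 * master sets SYNC_START below, so all queues begin processing at
	 * roughly the same time.
	 */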
2508 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
2509 
2510 	/* Master core is set at first entry */
2511 	t_params[0].dev_id = ad->dev_id;
2512 	t_params[0].lcore_id = rte_lcore_id();
2513 	t_params[0].op_params = op_params;
2514 	t_params[0].queue_id = ad->queue_ids[used_cores++];
2515 	t_params[0].iter_count = 0;
2516 
2517 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
2518 		if (used_cores >= num_lcores)
2519 			break;
2520 
2521 		t_params[used_cores].dev_id = ad->dev_id;
2522 		t_params[used_cores].lcore_id = lcore_id;
2523 		t_params[used_cores].op_params = op_params;
2524 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
2525 		t_params[used_cores].iter_count = 0;
2526 
2527 		rte_eal_remote_launch(throughput_function,
2528 				&t_params[used_cores++], lcore_id);
2529 	}
2530 
2531 	rte_atomic16_set(&op_params->sync, SYNC_START);
2532 	ret = throughput_function(&t_params[0]);
2533 
2534 	/* Master core is always used */
2535 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
2536 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
2537 
2538 	/* Return if test failed */
2539 	if (ret) {
2540 		rte_free(t_params);
2541 		return ret;
2542 	}
2543 
2544 	/* Print throughput if interrupts are disabled and test passed */
2545 	if (!intr_enabled) {
2546 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2547 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2548 			print_dec_throughput(t_params, num_lcores);
2549 		else
2550 			print_enc_throughput(t_params, num_lcores);
2551 		rte_free(t_params);
2552 		return ret;
2553 	}
2554 
2555 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
2556 	 * all pending operations. Skip waiting for queues which reported an
2557 	 * error using processing_status variable.
2558 	 * Wait for master lcore operations.
2559 	 */
2560 	tp = &t_params[0];
2561 	while ((rte_atomic16_read(&tp->nb_dequeued) <
2562 			op_params->num_to_process) &&
2563 			(rte_atomic16_read(&tp->processing_status) !=
2564 			TEST_FAILED))
2565 		rte_pause();
2566 
2567 	tp->ops_per_sec /= TEST_REPETITIONS;
2568 	tp->mbps /= TEST_REPETITIONS;
2569 	ret |= (int)rte_atomic16_read(&tp->processing_status);
2570 
2571 	/* Wait for slave lcores operations */
2572 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
2573 		tp = &t_params[used_cores];
2574 
2575 		while ((rte_atomic16_read(&tp->nb_dequeued) <
2576 				op_params->num_to_process) &&
2577 				(rte_atomic16_read(&tp->processing_status) !=
2578 				TEST_FAILED))
2579 			rte_pause();
2580 
2581 		tp->ops_per_sec /= TEST_REPETITIONS;
2582 		tp->mbps /= TEST_REPETITIONS;
2583 		ret |= (int)rte_atomic16_read(&tp->processing_status);
2584 	}
2585 
2586 	/* Print throughput if test passed */
2587 	if (!ret) {
2588 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2589 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2590 			print_dec_throughput(t_params, num_lcores);
2591 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
2592 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2593 			print_enc_throughput(t_params, num_lcores);
2594 	}
2595 
2596 	rte_free(t_params);
2597 	return ret;
2598 }
2599 
2600 static int
2601 latency_test_dec(struct rte_mempool *mempool,
2602 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2603 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2604 		const uint16_t num_to_process, uint16_t burst_sz,
2605 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2606 {
2607 	int ret = TEST_SUCCESS;
2608 	uint16_t i, j, dequeued;
2609 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2610 	uint64_t start_time = 0, last_time = 0;
2611 
2612 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
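	/*
	 * Each iteration enqueues one burst and records the time from the
	 * enqueue call until the first operation of that burst is dequeued;
	 * the min/max/total of these per-burst times are reported by the
	 * caller.
	 */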
2613 		uint16_t enq = 0, deq = 0;
2614 		bool first_time = true;
2615 		last_time = 0;
2616 
2617 		if (unlikely(num_to_process - dequeued < burst_sz))
2618 			burst_sz = num_to_process - dequeued;
2619 
2620 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2621 		TEST_ASSERT_SUCCESS(ret,
2622 				"rte_bbdev_dec_op_alloc_bulk() failed");
2623 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2624 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2625 					bufs->inputs,
2626 					bufs->hard_outputs,
2627 					bufs->soft_outputs,
2628 					ref_op);
2629 
2630 		/* Set counter to validate the ordering */
2631 		for (j = 0; j < burst_sz; ++j)
2632 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2633 
2634 		start_time = rte_rdtsc_precise();
2635 
2636 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
2637 				burst_sz);
2638 		TEST_ASSERT(enq == burst_sz,
2639 				"Error enqueueing burst, expected %u, got %u",
2640 				burst_sz, enq);
2641 
2642 		/* Dequeue */
2643 		do {
2644 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2645 					&ops_deq[deq], burst_sz - deq);
2646 			if (likely(first_time && (deq > 0))) {
2647 				last_time = rte_rdtsc_precise() - start_time;
2648 				first_time = false;
2649 			}
2650 		} while (unlikely(burst_sz != deq));
2651 
2652 		*max_time = RTE_MAX(*max_time, last_time);
2653 		*min_time = RTE_MIN(*min_time, last_time);
2654 		*total_time += last_time;
2655 
2656 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2657 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
2658 					vector_mask);
2659 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2660 		}
2661 
2662 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2663 		dequeued += deq;
2664 	}
2665 
2666 	return i;
2667 }
2668 
2669 static int
2670 latency_test_ldpc_dec(struct rte_mempool *mempool,
2671 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2672 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2673 		const uint16_t num_to_process, uint16_t burst_sz,
2674 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2675 {
2676 	int ret = TEST_SUCCESS;
2677 	uint16_t i, j, dequeued;
2678 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2679 	uint64_t start_time = 0, last_time = 0;
2680 
2681 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2682 		uint16_t enq = 0, deq = 0;
2683 		bool first_time = true;
2684 		last_time = 0;
2685 
2686 		if (unlikely(num_to_process - dequeued < burst_sz))
2687 			burst_sz = num_to_process - dequeued;
2688 
2689 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2690 		TEST_ASSERT_SUCCESS(ret,
2691 				"rte_bbdev_dec_op_alloc_bulk() failed");
2692 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2693 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
2694 					bufs->inputs,
2695 					bufs->hard_outputs,
2696 					bufs->soft_outputs,
2697 					bufs->harq_inputs,
2698 					bufs->harq_outputs,
2699 					ref_op);
2700 
2701 		/* Set counter to validate the ordering */
2702 		for (j = 0; j < burst_sz; ++j)
2703 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2704 
2705 		start_time = rte_rdtsc_precise();
2706 
2707 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2708 				&ops_enq[enq], burst_sz);
2709 		TEST_ASSERT(enq == burst_sz,
2710 				"Error enqueueing burst, expected %u, got %u",
2711 				burst_sz, enq);
2712 
2713 		/* Dequeue */
2714 		do {
2715 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2716 					&ops_deq[deq], burst_sz - deq);
2717 			if (likely(first_time && (deq > 0))) {
2718 				last_time = rte_rdtsc_precise() - start_time;
2719 				first_time = false;
2720 			}
2721 		} while (unlikely(burst_sz != deq));
2722 
2723 		*max_time = RTE_MAX(*max_time, last_time);
2724 		*min_time = RTE_MIN(*min_time, last_time);
2725 		*total_time += last_time;
2726 
2727 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2728 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
2729 					vector_mask);
2730 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2731 		}
2732 
2733 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2734 		dequeued += deq;
2735 	}
2736 
2737 	return i;
2738 }
2739 
2740 static int
2741 latency_test_enc(struct rte_mempool *mempool,
2742 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2743 		uint16_t dev_id, uint16_t queue_id,
2744 		const uint16_t num_to_process, uint16_t burst_sz,
2745 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2746 {
2747 	int ret = TEST_SUCCESS;
2748 	uint16_t i, j, dequeued;
2749 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2750 	uint64_t start_time = 0, last_time = 0;
2751 
2752 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2753 		uint16_t enq = 0, deq = 0;
2754 		bool first_time = true;
2755 		last_time = 0;
2756 
2757 		if (unlikely(num_to_process - dequeued < burst_sz))
2758 			burst_sz = num_to_process - dequeued;
2759 
2760 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2761 		TEST_ASSERT_SUCCESS(ret,
2762 				"rte_bbdev_enc_op_alloc_bulk() failed");
2763 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2764 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2765 					bufs->inputs,
2766 					bufs->hard_outputs,
2767 					ref_op);
2768 
2769 		/* Set counter to validate the ordering */
2770 		for (j = 0; j < burst_sz; ++j)
2771 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2772 
2773 		start_time = rte_rdtsc_precise();
2774 
2775 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
2776 				burst_sz);
2777 		TEST_ASSERT(enq == burst_sz,
2778 				"Error enqueueing burst, expected %u, got %u",
2779 				burst_sz, enq);
2780 
2781 		/* Dequeue */
2782 		do {
2783 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2784 					&ops_deq[deq], burst_sz - deq);
2785 			if (likely(first_time && (deq > 0))) {
2786 				last_time = rte_rdtsc_precise() - start_time;
2787 				first_time = false;
2788 			}
2789 		} while (unlikely(burst_sz != deq));
2790 
2791 		*max_time = RTE_MAX(*max_time, last_time);
2792 		*min_time = RTE_MIN(*min_time, last_time);
2793 		*total_time += last_time;
2794 
2795 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2796 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2797 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2798 		}
2799 
2800 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2801 		dequeued += deq;
2802 	}
2803 
2804 	return i;
2805 }
2806 
2807 static int
2808 latency_test_ldpc_enc(struct rte_mempool *mempool,
2809 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2810 		uint16_t dev_id, uint16_t queue_id,
2811 		const uint16_t num_to_process, uint16_t burst_sz,
2812 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2813 {
2814 	int ret = TEST_SUCCESS;
2815 	uint16_t i, j, dequeued;
2816 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2817 	uint64_t start_time = 0, last_time = 0;
2818 
2819 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2820 		uint16_t enq = 0, deq = 0;
2821 		bool first_time = true;
2822 		last_time = 0;
2823 
2824 		if (unlikely(num_to_process - dequeued < burst_sz))
2825 			burst_sz = num_to_process - dequeued;
2826 
2827 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2828 
2829 		TEST_ASSERT_SUCCESS(ret,
2830 				"rte_bbdev_enc_op_alloc_bulk() failed");
2831 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2832 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
2833 					bufs->inputs,
2834 					bufs->hard_outputs,
2835 					ref_op);
2836 
2837 		/* Set counter to validate the ordering */
2838 		for (j = 0; j < burst_sz; ++j)
2839 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2840 
2841 		start_time = rte_rdtsc_precise();
2842 
2848 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
2849 				&ops_enq[enq], burst_sz);
2850 		TEST_ASSERT(enq == burst_sz,
2851 				"Error enqueueing burst, expected %u, got %u",
2852 				burst_sz, enq);
2853 
2854 		/* Dequeue */
2855 		do {
2856 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2857 					&ops_deq[deq], burst_sz - deq);
2858 			if (likely(first_time && (deq > 0))) {
2859 				last_time = rte_rdtsc_precise() - start_time;
2860 				first_time = false;
2861 			}
2862 		} while (unlikely(burst_sz != deq));
2863 
2864 		*max_time = RTE_MAX(*max_time, last_time);
2865 		*min_time = RTE_MIN(*min_time, last_time);
2866 		*total_time += last_time;
2867 
2868 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2869 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2870 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2871 		}
2872 
2878 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2879 		dequeued += deq;
2880 	}
2881 
2882 	return i;
2883 }
2884 
2885 static int
2886 latency_test(struct active_device *ad,
2887 		struct test_op_params *op_params)
2888 {
2889 	int iter;
2890 	uint16_t burst_sz = op_params->burst_sz;
2891 	const uint16_t num_to_process = op_params->num_to_process;
2892 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2893 	const uint16_t queue_id = ad->queue_ids[0];
2894 	struct test_buffers *bufs = NULL;
2895 	struct rte_bbdev_info info;
2896 	uint64_t total_time, min_time, max_time;
2897 	const char *op_type_str;
2898 
2899 	total_time = max_time = 0;
2900 	min_time = UINT64_MAX;
2901 
2902 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2903 			"BURST_SIZE should be <= %u", MAX_BURST);
2904 
2905 	rte_bbdev_info_get(ad->dev_id, &info);
2906 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2907 
2908 	op_type_str = rte_bbdev_op_type_str(op_type);
2909 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2910 
2911 	printf("+ ------------------------------------------------------- +\n");
2912 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
2913 			info.dev_name, burst_sz, num_to_process, op_type_str);
2914 
2915 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2916 		iter = latency_test_dec(op_params->mp, bufs,
2917 				op_params->ref_dec_op, op_params->vector_mask,
2918 				ad->dev_id, queue_id, num_to_process,
2919 				burst_sz, &total_time, &min_time, &max_time);
2920 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
2921 		iter = latency_test_enc(op_params->mp, bufs,
2922 				op_params->ref_enc_op, ad->dev_id, queue_id,
2923 				num_to_process, burst_sz, &total_time,
2924 				&min_time, &max_time);
2925 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
2926 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
2927 				op_params->ref_enc_op, ad->dev_id, queue_id,
2928 				num_to_process, burst_sz, &total_time,
2929 				&min_time, &max_time);
2930 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
2931 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
2932 				op_params->ref_dec_op, op_params->vector_mask,
2933 				ad->dev_id, queue_id, num_to_process,
2934 				burst_sz, &total_time, &min_time, &max_time);
2935 	else
2936 		iter = latency_test_enc(op_params->mp, bufs,
2937 					op_params->ref_enc_op,
2938 					ad->dev_id, queue_id,
2939 					num_to_process, burst_sz, &total_time,
2940 					&min_time, &max_time);
2941 
2942 	if (iter <= 0)
2943 		return TEST_FAILED;
2944 
2945 	printf("Operation latency:\n"
2946 			"\tavg: %lg cycles, %lg us\n"
2947 			"\tmin: %lg cycles, %lg us\n"
2948 			"\tmax: %lg cycles, %lg us\n",
2949 			(double)total_time / (double)iter,
2950 			(double)(total_time * 1000000) / (double)iter /
2951 			(double)rte_get_tsc_hz(), (double)min_time,
2952 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
2953 			(double)max_time, (double)(max_time * 1000000) /
2954 			(double)rte_get_tsc_hz());
2955 
2956 	return TEST_SUCCESS;
2957 }
2958 
2959 #ifdef RTE_BBDEV_OFFLOAD_COST
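/*
 * Read the raw per-queue counters straight from the device's queue data
 * (rather than the device-level stats API) so that acc_offload_cycles can
 * be attributed to the single queue exercised by these tests.
 */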
2960 static int
2961 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
2962 		struct rte_bbdev_stats *stats)
2963 {
2964 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
2965 	struct rte_bbdev_stats *q_stats;
2966 
2967 	if (queue_id >= dev->data->num_queues)
2968 		return -1;
2969 
2970 	q_stats = &dev->data->queues[queue_id].queue_stats;
2971 
2972 	stats->enqueued_count = q_stats->enqueued_count;
2973 	stats->dequeued_count = q_stats->dequeued_count;
2974 	stats->enqueue_err_count = q_stats->enqueue_err_count;
2975 	stats->dequeue_err_count = q_stats->dequeue_err_count;
2976 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2977 
2978 	return 0;
2979 }
2980 
2981 static int
2982 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2983 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
2984 		uint16_t queue_id, const uint16_t num_to_process,
2985 		uint16_t burst_sz, struct test_time_stats *time_st)
2986 {
2987 	int i, dequeued, ret;
2988 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2989 	uint64_t enq_start_time, deq_start_time;
2990 	uint64_t enq_sw_last_time, deq_last_time;
2991 	struct rte_bbdev_stats stats;
2992 
2993 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2994 		uint16_t enq = 0, deq = 0;
2995 
2996 		if (unlikely(num_to_process - dequeued < burst_sz))
2997 			burst_sz = num_to_process - dequeued;
2998 
2999 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3000 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3001 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3002 					bufs->inputs,
3003 					bufs->hard_outputs,
3004 					bufs->soft_outputs,
3005 					ref_op);
3006 
3007 		/* Start time meas for enqueue function offload latency */
3008 		enq_start_time = rte_rdtsc_precise();
3009 		do {
3010 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
3011 					&ops_enq[enq], burst_sz - enq);
3012 		} while (unlikely(burst_sz != enq));
3013 
3014 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3015 		TEST_ASSERT_SUCCESS(ret,
3016 				"Failed to get stats for queue (%u) of device (%u)",
3017 				queue_id, dev_id);
3018 
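		/*
		 * Split the measured enqueue time: acc_offload_cycles, as
		 * reported by the driver, is the accelerator part, and the
		 * remainder computed below is attributed to software/driver
		 * overhead.
		 */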
3019 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3020 				stats.acc_offload_cycles;
3021 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3022 				enq_sw_last_time);
3023 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3024 				enq_sw_last_time);
3025 		time_st->enq_sw_total_time += enq_sw_last_time;
3026 
3027 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3028 				stats.acc_offload_cycles);
3029 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3030 				stats.acc_offload_cycles);
3031 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3032 
3033 		/* give time for device to process ops */
3034 		rte_delay_us(200);
3035 
3036 		/* Start time meas for dequeue function offload latency */
3037 		deq_start_time = rte_rdtsc_precise();
3038 		/* Dequeue one operation */
3039 		do {
3040 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3041 					&ops_deq[deq], 1);
3042 		} while (unlikely(deq != 1));
3043 
3044 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3045 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3046 				deq_last_time);
3047 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3048 				deq_last_time);
3049 		time_st->deq_total_time += deq_last_time;
3050 
3051 		/* Dequeue remaining operations if needed */
3052 		while (burst_sz != deq)
3053 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3054 					&ops_deq[deq], burst_sz - deq);
3055 
3056 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3057 		dequeued += deq;
3058 	}
3059 
3060 	return i;
3061 }
3062 
3063 static int
3064 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
3065 		struct test_buffers *bufs,
3066 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3067 		uint16_t queue_id, const uint16_t num_to_process,
3068 		uint16_t burst_sz, struct test_time_stats *time_st)
3069 {
3070 	int i, dequeued, ret;
3071 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3072 	uint64_t enq_start_time, deq_start_time;
3073 	uint64_t enq_sw_last_time, deq_last_time;
3074 	struct rte_bbdev_stats stats;
3075 
3076 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3077 		uint16_t enq = 0, deq = 0;
3078 
3079 		if (unlikely(num_to_process - dequeued < burst_sz))
3080 			burst_sz = num_to_process - dequeued;
3081 
3082 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3083 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3084 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3085 					bufs->inputs,
3086 					bufs->hard_outputs,
3087 					bufs->soft_outputs,
3088 					bufs->harq_inputs,
3089 					bufs->harq_outputs,
3090 					ref_op);
3091 
3092 		/* Start time meas for enqueue function offload latency */
3093 		enq_start_time = rte_rdtsc_precise();
3094 		do {
3095 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3096 					&ops_enq[enq], burst_sz - enq);
3097 		} while (unlikely(burst_sz != enq));
3098 
3099 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3100 		TEST_ASSERT_SUCCESS(ret,
3101 				"Failed to get stats for queue (%u) of device (%u)",
3102 				queue_id, dev_id);
3103 
3104 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3105 				stats.acc_offload_cycles;
3106 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3107 				enq_sw_last_time);
3108 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3109 				enq_sw_last_time);
3110 		time_st->enq_sw_total_time += enq_sw_last_time;
3111 
3112 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3113 				stats.acc_offload_cycles);
3114 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3115 				stats.acc_offload_cycles);
3116 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3117 
3118 		/* give time for device to process ops */
3119 		rte_delay_us(200);
3120 
3121 		/* Start time meas for dequeue function offload latency */
3122 		deq_start_time = rte_rdtsc_precise();
3123 		/* Dequeue one operation */
3124 		do {
3125 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3126 					&ops_deq[deq], 1);
3127 		} while (unlikely(deq != 1));
3128 
3129 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3130 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3131 				deq_last_time);
3132 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3133 				deq_last_time);
3134 		time_st->deq_total_time += deq_last_time;
3135 
3136 		/* Dequeue remaining operations if needed */
3137 		while (burst_sz != deq)
3138 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3139 					&ops_deq[deq], burst_sz - deq);
3140 
3141 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3142 		dequeued += deq;
3143 	}
3144 
3145 	return i;
3146 }
3147 
3148 static int
3149 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
3150 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3151 		uint16_t queue_id, const uint16_t num_to_process,
3152 		uint16_t burst_sz, struct test_time_stats *time_st)
3153 {
3154 	int i, dequeued, ret;
3155 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3156 	uint64_t enq_start_time, deq_start_time;
3157 	uint64_t enq_sw_last_time, deq_last_time;
3158 	struct rte_bbdev_stats stats;
3159 
3160 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3161 		uint16_t enq = 0, deq = 0;
3162 
3163 		if (unlikely(num_to_process - dequeued < burst_sz))
3164 			burst_sz = num_to_process - dequeued;
3165 
3166 		rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3167 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3168 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3169 					bufs->inputs,
3170 					bufs->hard_outputs,
3171 					ref_op);
3172 
3173 		/* Start time meas for enqueue function offload latency */
3174 		enq_start_time = rte_rdtsc_precise();
3175 		do {
3176 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
3177 					&ops_enq[enq], burst_sz - enq);
3178 		} while (unlikely(burst_sz != enq));
3179 
3180 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3181 		TEST_ASSERT_SUCCESS(ret,
3182 				"Failed to get stats for queue (%u) of device (%u)",
3183 				queue_id, dev_id);
3184 
3185 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3186 				stats.acc_offload_cycles;
3187 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3188 				enq_sw_last_time);
3189 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3190 				enq_sw_last_time);
3191 		time_st->enq_sw_total_time += enq_sw_last_time;
3192 
3193 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3194 				stats.acc_offload_cycles);
3195 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3196 				stats.acc_offload_cycles);
3197 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3198 
3199 		/* give time for device to process ops */
3200 		rte_delay_us(200);
3201 
3202 		/* Start time meas for dequeue function offload latency */
3203 		deq_start_time = rte_rdtsc_precise();
3204 		/* Dequeue one operation */
3205 		do {
3206 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3207 					&ops_deq[deq], 1);
3208 		} while (unlikely(deq != 1));
3209 
3210 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3211 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3212 				deq_last_time);
3213 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3214 				deq_last_time);
3215 		time_st->deq_total_time += deq_last_time;
3216 
3217 		while (burst_sz != deq)
3218 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3219 					&ops_deq[deq], burst_sz - deq);
3220 
3221 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3222 		dequeued += deq;
3223 	}
3224 
3225 	return i;
3226 }
3227 
3228 static int
3229 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
3230 		struct test_buffers *bufs,
3231 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3232 		uint16_t queue_id, const uint16_t num_to_process,
3233 		uint16_t burst_sz, struct test_time_stats *time_st)
3234 {
3235 	int i, dequeued, ret;
3236 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3237 	uint64_t enq_start_time, deq_start_time;
3238 	uint64_t enq_sw_last_time, deq_last_time;
3239 	struct rte_bbdev_stats stats;
3240 
3241 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3242 		uint16_t enq = 0, deq = 0;
3243 
3244 		if (unlikely(num_to_process - dequeued < burst_sz))
3245 			burst_sz = num_to_process - dequeued;
3246 
3247 		rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3248 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3249 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3250 					bufs->inputs,
3251 					bufs->hard_outputs,
3252 					ref_op);
3253 
3254 		/* Start time meas for enqueue function offload latency */
3255 		enq_start_time = rte_rdtsc_precise();
3256 		do {
3257 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3258 					&ops_enq[enq], burst_sz - enq);
3259 		} while (unlikely(burst_sz != enq));
3260 
3261 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3262 		TEST_ASSERT_SUCCESS(ret,
3263 				"Failed to get stats for queue (%u) of device (%u)",
3264 				queue_id, dev_id);
3265 
3266 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3267 				stats.acc_offload_cycles;
3268 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3269 				enq_sw_last_time);
3270 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3271 				enq_sw_last_time);
3272 		time_st->enq_sw_total_time += enq_sw_last_time;
3273 
3274 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3275 				stats.acc_offload_cycles);
3276 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3277 				stats.acc_offload_cycles);
3278 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3279 
3280 		/* give time for device to process ops */
3281 		rte_delay_us(200);
3282 
3283 		/* Start time meas for dequeue function offload latency */
3284 		deq_start_time = rte_rdtsc_precise();
3285 		/* Dequeue one operation */
3286 		do {
3287 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3288 					&ops_deq[deq], 1);
3289 		} while (unlikely(deq != 1));
3290 
3291 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3292 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3293 				deq_last_time);
3294 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3295 				deq_last_time);
3296 		time_st->deq_total_time += deq_last_time;
3297 
3298 		while (burst_sz != deq)
3299 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3300 					&ops_deq[deq], burst_sz - deq);
3301 
3302 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3303 		dequeued += deq;
3304 	}
3305 
3306 	return i;
3307 }
3308 #endif
3309 
3310 static int
3311 offload_cost_test(struct active_device *ad,
3312 		struct test_op_params *op_params)
3313 {
3314 #ifndef RTE_BBDEV_OFFLOAD_COST
3315 	RTE_SET_USED(ad);
3316 	RTE_SET_USED(op_params);
3317 	printf("Offload latency test is disabled.\n");
3318 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3319 	return TEST_SKIPPED;
3320 #else
3321 	int iter;
3322 	uint16_t burst_sz = op_params->burst_sz;
3323 	const uint16_t num_to_process = op_params->num_to_process;
3324 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3325 	const uint16_t queue_id = ad->queue_ids[0];
3326 	struct test_buffers *bufs = NULL;
3327 	struct rte_bbdev_info info;
3328 	const char *op_type_str;
3329 	struct test_time_stats time_st;
3330 
3331 	memset(&time_st, 0, sizeof(struct test_time_stats));
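	/*
	 * Min trackers start at UINT64_MAX so the first measured sample
	 * always replaces them via RTE_MIN().
	 */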
3332 	time_st.enq_sw_min_time = UINT64_MAX;
3333 	time_st.enq_acc_min_time = UINT64_MAX;
3334 	time_st.deq_min_time = UINT64_MAX;
3335 
3336 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3337 			"BURST_SIZE should be <= %u", MAX_BURST);
3338 
3339 	rte_bbdev_info_get(ad->dev_id, &info);
3340 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3341 
3342 	op_type_str = rte_bbdev_op_type_str(op_type);
3343 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3344 
3345 	printf("+ ------------------------------------------------------- +\n");
3346 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3347 			info.dev_name, burst_sz, num_to_process, op_type_str);
3348 
3349 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3350 		iter = offload_latency_test_dec(op_params->mp, bufs,
3351 				op_params->ref_dec_op, ad->dev_id, queue_id,
3352 				num_to_process, burst_sz, &time_st);
3353 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3354 		iter = offload_latency_test_enc(op_params->mp, bufs,
3355 				op_params->ref_enc_op, ad->dev_id, queue_id,
3356 				num_to_process, burst_sz, &time_st);
3357 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3358 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
3359 				op_params->ref_enc_op, ad->dev_id, queue_id,
3360 				num_to_process, burst_sz, &time_st);
3361 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3362 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
3363 			op_params->ref_dec_op, ad->dev_id, queue_id,
3364 			num_to_process, burst_sz, &time_st);
3365 	else
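		/*
		 * RTE_BBDEV_OP_NONE (and any other unlisted op type) falls
		 * back to the Turbo encode measurement path.
		 */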
3366 		iter = offload_latency_test_enc(op_params->mp, bufs,
3367 				op_params->ref_enc_op, ad->dev_id, queue_id,
3368 				num_to_process, burst_sz, &time_st);
3369 
3370 	if (iter <= 0)
3371 		return TEST_FAILED;
3372 
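	/*
	 * Averages are per measured burst (iter); cycle counts are converted
	 * to microseconds as us = cycles * 1000000 / rte_get_tsc_hz().
	 */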
3373 	printf("Enqueue driver offload cost latency:\n"
3374 			"\tavg: %lg cycles, %lg us\n"
3375 			"\tmin: %lg cycles, %lg us\n"
3376 			"\tmax: %lg cycles, %lg us\n"
3377 			"Enqueue accelerator offload cost latency:\n"
3378 			"\tavg: %lg cycles, %lg us\n"
3379 			"\tmin: %lg cycles, %lg us\n"
3380 			"\tmax: %lg cycles, %lg us\n",
3381 			(double)time_st.enq_sw_total_time / (double)iter,
3382 			(double)(time_st.enq_sw_total_time * 1000000) /
3383 			(double)iter / (double)rte_get_tsc_hz(),
3384 			(double)time_st.enq_sw_min_time,
3385 			(double)(time_st.enq_sw_min_time * 1000000) /
3386 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
3387 			(double)(time_st.enq_sw_max_time * 1000000) /
3388 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
3389 			(double)iter,
3390 			(double)(time_st.enq_acc_total_time * 1000000) /
3391 			(double)iter / (double)rte_get_tsc_hz(),
3392 			(double)time_st.enq_acc_min_time,
3393 			(double)(time_st.enq_acc_min_time * 1000000) /
3394 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
3395 			(double)(time_st.enq_acc_max_time * 1000000) /
3396 			rte_get_tsc_hz());
3397 
3398 	printf("Dequeue offload cost latency - one op:\n"
3399 			"\tavg: %lg cycles, %lg us\n"
3400 			"\tmin: %lg cycles, %lg us\n"
3401 			"\tmax: %lg cycles, %lg us\n",
3402 			(double)time_st.deq_total_time / (double)iter,
3403 			(double)(time_st.deq_total_time * 1000000) /
3404 			(double)iter / (double)rte_get_tsc_hz(),
3405 			(double)time_st.deq_min_time,
3406 			(double)(time_st.deq_min_time * 1000000) /
3407 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
3408 			(double)(time_st.deq_max_time * 1000000) /
3409 			rte_get_tsc_hz());
3410 
3411 	return TEST_SUCCESS;
3412 #endif
3413 }
3414 
3415 #ifdef RTE_BBDEV_OFFLOAD_COST
3416 static int
3417 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
3418 		const uint16_t num_to_process, uint16_t burst_sz,
3419 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3420 		uint64_t *deq_max_time)
3421 {
3422 	int i, deq_total;
3423 	struct rte_bbdev_dec_op *ops[MAX_BURST];
3424 	uint64_t deq_start_time, deq_last_time;
3425 
3426 	/* Test deq offload latency from an empty queue */
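	/*
	 * Nothing is enqueued beforehand, so every dequeue returns empty and
	 * the loop measures the pure driver polling cost of the dequeue path.
	 */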
3427 
3428 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3429 			++i, deq_total += burst_sz) {
3430 		deq_start_time = rte_rdtsc_precise();
3431 
3432 		if (unlikely(num_to_process - deq_total < burst_sz))
3433 			burst_sz = num_to_process - deq_total;
3434 		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
3435 
3436 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3437 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3438 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3439 		*deq_total_time += deq_last_time;
3440 	}
3441 
3442 	return i;
3443 }
3444 
3445 static int
3446 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
3447 		const uint16_t num_to_process, uint16_t burst_sz,
3448 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3449 		uint64_t *deq_max_time)
3450 {
3451 	int i, deq_total;
3452 	struct rte_bbdev_enc_op *ops[MAX_BURST];
3453 	uint64_t deq_start_time, deq_last_time;
3454 
3455 	/* Test deq offload latency from an empty queue */
3456 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3457 			++i, deq_total += burst_sz) {
3458 		deq_start_time = rte_rdtsc_precise();
3459 
3460 		if (unlikely(num_to_process - deq_total < burst_sz))
3461 			burst_sz = num_to_process - deq_total;
3462 		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
3463 
3464 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3465 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3466 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3467 		*deq_total_time += deq_last_time;
3468 	}
3469 
3470 	return i;
3471 }
3472 #endif
3473 
3474 static int
3475 offload_latency_empty_q_test(struct active_device *ad,
3476 		struct test_op_params *op_params)
3477 {
3478 #ifndef RTE_BBDEV_OFFLOAD_COST
3479 	RTE_SET_USED(ad);
3480 	RTE_SET_USED(op_params);
3481 	printf("Offload latency empty dequeue test is disabled.\n");
3482 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3483 	return TEST_SKIPPED;
3484 #else
3485 	int iter;
3486 	uint64_t deq_total_time, deq_min_time, deq_max_time;
3487 	uint16_t burst_sz = op_params->burst_sz;
3488 	const uint16_t num_to_process = op_params->num_to_process;
3489 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3490 	const uint16_t queue_id = ad->queue_ids[0];
3491 	struct rte_bbdev_info info;
3492 	const char *op_type_str;
3493 
3494 	deq_total_time = deq_max_time = 0;
3495 	deq_min_time = UINT64_MAX;
3496 
3497 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3498 			"BURST_SIZE should be <= %u", MAX_BURST);
3499 
3500 	rte_bbdev_info_get(ad->dev_id, &info);
3501 
3502 	op_type_str = rte_bbdev_op_type_str(op_type);
3503 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3504 
3505 	printf("+ ------------------------------------------------------- +\n");
3506 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3507 			info.dev_name, burst_sz, num_to_process, op_type_str);
3508 
3509 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3510 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
3511 				num_to_process, burst_sz, &deq_total_time,
3512 				&deq_min_time, &deq_max_time);
3513 	else
3514 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
3515 				num_to_process, burst_sz, &deq_total_time,
3516 				&deq_min_time, &deq_max_time);
3517 
3518 	if (iter <= 0)
3519 		return TEST_FAILED;
3520 
3521 	printf("Empty dequeue offload:\n"
3522 			"\tavg: %lg cycles, %lg us\n"
3523 			"\tmin: %lg cycles, %lg us\n"
3524 			"\tmax: %lg cycles, %lg us\n",
3525 			(double)deq_total_time / (double)iter,
3526 			(double)(deq_total_time * 1000000) / (double)iter /
3527 			(double)rte_get_tsc_hz(), (double)deq_min_time,
3528 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
3529 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
3530 			rte_get_tsc_hz());
3531 
3532 	return TEST_SUCCESS;
3533 #endif
3534 }
3535 
3536 static int
3537 throughput_tc(void)
3538 {
3539 	return run_test_case(throughput_test);
3540 }
3541 
3542 static int
3543 offload_cost_tc(void)
3544 {
3545 	return run_test_case(offload_cost_test);
3546 }
3547 
3548 static int
3549 offload_latency_empty_q_tc(void)
3550 {
3551 	return run_test_case(offload_latency_empty_q_test);
3552 }
3553 
3554 static int
3555 latency_tc(void)
3556 {
3557 	return run_test_case(latency_test);
3558 }
3559 
3560 static int
3561 interrupt_tc(void)
3562 {
3563 	return run_test_case(throughput_test);
3564 }
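/*
 * Note: interrupt_tc reuses the throughput_test body; the interrupt-driven
 * dequeue path is presumably selected by interrupt_testsuite_setup (see
 * bbdev_interrupt_testsuite below) rather than by a separate test function.
 */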
3565 
3566 static struct unit_test_suite bbdev_throughput_testsuite = {
3567 	.suite_name = "BBdev Throughput Tests",
3568 	.setup = testsuite_setup,
3569 	.teardown = testsuite_teardown,
3570 	.unit_test_cases = {
3571 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
3572 		TEST_CASES_END() /**< NULL terminate unit test array */
3573 	}
3574 };
3575 
3576 static struct unit_test_suite bbdev_validation_testsuite = {
3577 	.suite_name = "BBdev Validation Tests",
3578 	.setup = testsuite_setup,
3579 	.teardown = testsuite_teardown,
3580 	.unit_test_cases = {
3581 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3582 		TEST_CASES_END() /**< NULL terminate unit test array */
3583 	}
3584 };
3585 
3586 static struct unit_test_suite bbdev_latency_testsuite = {
3587 	.suite_name = "BBdev Latency Tests",
3588 	.setup = testsuite_setup,
3589 	.teardown = testsuite_teardown,
3590 	.unit_test_cases = {
3591 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3592 		TEST_CASES_END() /**< NULL terminate unit test array */
3593 	}
3594 };
3595 
3596 static struct unit_test_suite bbdev_offload_cost_testsuite = {
3597 	.suite_name = "BBdev Offload Cost Tests",
3598 	.setup = testsuite_setup,
3599 	.teardown = testsuite_teardown,
3600 	.unit_test_cases = {
3601 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
3602 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
3603 		TEST_CASES_END() /**< NULL terminate unit test array */
3604 	}
3605 };
3606 
3607 static struct unit_test_suite bbdev_interrupt_testsuite = {
3608 	.suite_name = "BBdev Interrupt Tests",
3609 	.setup = interrupt_testsuite_setup,
3610 	.teardown = testsuite_teardown,
3611 	.unit_test_cases = {
3612 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
3613 		TEST_CASES_END() /**< NULL terminate unit test array */
3614 	}
3615 };
3616 
3617 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
3618 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
3619 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
3620 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
3621 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
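/*
 * The names registered above (throughput, validation, latency, offload,
 * interrupt) are the identifiers the test application exposes for selecting
 * a suite from its command line (e.g. via a -c/--test-cases option or the
 * test-bbdev.py wrapper); the exact invocation is defined by the app's
 * main.c, not by this file.
 */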
3622