xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 86dfed2a8ed704e013f054985a92d46f07ff48d1)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <inttypes.h>
8 #include <math.h>
9 
10 #include <rte_eal.h>
11 #include <rte_common.h>
12 #include <rte_dev.h>
13 #include <rte_launch.h>
14 #include <rte_bbdev.h>
15 #include <rte_cycles.h>
16 #include <rte_lcore.h>
17 #include <rte_malloc.h>
18 #include <rte_random.h>
19 #include <rte_hexdump.h>
20 #include <rte_interrupts.h>
21 
22 #include "main.h"
23 #include "test_bbdev_vector.h"
24 
25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
26 
27 #define MAX_QUEUES RTE_MAX_LCORE
28 #define TEST_REPETITIONS 100
29 #define TIME_OUT_POLL 1e8
30 #define WAIT_OFFLOAD_US 1000
31 
32 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
33 #include <fpga_lte_fec.h>
34 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
35 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
36 #define VF_UL_4G_QUEUE_VALUE 4
37 #define VF_DL_4G_QUEUE_VALUE 4
38 #define UL_4G_BANDWIDTH 3
39 #define DL_4G_BANDWIDTH 3
40 #define UL_4G_LOAD_BALANCE 128
41 #define DL_4G_LOAD_BALANCE 128
42 #define FLR_4G_TIMEOUT 610
43 #endif
44 
45 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
46 #include <rte_pmd_fpga_5gnr_fec.h>
47 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
48 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
49 #define VF_UL_5G_QUEUE_VALUE 4
50 #define VF_DL_5G_QUEUE_VALUE 4
51 #define UL_5G_BANDWIDTH 3
52 #define DL_5G_BANDWIDTH 3
53 #define UL_5G_LOAD_BALANCE 128
54 #define DL_5G_LOAD_BALANCE 128
55 #endif
56 
57 #ifdef RTE_BASEBAND_ACC
58 #include <rte_acc_cfg.h>
59 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
60 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
61 #define ACC100_QMGR_NUM_AQS 16
62 #define ACC100_QMGR_NUM_QGS 2
63 #define ACC100_QMGR_AQ_DEPTH 5
64 #define ACC100_QMGR_INVALID_IDX -1
65 #define ACC100_QMGR_RR 1
66 #define ACC100_QOS_GBR 0
67 #define ACC200PF_DRIVER_NAME   ("intel_acc200_pf")
68 #define ACC200VF_DRIVER_NAME   ("intel_acc200_vf")
69 #define ACC200_QMGR_NUM_AQS 16
70 #define ACC200_QMGR_NUM_QGS 2
71 #define ACC200_QMGR_AQ_DEPTH 5
72 #define ACC200_QMGR_INVALID_IDX -1
73 #define ACC200_QMGR_RR 1
74 #define ACC200_QOS_GBR 0
75 #endif
76 
77 #define OPS_CACHE_SIZE 256U
78 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
79 
80 #define SYNC_WAIT 0
81 #define SYNC_START 1
82 
83 #define INVALID_QUEUE_ID -1
84 /* Increment for next code block in external HARQ memory */
85 #define HARQ_INCR 32768
86 /* Headroom for filler LLRs insertion in HARQ buffer */
87 #define FILLER_HEADROOM 2048
88 /* Constants for K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
89 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
90 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
91 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
92 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
93 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
94 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
95 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
96 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
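
/*
 * Illustrative sketch (not part of the test flow, example_k0 is a hypothetical
 * helper): how the numerators above map to the k0 starting positions of
 * 3GPP TS 38.212 Table 5.4.2.1-2, where for rv_index > 0 the circular-buffer
 * start is k0 = floor(frac * Ncb / (N_zc * Zc)) * Zc, with N_zc = 66 for BG1
 * and 50 for BG2:
 *
 *	static uint16_t
 *	example_k0(uint16_t n_cb, uint16_t z_c, uint8_t basegraph, uint8_t rv_index)
 *	{
 *		if (rv_index == 0)
 *			return 0;
 *		if (basegraph == 1) {
 *			if (rv_index == 1)
 *				return (K0_1_1 * n_cb / (N_ZC_1 * z_c)) * z_c;
 *			if (rv_index == 2)
 *				return (K0_2_1 * n_cb / (N_ZC_1 * z_c)) * z_c;
 *			return (K0_3_1 * n_cb / (N_ZC_1 * z_c)) * z_c;
 *		}
 *		if (rv_index == 1)
 *			return (K0_1_2 * n_cb / (N_ZC_2 * z_c)) * z_c;
 *		if (rv_index == 2)
 *			return (K0_2_2 * n_cb / (N_ZC_2 * z_c)) * z_c;
 *		return (K0_3_2 * n_cb / (N_ZC_2 * z_c)) * z_c;
 *	}
 */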
97 
98 #define HARQ_MEM_TOLERANCE 256
99 static struct test_bbdev_vector test_vector;
100 
101 /* Switch between PMD (polling) and interrupt mode for the throughput test case */
102 static bool intr_enabled;
103 
104 /* LLR arithmetic representation for numerical conversion */
105 static int ldpc_llr_decimals;
106 static int ldpc_llr_size;
107 /* Keep track of the LDPC decoder device capability flags */
108 static uint32_t ldpc_cap_flags;
109 
110 /* Represents tested active devices */
111 static struct active_device {
112 	const char *driver_name;
113 	uint8_t dev_id;
114 	uint16_t supported_ops;
115 	uint16_t queue_ids[MAX_QUEUES];
116 	uint16_t nb_queues;
117 	struct rte_mempool *ops_mempool;
118 	struct rte_mempool *in_mbuf_pool;
119 	struct rte_mempool *hard_out_mbuf_pool;
120 	struct rte_mempool *soft_out_mbuf_pool;
121 	struct rte_mempool *harq_in_mbuf_pool;
122 	struct rte_mempool *harq_out_mbuf_pool;
123 } active_devs[RTE_BBDEV_MAX_DEVS];
124 
125 static uint8_t nb_active_devs;
126 
127 /* Data buffers used by BBDEV ops */
128 struct test_buffers {
129 	struct rte_bbdev_op_data *inputs;
130 	struct rte_bbdev_op_data *hard_outputs;
131 	struct rte_bbdev_op_data *soft_outputs;
132 	struct rte_bbdev_op_data *harq_inputs;
133 	struct rte_bbdev_op_data *harq_outputs;
134 };
135 
136 /* Operation parameters specific to a given test case */
137 struct test_op_params {
138 	struct rte_mempool *mp;
139 	struct rte_bbdev_dec_op *ref_dec_op;
140 	struct rte_bbdev_enc_op *ref_enc_op;
141 	struct rte_bbdev_fft_op *ref_fft_op;
142 	uint16_t burst_sz;
143 	uint16_t num_to_process;
144 	uint16_t num_lcores;
145 	int vector_mask;
146 	uint16_t sync;
147 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
148 };
149 
150 /* Contains per lcore params */
151 struct thread_params {
152 	uint8_t dev_id;
153 	uint16_t queue_id;
154 	uint32_t lcore_id;
155 	uint64_t start_time;
156 	double ops_per_sec;
157 	double mbps;
158 	uint8_t iter_count;
159 	double iter_average;
160 	double bler;
161 	uint16_t nb_dequeued;
162 	int16_t processing_status;
163 	uint16_t burst_sz;
164 	struct test_op_params *op_params;
165 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
166 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
167 	struct rte_bbdev_fft_op *fft_ops[MAX_BURST];
168 };
169 
170 /* Stores time statistics */
171 struct test_time_stats {
172 	/* Stores software enqueue total working time */
173 	uint64_t enq_sw_total_time;
174 	/* Stores minimum value of software enqueue working time */
175 	uint64_t enq_sw_min_time;
176 	/* Stores maximum value of software enqueue working time */
177 	uint64_t enq_sw_max_time;
178 	/* Stores accelerator enqueue total working time */
179 	uint64_t enq_acc_total_time;
180 	/* Stores minimum value of accelerator enqueue working time */
181 	uint64_t enq_acc_min_time;
182 	/* Stores maximum value of accelerator enqueue working time */
183 	uint64_t enq_acc_max_time;
184 	/* Stores dequeue total working time */
185 	uint64_t deq_total_time;
186 	/* Stores minimum value of dequeue working time */
187 	uint64_t deq_min_time;
188 	/* Stores maximum value of dequeue working time */
189 	uint64_t deq_max_time;
190 };
191 
192 typedef int (test_case_function)(struct active_device *ad,
193 		struct test_op_params *op_params);
194 
195 /* Get device status before timeout exit */
196 static inline void
197 timeout_exit(uint8_t dev_id)
198 {
199 	struct rte_bbdev_info info;
200 	rte_bbdev_info_get(dev_id, &info);
201 	printf("Device Status %s\n", rte_bbdev_device_status_str(info.drv.device_status));
202 }
203 
204 static inline void
205 mbuf_reset(struct rte_mbuf *m)
206 {
207 	m->pkt_len = 0;
208 
209 	do {
210 		m->data_len = 0;
211 		m = m->next;
212 	} while (m != NULL);
213 }
214 
215 /* Read flag value 0/1 from bitmap */
216 static inline bool
217 check_bit(uint32_t bitmap, uint32_t bitmask)
218 {
219 	return bitmap & bitmask;
220 }
221 
222 static inline void
223 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
224 {
225 	ad->supported_ops |= (1 << op_type);
226 }
227 
228 static inline bool
229 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
230 {
231 	return ad->supported_ops & (1 << op_type);
232 }
233 
234 static inline bool
235 flags_match(uint32_t flags_req, uint32_t flags_present)
236 {
237 	return (flags_req & flags_present) == flags_req;
238 }
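
/*
 * flags_match() checks that every requested flag is also present, e.g.
 * (illustrative values) flags_req = 0x5 is matched by flags_present = 0xD
 * (0x5 & 0xD == 0x5) but not by flags_present = 0x9 (0x5 & 0x9 == 0x1).
 */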
239 
240 static void
241 clear_soft_out_cap(uint32_t *op_flags)
242 {
243 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
244 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
245 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
246 }
247 
248 /* Convert all the test vector op data entries to big-endian
249  * format. It is used when the device expects the input in
250  * big-endian format.
251  */
252 static inline void
253 convert_op_data_to_be(void)
254 {
255 	struct op_data_entries *op;
256 	enum op_data_type type;
257 	uint8_t nb_segs, *rem_data, temp;
258 	uint32_t *data, len;
259 	int complete, rem, i, j;
260 
261 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
262 		nb_segs = test_vector.entries[type].nb_segments;
263 		op = &test_vector.entries[type];
264 
265 		/* Invert byte endianness for all the segments */
266 		for (i = 0; i < nb_segs; ++i) {
267 			len = op->segments[i].length;
268 			data = op->segments[i].addr;
269 
270 			/* Swap complete u32 bytes */
271 			complete = len / 4;
272 			for (j = 0; j < complete; j++)
273 				data[j] = rte_bswap32(data[j]);
274 
275 			/* Swap any remaining bytes */
276 			rem = len % 4;
277 			rem_data = (uint8_t *)&data[j];
278 			for (j = 0; j < rem/2; j++) {
279 				temp = rem_data[j];
280 				rem_data[j] = rem_data[rem - j - 1];
281 				rem_data[rem - j - 1] = temp;
282 			}
283 		}
284 	}
285 }
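
/*
 * Worked example for the swap above (hypothetical 6-byte segment): input bytes
 * 01 02 03 04 05 06. complete = 6 / 4 = 1, so the first 32-bit word is
 * byte-reversed to 04 03 02 01; rem = 2, so the two trailing bytes are swapped
 * in place, giving 04 03 02 01 06 05.
 */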
286 
287 static int
288 check_dev_cap(const struct rte_bbdev_info *dev_info)
289 {
290 	unsigned int i;
291 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
292 		nb_harq_inputs, nb_harq_outputs;
293 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
294 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
295 
296 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
297 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
298 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
299 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
300 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
301 
302 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
303 		if (op_cap->type != test_vector.op_type)
304 			continue;
305 
306 		if (dev_data_endianness == RTE_BIG_ENDIAN)
307 			convert_op_data_to_be();
308 
309 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
310 			const struct rte_bbdev_op_cap_turbo_dec *cap =
311 					&op_cap->cap.turbo_dec;
312 			/* Ignore lack of soft output capability, just skip
313 			 * checking if soft output is valid.
314 			 */
315 			if ((test_vector.turbo_dec.op_flags &
316 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
317 					!(cap->capability_flags &
318 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319 				printf(
320 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
321 					dev_info->dev_name);
322 				clear_soft_out_cap(
323 					&test_vector.turbo_dec.op_flags);
324 			}
325 
326 			if (!flags_match(test_vector.turbo_dec.op_flags,
327 					cap->capability_flags))
328 				return TEST_FAILED;
329 			if (nb_inputs > cap->num_buffers_src) {
330 				printf("Too many inputs defined: %u, max: %u\n",
331 					nb_inputs, cap->num_buffers_src);
332 				return TEST_FAILED;
333 			}
334 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
335 					(test_vector.turbo_dec.op_flags &
336 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
337 				printf(
338 					"Too many soft outputs defined: %u, max: %u\n",
339 						nb_soft_outputs,
340 						cap->num_buffers_soft_out);
341 				return TEST_FAILED;
342 			}
343 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
344 				printf(
345 					"Too many hard outputs defined: %u, max: %u\n",
346 						nb_hard_outputs,
347 						cap->num_buffers_hard_out);
348 				return TEST_FAILED;
349 			}
350 			if (intr_enabled && !(cap->capability_flags &
351 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
352 				printf(
353 					"Dequeue interrupts are not supported!\n");
354 				return TEST_FAILED;
355 			}
356 
357 			return TEST_SUCCESS;
358 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
359 			const struct rte_bbdev_op_cap_turbo_enc *cap =
360 					&op_cap->cap.turbo_enc;
361 
362 			if (!flags_match(test_vector.turbo_enc.op_flags,
363 					cap->capability_flags))
364 				return TEST_FAILED;
365 			if (nb_inputs > cap->num_buffers_src) {
366 				printf("Too many inputs defined: %u, max: %u\n",
367 					nb_inputs, cap->num_buffers_src);
368 				return TEST_FAILED;
369 			}
370 			if (nb_hard_outputs > cap->num_buffers_dst) {
371 				printf(
372 					"Too many hard outputs defined: %u, max: %u\n",
373 					nb_hard_outputs, cap->num_buffers_dst);
374 				return TEST_FAILED;
375 			}
376 			if (intr_enabled && !(cap->capability_flags &
377 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
378 				printf(
379 					"Dequeue interrupts are not supported!\n");
380 				return TEST_FAILED;
381 			}
382 
383 			return TEST_SUCCESS;
384 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
385 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
386 					&op_cap->cap.ldpc_enc;
387 
388 			if (!flags_match(test_vector.ldpc_enc.op_flags,
389 					cap->capability_flags)){
390 				printf("Flag Mismatch\n");
391 				return TEST_FAILED;
392 			}
393 			if (nb_inputs > cap->num_buffers_src) {
394 				printf("Too many inputs defined: %u, max: %u\n",
395 					nb_inputs, cap->num_buffers_src);
396 				return TEST_FAILED;
397 			}
398 			if (nb_hard_outputs > cap->num_buffers_dst) {
399 				printf(
400 					"Too many hard outputs defined: %u, max: %u\n",
401 					nb_hard_outputs, cap->num_buffers_dst);
402 				return TEST_FAILED;
403 			}
404 			if (intr_enabled && !(cap->capability_flags &
405 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
406 				printf(
407 					"Dequeue interrupts are not supported!\n");
408 				return TEST_FAILED;
409 			}
410 
411 			return TEST_SUCCESS;
412 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
413 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
414 					&op_cap->cap.ldpc_dec;
415 
416 			if (!flags_match(test_vector.ldpc_dec.op_flags,
417 					cap->capability_flags)){
418 				printf("Flag Mismatch\n");
419 				return TEST_FAILED;
420 			}
421 			if (nb_inputs > cap->num_buffers_src) {
422 				printf("Too many inputs defined: %u, max: %u\n",
423 					nb_inputs, cap->num_buffers_src);
424 				return TEST_FAILED;
425 			}
426 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
427 				printf(
428 					"Too many hard outputs defined: %u, max: %u\n",
429 					nb_hard_outputs,
430 					cap->num_buffers_hard_out);
431 				return TEST_FAILED;
432 			}
433 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
434 				printf(
435 					"Too many HARQ inputs defined: %u, max: %u\n",
436 					nb_harq_inputs,
437 					cap->num_buffers_hard_out);
438 				return TEST_FAILED;
439 			}
440 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
441 				printf(
442 					"Too many HARQ outputs defined: %u, max: %u\n",
443 					nb_harq_outputs,
444 					cap->num_buffers_hard_out);
445 				return TEST_FAILED;
446 			}
447 			if (intr_enabled && !(cap->capability_flags &
448 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
449 				printf(
450 					"Dequeue interrupts are not supported!\n");
451 				return TEST_FAILED;
452 			}
453 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
454 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
455 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
456 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
457 					))) {
458 				printf("Skip loop-back with interrupt\n");
459 				return TEST_FAILED;
460 			}
461 			return TEST_SUCCESS;
462 		} else if (op_cap->type == RTE_BBDEV_OP_FFT) {
463 			const struct rte_bbdev_op_cap_fft *cap = &op_cap->cap.fft;
464 
465 			if (!flags_match(test_vector.fft.op_flags, cap->capability_flags)) {
466 				printf("Flag Mismatch\n");
467 				return TEST_FAILED;
468 			}
469 			if (nb_inputs > cap->num_buffers_src) {
470 				printf("Too many inputs defined: %u, max: %u\n",
471 					nb_inputs, cap->num_buffers_src);
472 				return TEST_FAILED;
473 			}
474 			return TEST_SUCCESS;
475 		}
476 	}
477 
478 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
479 		return TEST_SUCCESS; /* Special case for NULL device */
480 
481 	return TEST_FAILED;
482 }
483 
484 /* calculates optimal mempool size not smaller than val */
485 static unsigned int
486 optimal_mempool_size(unsigned int val)
487 {
488 	return rte_align32pow2(val + 1) - 1;
489 }
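
/*
 * Worked example: rte_align32pow2() rounds up to the next power of two, so
 * this returns the smallest value of the form 2^n - 1 that is not below val:
 * optimal_mempool_size(600) = rte_align32pow2(601) - 1 = 1023, while
 * optimal_mempool_size(511) stays 511. Sizes of the form 2^n - 1 are the most
 * memory-efficient for the ring backing the mempool.
 */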
490 
491 /* allocates mbuf mempool for inputs and outputs */
492 static struct rte_mempool *
493 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
494 		int socket_id, unsigned int mbuf_pool_size,
495 		const char *op_type_str)
496 {
497 	unsigned int i;
498 	uint32_t max_seg_sz = 0;
499 	char pool_name[RTE_MEMPOOL_NAMESIZE];
500 
501 	/* find max input segment size */
502 	for (i = 0; i < entries->nb_segments; ++i)
503 		if (entries->segments[i].length > max_seg_sz)
504 			max_seg_sz = entries->segments[i].length;
505 
506 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
507 			dev_id);
508 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
509 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
510 					+ FILLER_HEADROOM,
511 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
512 }
513 
514 static int
515 create_mempools(struct active_device *ad, int socket_id,
516 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
517 {
518 	struct rte_mempool *mp;
519 	unsigned int ops_pool_size, mbuf_pool_size = 0;
520 	char pool_name[RTE_MEMPOOL_NAMESIZE];
521 	const char *op_type_str;
522 	enum rte_bbdev_op_type op_type = org_op_type;
523 
524 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
525 	struct op_data_entries *hard_out =
526 			&test_vector.entries[DATA_HARD_OUTPUT];
527 	struct op_data_entries *soft_out =
528 			&test_vector.entries[DATA_SOFT_OUTPUT];
529 	struct op_data_entries *harq_in =
530 			&test_vector.entries[DATA_HARQ_INPUT];
531 	struct op_data_entries *harq_out =
532 			&test_vector.entries[DATA_HARQ_OUTPUT];
533 
534 	/* allocate ops mempool */
535 	ops_pool_size = optimal_mempool_size(RTE_MAX(
536 			/* Ops used plus 1 reference op */
537 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
538 			/* Minimal cache size plus 1 reference op */
539 			(unsigned int)(1.5 * rte_lcore_count() *
540 					OPS_CACHE_SIZE + 1)),
541 			OPS_POOL_SIZE_MIN));
542 
543 	if (org_op_type == RTE_BBDEV_OP_NONE)
544 		op_type = RTE_BBDEV_OP_TURBO_ENC;
545 
546 	op_type_str = rte_bbdev_op_type_str(op_type);
547 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
548 
549 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
550 			ad->dev_id);
551 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
552 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
553 	TEST_ASSERT_NOT_NULL(mp,
554 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
555 			ops_pool_size,
556 			ad->dev_id,
557 			socket_id);
558 	ad->ops_mempool = mp;
559 
560 	/* Do not create input and output mbufs for the BaseBand Null Device */
561 	if (org_op_type == RTE_BBDEV_OP_NONE)
562 		return TEST_SUCCESS;
563 
564 	/* Inputs */
565 	if (in->nb_segments > 0) {
566 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
567 				in->nb_segments);
568 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
569 				mbuf_pool_size, "in");
570 		TEST_ASSERT_NOT_NULL(mp,
571 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
572 				mbuf_pool_size,
573 				ad->dev_id,
574 				socket_id);
575 		ad->in_mbuf_pool = mp;
576 	}
577 
578 	/* Hard outputs */
579 	if (hard_out->nb_segments > 0) {
580 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
581 				hard_out->nb_segments);
582 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
583 				mbuf_pool_size,
584 				"hard_out");
585 		TEST_ASSERT_NOT_NULL(mp,
586 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
587 				mbuf_pool_size,
588 				ad->dev_id,
589 				socket_id);
590 		ad->hard_out_mbuf_pool = mp;
591 	}
592 
593 	/* Soft outputs */
594 	if (soft_out->nb_segments > 0) {
595 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
596 				soft_out->nb_segments);
597 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
598 				mbuf_pool_size,
599 				"soft_out");
600 		TEST_ASSERT_NOT_NULL(mp,
601 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
602 				mbuf_pool_size,
603 				ad->dev_id,
604 				socket_id);
605 		ad->soft_out_mbuf_pool = mp;
606 	}
607 
608 	/* HARQ inputs */
609 	if (harq_in->nb_segments > 0) {
610 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
611 				harq_in->nb_segments);
612 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
613 				mbuf_pool_size,
614 				"harq_in");
615 		TEST_ASSERT_NOT_NULL(mp,
616 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
617 				mbuf_pool_size,
618 				ad->dev_id,
619 				socket_id);
620 		ad->harq_in_mbuf_pool = mp;
621 	}
622 
623 	/* HARQ outputs */
624 	if (harq_out->nb_segments > 0) {
625 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
626 				harq_out->nb_segments);
627 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
628 				mbuf_pool_size,
629 				"harq_out");
630 		TEST_ASSERT_NOT_NULL(mp,
631 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
632 				mbuf_pool_size,
633 				ad->dev_id,
634 				socket_id);
635 		ad->harq_out_mbuf_pool = mp;
636 	}
637 
638 	return TEST_SUCCESS;
639 }
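
/*
 * Worked sizing example for the ops pool above (hypothetical numbers): with
 * 4 queues, num_ops = 2047 and 8 lcores, the candidates are
 * 4 * 2047 + 1 = 8189 and 1.5 * 8 * 256 + 1 = 3073, both above
 * OPS_POOL_SIZE_MIN (511), so optimal_mempool_size(8189) returns
 * rte_align32pow2(8190) - 1 = 8191 elements.
 */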
640 
641 static int
642 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
643 		struct test_bbdev_vector *vector)
644 {
645 	int ret;
646 	unsigned int queue_id;
647 	struct rte_bbdev_queue_conf qconf;
648 	struct active_device *ad = &active_devs[nb_active_devs];
649 	unsigned int nb_queues;
650 	enum rte_bbdev_op_type op_type = vector->op_type;
651 
652 /* Configure FPGA LTE FEC with PF & VF values
653  * if the '-i' flag is set and an FPGA device is used
654  */
655 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
656 	if ((get_init_device() == true) &&
657 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
658 		struct rte_fpga_lte_fec_conf conf;
659 		unsigned int i;
660 
661 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
662 				info->drv.driver_name);
663 
664 		/* clear default configuration before initialization */
665 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
666 
667 		/* Set PF mode :
668 		 * true if PF is used for data plane
669 		 * false for VFs
670 		 */
671 		conf.pf_mode_en = true;
672 
673 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
674 			/* Number of UL queues per VF (fpga supports 8 VFs) */
675 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
676 			/* Number of DL queues per VF (fpga supports 8 VFs) */
677 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
678 		}
679 
680 		/* UL bandwidth. Needed for schedule algorithm */
681 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
682 		/* DL bandwidth */
683 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
684 
685 		/* UL & DL load balance factor */
686 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
687 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
688 
689 		/* FLR timeout value */
690 		conf.flr_time_out = FLR_4G_TIMEOUT;
691 
692 		/* setup FPGA PF with configuration information */
693 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
694 		TEST_ASSERT_SUCCESS(ret,
695 				"Failed to configure 4G FPGA PF for bbdev %s",
696 				info->dev_name);
697 	}
698 #endif
699 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
700 	if ((get_init_device() == true) &&
701 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
702 		struct rte_fpga_5gnr_fec_conf conf;
703 		unsigned int i;
704 
705 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
706 				info->drv.driver_name);
707 
708 		/* clear default configuration before initialization */
709 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
710 
711 		/* Set PF mode :
712 		 * true if PF is used for data plane
713 		 * false for VFs
714 		 */
715 		conf.pf_mode_en = true;
716 
717 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
718 			/* Number of UL queues per VF (fpga supports 8 VFs) */
719 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
720 			/* Number of DL queues per VF (fpga supports 8 VFs) */
721 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
722 		}
723 
724 		/* UL bandwidth. Needed for schedule algorithm */
725 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
726 		/* DL bandwidth */
727 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
728 
729 		/* UL & DL load balance factor */
730 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
731 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
732 
733 		/* setup FPGA PF with configuration information */
734 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
735 		TEST_ASSERT_SUCCESS(ret,
736 				"Failed to configure 5G FPGA PF for bbdev %s",
737 				info->dev_name);
738 	}
739 #endif
740 #ifdef RTE_BASEBAND_ACC
741 	if ((get_init_device() == true) &&
742 			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
743 		struct rte_acc_conf conf;
744 		unsigned int i;
745 
746 		printf("Configure ACC100/ACC101 FEC Driver %s with default values\n",
747 				info->drv.driver_name);
748 
749 		/* clear default configuration before initialization */
750 		memset(&conf, 0, sizeof(struct rte_acc_conf));
751 
752 		/* Always set in PF mode for built-in configuration */
753 		conf.pf_mode_en = true;
754 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
755 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
756 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
757 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
758 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
759 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
760 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
761 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
762 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
763 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
764 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
765 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
766 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
767 		}
768 
769 		conf.input_pos_llr_1_bit = true;
770 		conf.output_pos_llr_1_bit = true;
771 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
772 
773 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
774 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
775 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
776 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
777 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
778 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
779 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
780 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
781 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
782 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
783 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
784 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
785 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
786 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
787 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
788 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
789 
790 		/* setup PF with configuration information */
791 		ret = rte_acc_configure(info->dev_name, &conf);
792 		TEST_ASSERT_SUCCESS(ret,
793 				"Failed to configure ACC100 PF for bbdev %s",
794 				info->dev_name);
795 	}
796 	if ((get_init_device() == true) &&
797 		(!strcmp(info->drv.driver_name, ACC200PF_DRIVER_NAME))) {
798 		struct rte_acc_conf conf;
799 		unsigned int i;
800 
801 		printf("Configure ACC200 FEC Driver %s with default values\n",
802 				info->drv.driver_name);
803 
804 		/* clear default configuration before initialization */
805 		memset(&conf, 0, sizeof(struct rte_acc_conf));
806 
807 		/* Always set in PF mode for built-in configuration */
808 		conf.pf_mode_en = true;
809 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
810 			conf.arb_dl_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
811 			conf.arb_dl_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
812 			conf.arb_dl_4g[i].round_robin_weight = ACC200_QMGR_RR;
813 			conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
814 			conf.arb_ul_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
815 			conf.arb_ul_4g[i].round_robin_weight = ACC200_QMGR_RR;
816 			conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
817 			conf.arb_dl_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
818 			conf.arb_dl_5g[i].round_robin_weight = ACC200_QMGR_RR;
819 			conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
820 			conf.arb_ul_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
821 			conf.arb_ul_5g[i].round_robin_weight = ACC200_QMGR_RR;
822 			conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR;
823 			conf.arb_fft[i].gbr_threshold2 = ACC200_QOS_GBR;
824 			conf.arb_fft[i].round_robin_weight = ACC200_QMGR_RR;
825 		}
826 
827 		conf.input_pos_llr_1_bit = true;
828 		conf.output_pos_llr_1_bit = true;
829 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
830 
831 		conf.q_ul_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
832 		conf.q_ul_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
833 		conf.q_ul_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
834 		conf.q_ul_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
835 		conf.q_dl_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
836 		conf.q_dl_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
837 		conf.q_dl_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
838 		conf.q_dl_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
839 		conf.q_ul_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
840 		conf.q_ul_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
841 		conf.q_ul_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
842 		conf.q_ul_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
843 		conf.q_dl_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
844 		conf.q_dl_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
845 		conf.q_dl_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
846 		conf.q_dl_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
847 		conf.q_fft.num_qgroups = ACC200_QMGR_NUM_QGS;
848 		conf.q_fft.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
849 		conf.q_fft.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
850 		conf.q_fft.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
851 
852 		/* setup PF with configuration information */
853 		ret = rte_acc_configure(info->dev_name, &conf);
854 		TEST_ASSERT_SUCCESS(ret,
855 				"Failed to configure ACC200 PF for bbdev %s",
856 				info->dev_name);
857 	}
858 #endif
859 	/* Refresh the device info now that the device is configured */
860 	rte_bbdev_info_get(dev_id, info);
861 	if (info->drv.device_status == RTE_BBDEV_DEV_FATAL_ERR)
862 		printf("Device Status %s\n", rte_bbdev_device_status_str(info->drv.device_status));
863 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
864 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
865 
866 	/* setup device */
867 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
868 	if (ret < 0) {
869 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
870 				dev_id, nb_queues, info->socket_id, ret);
871 		return TEST_FAILED;
872 	}
873 
874 	/* configure interrupts if needed */
875 	if (intr_enabled) {
876 		ret = rte_bbdev_intr_enable(dev_id);
877 		if (ret < 0) {
878 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
879 					ret);
880 			return TEST_FAILED;
881 		}
882 	}
883 
884 	/* setup device queues */
885 	qconf.socket = info->socket_id;
886 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
887 	qconf.priority = 0;
888 	qconf.deferred_start = 0;
889 	qconf.op_type = op_type;
890 
891 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
892 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
893 		if (ret != 0) {
894 			printf(
895 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
896 					queue_id, qconf.priority, dev_id);
897 			qconf.priority++;
898 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
899 		}
900 		if (ret != 0) {
901 			printf("All queues on dev %u allocated: %u\n", dev_id, queue_id);
902 			break;
903 		}
904 		ret = rte_bbdev_queue_start(ad->dev_id, queue_id);
905 		if (ret != 0) {
906 			printf("Failed to start queue on dev %u q_id: %u\n", dev_id, queue_id);
907 			break;
908 		}
909 		ad->queue_ids[queue_id] = queue_id;
910 	}
911 	TEST_ASSERT(queue_id != 0,
912 			"ERROR Failed to configure any queues on dev %u\n"
913 			"\tthe device may not support the related operation capability\n"
914 			"\tor the device may not have been configured yet", dev_id);
915 	ad->nb_queues = queue_id;
916 
917 	set_avail_op(ad, op_type);
918 
919 	return TEST_SUCCESS;
920 }
921 
922 static int
923 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
924 		struct test_bbdev_vector *vector)
925 {
926 	int ret;
927 
928 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
929 	active_devs[nb_active_devs].dev_id = dev_id;
930 
931 	ret = add_bbdev_dev(dev_id, info, vector);
932 	if (ret == TEST_SUCCESS)
933 		++nb_active_devs;
934 	return ret;
935 }
936 
937 static uint8_t
938 populate_active_devices(void)
939 {
940 	int ret;
941 	uint8_t dev_id;
942 	uint8_t nb_devs_added = 0;
943 	struct rte_bbdev_info info;
944 
945 	RTE_BBDEV_FOREACH(dev_id) {
946 		rte_bbdev_info_get(dev_id, &info);
947 
948 		if (check_dev_cap(&info)) {
949 			printf(
950 				"Device %d (%s) does not support specified capabilities\n",
951 					dev_id, info.dev_name);
952 			continue;
953 		}
954 
955 		ret = add_active_device(dev_id, &info, &test_vector);
956 		if (ret != 0) {
957 			printf("Adding active bbdev %s skipped\n",
958 					info.dev_name);
959 			continue;
960 		}
961 		nb_devs_added++;
962 	}
963 
964 	return nb_devs_added;
965 }
966 
967 static int
968 read_test_vector(void)
969 {
970 	int ret;
971 
972 	memset(&test_vector, 0, sizeof(test_vector));
973 	printf("Test vector file = %s\n", get_vector_filename());
974 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
975 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
976 			get_vector_filename());
977 
978 	return TEST_SUCCESS;
979 }
980 
981 static int
982 testsuite_setup(void)
983 {
984 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
985 
986 	if (populate_active_devices() == 0) {
987 		printf("No suitable devices found!\n");
988 		return TEST_SKIPPED;
989 	}
990 
991 	return TEST_SUCCESS;
992 }
993 
994 static int
995 interrupt_testsuite_setup(void)
996 {
997 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
998 
999 	/* Enable interrupts */
1000 	intr_enabled = true;
1001 
1002 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
1003 	if (populate_active_devices() == 0 ||
1004 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
1005 		intr_enabled = false;
1006 		printf("No suitable devices found!\n");
1007 		return TEST_SKIPPED;
1008 	}
1009 
1010 	return TEST_SUCCESS;
1011 }
1012 
1013 static void
1014 testsuite_teardown(void)
1015 {
1016 	uint8_t dev_id;
1017 
1018 	/* Unconfigure devices */
1019 	RTE_BBDEV_FOREACH(dev_id)
1020 		rte_bbdev_close(dev_id);
1021 
1022 	/* Clear active devices structs. */
1023 	memset(active_devs, 0, sizeof(active_devs));
1024 	nb_active_devs = 0;
1025 
1026 	/* Disable interrupts */
1027 	intr_enabled = false;
1028 }
1029 
1030 static int
1031 ut_setup(void)
1032 {
1033 	uint8_t i, dev_id;
1034 
1035 	for (i = 0; i < nb_active_devs; i++) {
1036 		dev_id = active_devs[i].dev_id;
1037 		/* reset bbdev stats */
1038 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
1039 				"Failed to reset stats of bbdev %u", dev_id);
1040 		/* start the device */
1041 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
1042 				"Failed to start bbdev %u", dev_id);
1043 	}
1044 
1045 	return TEST_SUCCESS;
1046 }
1047 
1048 static void
1049 ut_teardown(void)
1050 {
1051 	uint8_t i, dev_id;
1052 	struct rte_bbdev_stats stats;
1053 
1054 	for (i = 0; i < nb_active_devs; i++) {
1055 		dev_id = active_devs[i].dev_id;
1056 		/* read stats */
1057 		rte_bbdev_stats_get(dev_id, &stats);
1058 		/* Stop the device */
1059 		rte_bbdev_stop(dev_id);
1060 	}
1061 }
1062 
1063 static int
1064 init_op_data_objs(struct rte_bbdev_op_data *bufs,
1065 		struct op_data_entries *ref_entries,
1066 		struct rte_mempool *mbuf_pool, const uint16_t n,
1067 		enum op_data_type op_type, uint16_t min_alignment)
1068 {
1069 	int ret;
1070 	unsigned int i, j;
1071 	bool large_input = false;
1072 
1073 	for (i = 0; i < n; ++i) {
1074 		char *data;
1075 		struct op_data_buf *seg = &ref_entries->segments[0];
1076 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
1077 		TEST_ASSERT_NOT_NULL(m_head,
1078 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1079 				op_type, n * ref_entries->nb_segments,
1080 				mbuf_pool->size);
1081 
1082 		if ((seg->length + RTE_PKTMBUF_HEADROOM) > RTE_BBDEV_LDPC_E_MAX_MBUF) {
1083 			/*
1084 			 * Special case when DPDK mbuf cannot handle
1085 			 * the required input size
1086 			 */
1087 			large_input = true;
1088 		}
1089 		bufs[i].data = m_head;
1090 		bufs[i].offset = 0;
1091 		bufs[i].length = 0;
1092 
1093 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1094 			if (large_input) {
1095 				/* Allocate a fake oversized mbuf */
1096 				data = rte_malloc(NULL, seg->length, 0);
1097 				TEST_ASSERT_NOT_NULL(data,
1098 					"rte malloc failed with %u bytes",
1099 					seg->length);
1100 				memcpy(data, seg->addr, seg->length);
1101 				m_head->buf_addr = data;
1102 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1103 				m_head->data_off = 0;
1104 				m_head->data_len = seg->length;
1105 			} else {
1106 				data = rte_pktmbuf_append(m_head, seg->length);
1107 				TEST_ASSERT_NOT_NULL(data,
1108 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1109 					seg->length, op_type);
1110 
1111 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1112 						data, min_alignment),
1113 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1114 					data, min_alignment);
1115 				rte_memcpy(data, seg->addr, seg->length);
1116 			}
1117 
1118 			bufs[i].length += seg->length;
1119 
1120 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1121 				struct rte_mbuf *m_tail =
1122 						rte_pktmbuf_alloc(mbuf_pool);
1123 				TEST_ASSERT_NOT_NULL(m_tail,
1124 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1125 						op_type,
1126 						n * ref_entries->nb_segments,
1127 						mbuf_pool->size);
1128 				seg += 1;
1129 
1130 				data = rte_pktmbuf_append(m_tail, seg->length);
1131 				TEST_ASSERT_NOT_NULL(data,
1132 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1133 						seg->length, op_type);
1134 
1135 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1136 						min_alignment),
1137 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1138 						data, min_alignment);
1139 				rte_memcpy(data, seg->addr, seg->length);
1140 				bufs[i].length += seg->length;
1141 
1142 				ret = rte_pktmbuf_chain(m_head, m_tail);
1143 				TEST_ASSERT_SUCCESS(ret,
1144 						"Couldn't chain mbufs from %d data type mbuf pool",
1145 						op_type);
1146 			}
1147 		} else {
1148 			if (((op_type == DATA_HARD_OUTPUT) || (op_type == DATA_SOFT_OUTPUT))
1149 					&& ((seg->length + RTE_PKTMBUF_HEADROOM)
1150 					> RTE_BBDEV_LDPC_E_MAX_MBUF)) {
1151 				/* Allocate a fake oversized mbuf + margin */
1152 				data = rte_malloc(NULL, seg->length + 1024, 0);
1153 				TEST_ASSERT_NOT_NULL(data,
1154 					"rte malloc failed with %u bytes",
1155 					seg->length + 1024);
1156 				m_head->buf_addr = data;
1157 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1158 				m_head->data_off = 0;
1159 				m_head->data_len = seg->length;
1160 			} else {
1161 				/* allocate chained-mbuf for output buffer */
1162 				for (j = 1; j < ref_entries->nb_segments; ++j) {
1163 					struct rte_mbuf *m_tail =
1164 						rte_pktmbuf_alloc(mbuf_pool);
1165 					TEST_ASSERT_NOT_NULL(m_tail,
1166 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1167 						op_type,
1168 						n * ref_entries->nb_segments,
1169 						mbuf_pool->size);
1170 
1171 					ret = rte_pktmbuf_chain(m_head, m_tail);
1172 					TEST_ASSERT_SUCCESS(ret,
1173 						"Couldn't chain mbufs from %d data type mbuf pool",
1174 						op_type);
1175 				}
1176 			}
1177 			bufs[i].length += seg->length;
1178 		}
1179 	}
1180 
1181 	return 0;
1182 }
1183 
1184 static int
1185 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1186 		const int socket)
1187 {
1188 	int i;
1189 
1190 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1191 	if (*buffers == NULL) {
1192 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1193 				socket);
1194 		/* try to allocate memory on other detected sockets */
1195 		for (i = 0; i < socket; i++) {
1196 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1197 			if (*buffers != NULL)
1198 				break;
1199 		}
1200 	}
1201 
1202 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1203 }
1204 
1205 static void
1206 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1207 		const uint16_t n, const int8_t max_llr_modulus)
1208 {
1209 	uint16_t i, byte_idx;
1210 
1211 	for (i = 0; i < n; ++i) {
1212 		struct rte_mbuf *m = input_ops[i].data;
1213 		while (m != NULL) {
1214 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1215 					input_ops[i].offset);
1216 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1217 					++byte_idx)
1218 				llr[byte_idx] = round((double)max_llr_modulus *
1219 						llr[byte_idx] / INT8_MAX);
1220 
1221 			m = m->next;
1222 		}
1223 	}
1224 }
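
/*
 * Worked example (hypothetical values): with max_llr_modulus = 16, an input
 * LLR of 127 becomes round(16 * 127 / 127) = 16 and an LLR of -64 becomes
 * round(16 * -64 / 127) = -8, so the full-range int8 LLRs from the vector are
 * compressed into the [-max_llr_modulus, max_llr_modulus] range reported by
 * the turbo decoder capability.
 */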
1225 
1226 /*
1227  * We may have to insert filler bits
1228  * when they are required by the HARQ assumption
1229  */
1230 static void
1231 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1232 		const uint16_t n, struct test_op_params *op_params)
1233 {
1234 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1235 
1236 	if (input_ops == NULL)
1237 		return;
1238 	/* No need to add filler if not required by device */
1239 	if (!(ldpc_cap_flags &
1240 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1241 		return;
1242 	/* No need to add filler for loopback operation */
1243 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1244 		return;
1245 
1246 	uint16_t i, j, parity_offset;
1247 	for (i = 0; i < n; ++i) {
1248 		struct rte_mbuf *m = input_ops[i].data;
1249 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1250 				input_ops[i].offset);
1251 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1252 				* dec.z_c - dec.n_filler;
1253 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1254 		m->data_len = new_hin_size;
1255 		input_ops[i].length = new_hin_size;
1256 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1257 				j--)
1258 			llr[j] = llr[j - dec.n_filler];
1259 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1260 		for (j = 0; j < dec.n_filler; j++)
1261 			llr[parity_offset + j] = llr_max_pre_scaling;
1262 	}
1263 }
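
/*
 * Worked example (hypothetical parameters): for basegraph 1, z_c = 224 and
 * n_filler = 80, parity_offset = 20 * 224 - 80 = 4400. The loop above first
 * shifts the parity LLRs up by n_filler positions, then writes saturated
 * filler LLRs ((1 << (ldpc_llr_size - 1)) - 1, i.e. 127 for ldpc_llr_size = 8)
 * into positions [4400, 4480), matching the layout the decoder assumes for its
 * internal HARQ memory.
 */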
1264 
1265 static void
1266 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1267 		const uint16_t n, const int8_t llr_size,
1268 		const int8_t llr_decimals)
1269 {
1270 	if (input_ops == NULL)
1271 		return;
1272 
1273 	uint16_t i, byte_idx;
1274 
1275 	int16_t llr_max, llr_min, llr_tmp;
1276 	llr_max = (1 << (llr_size - 1)) - 1;
1277 	llr_min = -llr_max;
1278 	for (i = 0; i < n; ++i) {
1279 		struct rte_mbuf *m = input_ops[i].data;
1280 		while (m != NULL) {
1281 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1282 					input_ops[i].offset);
1283 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1284 					++byte_idx) {
1285 
1286 				llr_tmp = llr[byte_idx];
1287 				if (llr_decimals == 4)
1288 					llr_tmp *= 8;
1289 				else if (llr_decimals == 2)
1290 					llr_tmp *= 2;
1291 				else if (llr_decimals == 0)
1292 					llr_tmp /= 2;
1293 				llr_tmp = RTE_MIN(llr_max,
1294 						RTE_MAX(llr_min, llr_tmp));
1295 				llr[byte_idx] = (int8_t) llr_tmp;
1296 			}
1297 
1298 			m = m->next;
1299 		}
1300 	}
1301 }
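
/*
 * The multipliers above correspond to a scaling by 2^(llr_decimals - 1), i.e.
 * a shift from the test vectors' implicit 1-fractional-bit LLR format to the
 * device format: x8 for llr_decimals = 4, x2 for 2, /2 for 0. Worked example
 * (hypothetical values): with llr_size = 8 and llr_decimals = 2, an input LLR
 * of -40 becomes -80, within the saturation range [-127, 127].
 */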
1302 
1303 
1304 
1305 static int
1306 fill_queue_buffers(struct test_op_params *op_params,
1307 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1308 		struct rte_mempool *soft_out_mp,
1309 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1310 		uint16_t queue_id,
1311 		const struct rte_bbdev_op_cap *capabilities,
1312 		uint16_t min_alignment, const int socket_id)
1313 {
1314 	int ret;
1315 	enum op_data_type type;
1316 	const uint16_t n = op_params->num_to_process;
1317 
1318 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1319 		in_mp,
1320 		soft_out_mp,
1321 		hard_out_mp,
1322 		harq_in_mp,
1323 		harq_out_mp,
1324 	};
1325 
1326 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1327 		&op_params->q_bufs[socket_id][queue_id].inputs,
1328 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1329 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1330 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1331 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1332 	};
1333 
1334 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1335 		struct op_data_entries *ref_entries =
1336 				&test_vector.entries[type];
1337 		if (ref_entries->nb_segments == 0)
1338 			continue;
1339 
1340 		ret = allocate_buffers_on_socket(queue_ops[type],
1341 				n * sizeof(struct rte_bbdev_op_data),
1342 				socket_id);
1343 		TEST_ASSERT_SUCCESS(ret,
1344 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1345 
1346 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1347 				mbuf_pools[type], n, type, min_alignment);
1348 		TEST_ASSERT_SUCCESS(ret,
1349 				"Couldn't init rte_bbdev_op_data structs");
1350 	}
1351 
1352 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1353 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1354 			capabilities->cap.turbo_dec.max_llr_modulus);
1355 
1356 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1357 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1358 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1359 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1360 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1361 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1362 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1363 
1364 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1365 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1366 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1367 		if (!loopback && !llr_comp)
1368 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1369 					ldpc_llr_size, ldpc_llr_decimals);
1370 		if (!loopback && !harq_comp)
1371 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1372 					ldpc_llr_size, ldpc_llr_decimals);
1373 		if (!loopback)
1374 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1375 					op_params);
1376 	}
1377 
1378 	return 0;
1379 }
1380 
1381 static void
1382 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1383 {
1384 	unsigned int i, j;
1385 
1386 	rte_mempool_free(ad->ops_mempool);
1387 	rte_mempool_free(ad->in_mbuf_pool);
1388 	rte_mempool_free(ad->hard_out_mbuf_pool);
1389 	rte_mempool_free(ad->soft_out_mbuf_pool);
1390 	rte_mempool_free(ad->harq_in_mbuf_pool);
1391 	rte_mempool_free(ad->harq_out_mbuf_pool);
1392 
1393 	for (i = 0; i < rte_lcore_count(); ++i) {
1394 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1395 			rte_free(op_params->q_bufs[j][i].inputs);
1396 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1397 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1398 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1399 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1400 		}
1401 	}
1402 }
1403 
1404 static void
1405 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1406 		unsigned int start_idx,
1407 		struct rte_bbdev_op_data *inputs,
1408 		struct rte_bbdev_op_data *hard_outputs,
1409 		struct rte_bbdev_op_data *soft_outputs,
1410 		struct rte_bbdev_dec_op *ref_op)
1411 {
1412 	unsigned int i;
1413 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1414 
1415 	for (i = 0; i < n; ++i) {
1416 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1417 			ops[i]->turbo_dec.tb_params.ea =
1418 					turbo_dec->tb_params.ea;
1419 			ops[i]->turbo_dec.tb_params.eb =
1420 					turbo_dec->tb_params.eb;
1421 			ops[i]->turbo_dec.tb_params.k_pos =
1422 					turbo_dec->tb_params.k_pos;
1423 			ops[i]->turbo_dec.tb_params.k_neg =
1424 					turbo_dec->tb_params.k_neg;
1425 			ops[i]->turbo_dec.tb_params.c =
1426 					turbo_dec->tb_params.c;
1427 			ops[i]->turbo_dec.tb_params.c_neg =
1428 					turbo_dec->tb_params.c_neg;
1429 			ops[i]->turbo_dec.tb_params.cab =
1430 					turbo_dec->tb_params.cab;
1431 			ops[i]->turbo_dec.tb_params.r =
1432 					turbo_dec->tb_params.r;
1433 		} else {
1434 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1435 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1436 		}
1437 
1438 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1439 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1440 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1441 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1442 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1443 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1444 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1445 
1446 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1447 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1448 		if (soft_outputs != NULL)
1449 			ops[i]->turbo_dec.soft_output =
1450 				soft_outputs[start_idx + i];
1451 	}
1452 }
1453 
1454 static void
1455 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1456 		unsigned int start_idx,
1457 		struct rte_bbdev_op_data *inputs,
1458 		struct rte_bbdev_op_data *outputs,
1459 		struct rte_bbdev_enc_op *ref_op)
1460 {
1461 	unsigned int i;
1462 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1463 	for (i = 0; i < n; ++i) {
1464 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1465 			ops[i]->turbo_enc.tb_params.ea =
1466 					turbo_enc->tb_params.ea;
1467 			ops[i]->turbo_enc.tb_params.eb =
1468 					turbo_enc->tb_params.eb;
1469 			ops[i]->turbo_enc.tb_params.k_pos =
1470 					turbo_enc->tb_params.k_pos;
1471 			ops[i]->turbo_enc.tb_params.k_neg =
1472 					turbo_enc->tb_params.k_neg;
1473 			ops[i]->turbo_enc.tb_params.c =
1474 					turbo_enc->tb_params.c;
1475 			ops[i]->turbo_enc.tb_params.c_neg =
1476 					turbo_enc->tb_params.c_neg;
1477 			ops[i]->turbo_enc.tb_params.cab =
1478 					turbo_enc->tb_params.cab;
1479 			ops[i]->turbo_enc.tb_params.ncb_pos =
1480 					turbo_enc->tb_params.ncb_pos;
1481 			ops[i]->turbo_enc.tb_params.ncb_neg =
1482 					turbo_enc->tb_params.ncb_neg;
1483 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1484 		} else {
1485 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1486 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1487 			ops[i]->turbo_enc.cb_params.ncb =
1488 					turbo_enc->cb_params.ncb;
1489 		}
1490 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1491 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1492 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1493 
1494 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1495 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1496 	}
1497 }
1498 
1499 
1500 /* Return a random number drawn from a normal distribution
1501  * with mean 0 and variance 1.
1502  * Marsaglia polar method.
1503  */
1504 static double
1505 randn(int n)
1506 {
1507 	double S, Z, U1, U2, u, v, fac;
1508 
1509 	do {
1510 		U1 = (double)rand() / RAND_MAX;
1511 		U2 = (double)rand() / RAND_MAX;
1512 		u = 2. * U1 - 1.;
1513 		v = 2. * U2 - 1.;
1514 		S = u * u + v * v;
1515 	} while (S >= 1 || S == 0);
1516 	fac = sqrt(-2. * log(S) / S);
1517 	Z = (n % 2) ? u * fac : v * fac;
1518 	return Z;
1519 }
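
/*
 * Math behind the polar method above: with u, v uniform in (-1, 1) and
 * S = u^2 + v^2 < 1, both u * sqrt(-2 * log(S) / S) and
 * v * sqrt(-2 * log(S) / S) are standard normal samples; the parity of n
 * selects one of the two. The LLR generators below use it to add AWGN of
 * variance N0 / 2 per dimension via sqrt(N0 / 2) * randn(k).
 */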
1520 
1521 static inline double
1522 maxstar(double A, double B)
1523 {
1524 	if (fabs(A - B) > 5)
1525 		return RTE_MAX(A, B);
1526 	else
1527 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1528 }
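
/*
 * maxstar() implements the max* (Jacobian logarithm) operation used by the
 * LLR generators below: log(exp(A) + exp(B)) = max(A, B) + log1p(exp(-|A - B|)).
 * For |A - B| > 5 the correction term is at most log1p(exp(-5)) ~= 0.0067 and
 * is dropped. Worked example: maxstar(2.0, 1.5) ~= 2.0 + log1p(exp(-0.5)) ~= 2.474.
 */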
1529 
1530 /*
1531  * Generate Qm LLRs for Qm==8
1532  * Modulation, AWGN and LLR estimation from the max-log development
1533  */
1534 static void
1535 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1536 {
1537 	int qm = 8;
1538 	int qam = 256;
1539 	int m, k;
1540 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1541 	/* 5.1.4 of TS38.211 */
1542 	const double symbols_I[256] = {
1543 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1544 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1545 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1546 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1547 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1548 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1549 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1550 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1551 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1552 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1553 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1554 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1555 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1556 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1557 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1558 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1559 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1560 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1561 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1562 			-13, -13, -15, -15, -13, -13, -15, -15};
1563 	const double symbols_Q[256] = {
1564 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1565 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1566 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1567 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1568 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1569 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1570 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1571 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1572 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1573 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1574 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1575 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1576 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1577 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1578 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1579 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1580 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1581 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1582 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1583 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1584 	/* Average constellation point energy */
1585 	N0 *= 170.0;
1586 	for (k = 0; k < qm; k++)
1587 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1588 	/* 5.1.4 of TS38.211 */
1589 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1590 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1591 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1592 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1593 	/* AWGN channel */
1594 	I += sqrt(N0 / 2) * randn(0);
1595 	Q += sqrt(N0 / 2) * randn(1);
1596 	/*
1597 	 * Calculate the log of the probability that each of
1598 	 * the constellation points was transmitted
1599 	 */
1600 	for (m = 0; m < qam; m++)
1601 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1602 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1603 	/* Calculate an LLR for each of the qm bits in the set */
1604 	for (k = 0; k < qm; k++) {
1605 		p0 = -999999;
1606 		p1 = -999999;
1607 		/* For each constellation point */
1608 		for (m = 0; m < qam; m++) {
1609 			if ((m >> (qm - k - 1)) & 1)
1610 				p1 = maxstar(p1, log_syml_prob[m]);
1611 			else
1612 				p0 = maxstar(p0, log_syml_prob[m]);
1613 		}
1614 		/* Calculate the LLR */
1615 		llr_ = p0 - p1;
1616 		llr_ *= (1 << ldpc_llr_decimals);
1617 		llr_ = round(llr_);
1618 		if (llr_ > llr_max)
1619 			llr_ = llr_max;
1620 		if (llr_ < -llr_max)
1621 			llr_ = -llr_max;
1622 		llrs[qm * i + k] = (int8_t) llr_;
1623 	}
1624 }
1625 
1626 
1627 /*
1628  * Generate Qm LLRs for Qm==6
1629  * Modulation, AWGN and LLR estimation from the max-log development
1630  */
1631 static void
1632 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1633 {
1634 	int qm = 6;
1635 	int qam = 64;
1636 	int m, k;
1637 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1638 	/* 5.1.4 of TS38.211 */
1639 	const double symbols_I[64] = {
1640 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1641 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1642 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1643 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1644 			-5, -5, -7, -7, -5, -5, -7, -7};
1645 	const double symbols_Q[64] = {
1646 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1647 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1648 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1649 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1650 			-3, -1, -3, -1, -5, -7, -5, -7};
1651 	/* Scale the noise variance by the average 64QAM constellation point energy (42) */
1652 	N0 *= 42.0;
1653 	for (k = 0; k < qm; k++)
1654 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1655 	/* 5.1.4 of TS38.211 */
1656 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1657 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1658 	/* AWGN channel */
1659 	I += sqrt(N0 / 2) * randn(0);
1660 	Q += sqrt(N0 / 2) * randn(1);
1661 	/*
1662 	 * Calculate the log of the probability that each of
1663 	 * the constellation points was transmitted
1664 	 */
1665 	for (m = 0; m < qam; m++)
1666 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1667 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1668 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1669 	for (k = 0; k < qm; k++) {
1670 		p0 = -999999;
1671 		p1 = -999999;
1672 		/* For each constellation point */
1673 		for (m = 0; m < qam; m++) {
1674 			if ((m >> (qm - k - 1)) & 1)
1675 				p1 = maxstar(p1, log_syml_prob[m]);
1676 			else
1677 				p0 = maxstar(p0, log_syml_prob[m]);
1678 		}
1679 		/* Calculate the LLR */
1680 		llr_ = p0 - p1;
1681 		llr_ *= (1 << ldpc_llr_decimals);
1682 		llr_ = round(llr_);
1683 		if (llr_ > llr_max)
1684 			llr_ = llr_max;
1685 		if (llr_ < -llr_max)
1686 			llr_ = -llr_max;
1687 		llrs[qm * i + k] = (int8_t) llr_;
1688 	}
1689 }
1690 
1691 /*
1692  * Generate Qm LLRs for Qm == 4
1693  * Modulation, AWGN and LLR estimation based on a max-log approximation
1694  */
1695 static void
1696 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1697 {
1698 	int qm = 4;
1699 	int qam = 16;
1700 	int m, k;
1701 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1702 	/* 5.1.4 of TS38.211 */
1703 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1704 			-1, -1, -3, -3, -1, -1, -3, -3};
1705 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1706 			1, 3, 1, 3, -1, -3, -1, -3};
1707 	/* Scale the noise variance by the average 16QAM constellation point energy (10) */
1708 	N0 *= 10.0;
1709 	for (k = 0; k < qm; k++)
1710 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1711 	/* 5.1.4 of TS38.211 */
1712 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1713 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1714 	/* AWGN channel */
1715 	I += sqrt(N0 / 2) * randn(0);
1716 	Q += sqrt(N0 / 2) * randn(1);
1717 	/*
1718 	 * Calculate the log of the probability that each of
1719 	 * the constellation points was transmitted
1720 	 */
1721 	for (m = 0; m < qam; m++)
1722 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1723 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1724 	/* Calculate an LLR for each of the Qm bits in the set */
1725 	for (k = 0; k < qm; k++) {
1726 		p0 = -999999;
1727 		p1 = -999999;
1728 		/* For each constellation point */
1729 		for (m = 0; m < qam; m++) {
1730 			if ((m >> (qm - k - 1)) & 1)
1731 				p1 = maxstar(p1, log_syml_prob[m]);
1732 			else
1733 				p0 = maxstar(p0, log_syml_prob[m]);
1734 		}
1735 		/* Calculate the LLR */
1736 		llr_ = p0 - p1;
1737 		llr_ *= (1 << ldpc_llr_decimals);
1738 		llr_ = round(llr_);
1739 		if (llr_ > llr_max)
1740 			llr_ = llr_max;
1741 		if (llr_ < -llr_max)
1742 			llr_ = -llr_max;
1743 		llrs[qm * i + k] = (int8_t) llr_;
1744 	}
1745 }
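/*
 * Editorial note (illustrative only, for gen_qm4_llr() above):
 * I = (1 - 2*b0) * (2 - (1 - 2*b2)) maps (b0, b2) to the TS 38.211 16QAM
 * levels: (0,0) -> +1, (0,1) -> +3, (1,0) -> -1, (1,1) -> -3.
 * The per-dimension average energy is (1 + 9) / 2 = 5, i.e. 10 per symbol,
 * hence the N0 *= 10.0 scaling before the AWGN term is added.
 */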
1746 
1747 static void
1748 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1749 {
1750 	double b, b1, n;
1751 	double coeff = 2.0 * sqrt(N0);
1752 
1753 	/* Ignore rare quasi-null LLRs in the vectors so they do not get saturated */
1754 	if (llrs[j] < 8 && llrs[j] > -8)
1755 		return;
1756 
1757 	/* Note: do not change the sign here */
1758 	n = randn(j % 2);
1759 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1760 			+ coeff * n) / N0;
1761 	b = b1 * (1 << ldpc_llr_decimals);
1762 	b = round(b);
1763 	if (b > llr_max)
1764 		b = llr_max;
1765 	if (b < -llr_max)
1766 		b = -llr_max;
1767 	llrs[j] = (int8_t) b;
1768 }
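/*
 * Editorial note (illustrative only, assuming the QPSK model used above):
 * gen_qm2_llr() re-modulates the sign of the stored LLR as x = +-1, adds
 * real Gaussian noise of standard deviation sqrt(N0), and forms
 *   b1 = (2*x + 2*sqrt(N0)*n) / N0 = 2 * (x + sqrt(N0)*n) / N0,
 * which is then scaled by 2^ldpc_llr_decimals, rounded and saturated to
 * +-llr_max.
 */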
1769 
1770 /* Simple LLR generation assuming AWGN and QPSK */
1771 static void
1772 gen_turbo_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1773 {
1774 	double b, b1, n;
1775 	double coeff = 2.0 * sqrt(N0);
1776 
1777 	/* Ignore null LLRs in the vectors so they do not get saturated */
1778 	if (llrs[j] == 0)
1779 		return;
1780 
1781 	/* Note: do not change the sign here */
1782 	n = randn(j % 2);
1783 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1784 			+ coeff * n) / N0;
1785 	b = b1 * (1 << 4);
1786 	b = round(b);
1787 	if (b > llr_max)
1788 		b = llr_max;
1789 	if (b < -llr_max)
1790 		b = -llr_max;
1791 	llrs[j] = (int8_t) b;
1792 }
1793 
1794 /* Generate LLR for a given SNR */
1795 static void
1796 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1797 		struct rte_bbdev_dec_op *ref_op)
1798 {
1799 	struct rte_mbuf *m;
1800 	uint16_t qm;
1801 	uint32_t i, j, e, range;
1802 	double N0, llr_max;
1803 
1804 	e = ref_op->ldpc_dec.cb_params.e;
1805 	qm = ref_op->ldpc_dec.q_m;
1806 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1807 	range = e / qm;
1808 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1809 
1810 	for (i = 0; i < n; ++i) {
1811 		m = inputs[i].data;
1812 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1813 		if (qm == 8) {
1814 			for (j = 0; j < range; ++j)
1815 				gen_qm8_llr(llrs, j, N0, llr_max);
1816 		} else if (qm == 6) {
1817 			for (j = 0; j < range; ++j)
1818 				gen_qm6_llr(llrs, j, N0, llr_max);
1819 		} else if (qm == 4) {
1820 			for (j = 0; j < range; ++j)
1821 				gen_qm4_llr(llrs, j, N0, llr_max);
1822 		} else {
1823 			for (j = 0; j < e; ++j)
1824 				gen_qm2_llr(llrs, j, N0, llr_max);
1825 		}
1826 	}
1827 }
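/*
 * Editorial note (worked example for generate_llr_input() above):
 * N0 = 1 / 10^(SNR/10), so an SNR of 10 dB gives N0 = 0.1 and an SNR of
 * 0 dB gives N0 = 1.0. For qm in {4, 6, 8} one constellation symbol is
 * regenerated per group of qm LLRs (range = e / qm iterations); for qm == 2
 * each of the e LLRs is perturbed individually by gen_qm2_llr().
 */
#if 0
	/*
	 * Hypothetical usage sketch (not compiled, not called by the test):
	 * regenerate noisy LLRs for pre-filled input buffers before a BLER
	 * run, assuming bufs->inputs and op_params->ref_dec_op are already
	 * populated as elsewhere in this file.
	 */
	generate_llr_input(num_ops, bufs->inputs, op_params->ref_dec_op);
#endif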
1828 
1829 /* Generate LLR for turbo decoder for a given SNR */
1830 static void
1831 generate_turbo_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1832 		struct rte_bbdev_dec_op *ref_op)
1833 {
1834 	struct rte_mbuf *m;
1835 	uint32_t i, j, range;
1836 	double N0, llr_max;
1837 
1838 	llr_max = 127;
1839 	range = ref_op->turbo_dec.input.length;
1840 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1841 
1842 	for (i = 0; i < n; ++i) {
1843 		m = inputs[i].data;
1844 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1845 		for (j = 0; j < range; ++j)
1846 			gen_turbo_llr(llrs, j, N0, llr_max);
1847 	}
1848 }
1849 
1850 static void
1851 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1852 		unsigned int start_idx,
1853 		struct rte_bbdev_op_data *inputs,
1854 		struct rte_bbdev_op_data *hard_outputs,
1855 		struct rte_bbdev_op_data *soft_outputs,
1856 		struct rte_bbdev_op_data *harq_inputs,
1857 		struct rte_bbdev_op_data *harq_outputs,
1858 		struct rte_bbdev_dec_op *ref_op)
1859 {
1860 	unsigned int i;
1861 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1862 
1863 	for (i = 0; i < n; ++i) {
1864 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1865 			ops[i]->ldpc_dec.tb_params.ea =
1866 					ldpc_dec->tb_params.ea;
1867 			ops[i]->ldpc_dec.tb_params.eb =
1868 					ldpc_dec->tb_params.eb;
1869 			ops[i]->ldpc_dec.tb_params.c =
1870 					ldpc_dec->tb_params.c;
1871 			ops[i]->ldpc_dec.tb_params.cab =
1872 					ldpc_dec->tb_params.cab;
1873 			ops[i]->ldpc_dec.tb_params.r =
1874 					ldpc_dec->tb_params.r;
1875 		} else {
1876 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1877 		}
1878 
1879 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1880 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1881 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1882 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1883 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1884 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1885 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1886 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1887 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1888 
1889 		if (hard_outputs != NULL)
1890 			ops[i]->ldpc_dec.hard_output =
1891 					hard_outputs[start_idx + i];
1892 		if (inputs != NULL)
1893 			ops[i]->ldpc_dec.input =
1894 					inputs[start_idx + i];
1895 		if (soft_outputs != NULL)
1896 			ops[i]->ldpc_dec.soft_output =
1897 					soft_outputs[start_idx + i];
1898 		if (harq_inputs != NULL)
1899 			ops[i]->ldpc_dec.harq_combined_input =
1900 					harq_inputs[start_idx + i];
1901 		if (harq_outputs != NULL)
1902 			ops[i]->ldpc_dec.harq_combined_output =
1903 					harq_outputs[start_idx + i];
1904 	}
1905 }
1906 
1907 
1908 static void
1909 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1910 		unsigned int start_idx,
1911 		struct rte_bbdev_op_data *inputs,
1912 		struct rte_bbdev_op_data *outputs,
1913 		struct rte_bbdev_enc_op *ref_op)
1914 {
1915 	unsigned int i;
1916 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1917 	for (i = 0; i < n; ++i) {
1918 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1919 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1920 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1921 			ops[i]->ldpc_enc.tb_params.cab =
1922 					ldpc_enc->tb_params.cab;
1923 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1924 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1925 		} else {
1926 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1927 		}
1928 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1929 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1930 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1931 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1932 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1933 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1934 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1935 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1936 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1937 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1938 	}
1939 }
1940 
1941 static void
1942 copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n,
1943 		unsigned int start_idx, struct rte_bbdev_op_data *inputs,
1944 		struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *pwrouts,
1945 		struct rte_bbdev_fft_op *ref_op)
1946 {
1947 	unsigned int i, j;
1948 	struct rte_bbdev_op_fft *fft = &ref_op->fft;
1949 	for (i = 0; i < n; i++) {
1950 		ops[i]->fft.input_sequence_size = fft->input_sequence_size;
1951 		ops[i]->fft.input_leading_padding = fft->input_leading_padding;
1952 		ops[i]->fft.output_sequence_size = fft->output_sequence_size;
1953 		ops[i]->fft.output_leading_depadding =
1954 				fft->output_leading_depadding;
1955 		for (j = 0; j < RTE_BBDEV_MAX_CS_2; j++)
1956 			ops[i]->fft.window_index[j] = fft->window_index[j];
1957 		ops[i]->fft.cs_bitmap = fft->cs_bitmap;
1958 		ops[i]->fft.num_antennas_log2 = fft->num_antennas_log2;
1959 		ops[i]->fft.idft_log2 = fft->idft_log2;
1960 		ops[i]->fft.dft_log2 = fft->dft_log2;
1961 		ops[i]->fft.cs_time_adjustment = fft->cs_time_adjustment;
1962 		ops[i]->fft.idft_shift = fft->idft_shift;
1963 		ops[i]->fft.dft_shift = fft->dft_shift;
1964 		ops[i]->fft.ncs_reciprocal = fft->ncs_reciprocal;
1965 		ops[i]->fft.power_shift = fft->power_shift;
1966 		ops[i]->fft.fp16_exp_adjust = fft->fp16_exp_adjust;
1967 		ops[i]->fft.base_output = outputs[start_idx + i];
1968 		ops[i]->fft.base_input = inputs[start_idx + i];
1969 		if (pwrouts != NULL)
1970 			ops[i]->fft.power_meas_output = pwrouts[start_idx + i];
1971 		ops[i]->fft.op_flags = fft->op_flags;
1972 	}
1973 }
1974 
1975 static int
1976 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1977 		unsigned int order_idx, const int expected_status)
1978 {
1979 	int status = op->status;
1980 	/* ignore parity mismatch false alarms for long iterations */
1981 	if (get_iter_max() >= 10) {
1982 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1983 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1984 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1985 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1986 		}
1987 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1988 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1989 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1990 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1991 		}
1992 	}
1993 
1994 	TEST_ASSERT(status == expected_status,
1995 			"op_status (%d) != expected_status (%d)",
1996 			op->status, expected_status);
1997 
1998 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1999 			"Ordering error, expected %p, got %p",
2000 			(void *)(uintptr_t)order_idx, op->opaque_data);
2001 
2002 	return TEST_SUCCESS;
2003 }
2004 
2005 static int
2006 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
2007 		unsigned int order_idx, const int expected_status)
2008 {
2009 	TEST_ASSERT(op->status == expected_status,
2010 			"op_status (%d) != expected_status (%d)",
2011 			op->status, expected_status);
2012 
2013 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
2014 			"Ordering error, expected %p, got %p",
2015 			(void *)(uintptr_t)order_idx, op->opaque_data);
2016 
2017 	return TEST_SUCCESS;
2018 }
2019 
2020 static int
2021 check_fft_status_and_ordering(struct rte_bbdev_fft_op *op,
2022 		unsigned int order_idx, const int expected_status)
2023 {
2024 	TEST_ASSERT(op->status == expected_status,
2025 			"op_status (%d) != expected_status (%d)",
2026 			op->status, expected_status);
2027 
2028 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
2029 			"Ordering error, expected %p, got %p",
2030 			(void *)(uintptr_t)order_idx, op->opaque_data);
2031 
2032 	return TEST_SUCCESS;
2033 }
2034 
2035 static inline int
2036 validate_op_chain(struct rte_bbdev_op_data *op,
2037 		struct op_data_entries *orig_op)
2038 {
2039 	uint8_t i;
2040 	struct rte_mbuf *m = op->data;
2041 	uint8_t nb_dst_segments = orig_op->nb_segments;
2042 	uint32_t total_data_size = 0;
2043 	bool ignore_mbuf = false; /* ignore mbuf limitations */
2044 
2045 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2046 			"Number of segments differ in original (%u) and filled (%u) op",
2047 			nb_dst_segments, m->nb_segs);
2048 
2049 	/* Validate each mbuf segment length */
2050 	for (i = 0; i < nb_dst_segments; ++i) {
2051 		/* Apply offset to the first mbuf segment */
2052 		uint16_t offset = (i == 0) ? op->offset : 0;
2053 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2054 		total_data_size += orig_op->segments[i].length;
2055 
2056 		if (orig_op->segments[i].length > RTE_BBDEV_LDPC_E_MAX_MBUF)
2057 			ignore_mbuf = true;
2058 		if (!ignore_mbuf)
2059 			TEST_ASSERT(orig_op->segments[i].length == data_len,
2060 					"Length of segment differ in original (%u) and filled (%u) op",
2061 					orig_op->segments[i].length, data_len);
2062 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
2063 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
2064 				orig_op->segments[i].length,
2065 				"Output buffers (CB=%u) are not equal", i);
2066 		m = m->next;
2067 	}
2068 
2069 	/* Validate total mbuf pkt length */
2070 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2071 	if (!ignore_mbuf)
2072 		TEST_ASSERT(total_data_size == pkt_len,
2073 				"Length of data differ in original (%u) and filled (%u) op",
2074 				total_data_size, pkt_len);
2075 
2076 	return TEST_SUCCESS;
2077 }
2078 
2079 /*
2080  * Compute k0 for a given configuration, used for the HARQ output length computation,
2081  * as defined in 3GPP TS 38.212 Table 5.4.2.1-2
2082  */
2083 static inline uint16_t
2084 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
2085 {
2086 	if (rv_index == 0)
2087 		return 0;
2088 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
2089 	if (n_cb == n) {
2090 		if (rv_index == 1)
2091 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
2092 		else if (rv_index == 2)
2093 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
2094 		else
2095 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
2096 	}
2097 	/* LBRM case - includes a division by N */
2098 	if (rv_index == 1)
2099 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
2100 				/ n) * z_c;
2101 	else if (rv_index == 2)
2102 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
2103 				/ n) * z_c;
2104 	else
2105 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
2106 				/ n) * z_c;
2107 }
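/*
 * Editorial note (worked example for get_k0() above, full-buffer case):
 * for BG1, Zc = 384 and n_cb = N = 66 * 384 = 25344, rv_index = 2 gives
 * k0 = K0_2_1 * Zc = 33 * 384 = 12672. In the LBRM case the same numerator
 * is multiplied by n_cb, integer-divided by N, and then multiplied by Zc.
 */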
2108 
2109 /* HARQ output length including the Filler bits */
2110 static inline uint16_t
2111 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
2112 {
2113 	uint16_t k0 = 0;
2114 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
2115 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
2116 	/* Compute RM out size and number of rows */
2117 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2118 			* ops_ld->z_c - ops_ld->n_filler;
2119 	uint16_t deRmOutSize = RTE_MIN(
2120 			k0 + ops_ld->cb_params.e +
2121 			((k0 > parity_offset) ?
2122 					0 : ops_ld->n_filler),
2123 					ops_ld->n_cb);
2124 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
2125 			/ ops_ld->z_c);
2126 	uint16_t harq_output_len = numRows * ops_ld->z_c;
2127 	return harq_output_len;
2128 }
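/*
 * Editorial note (worked example for compute_harq_len() above):
 * for BG1, Zc = 128, n_cb = 66 * 128 = 8448, n_filler = 0, rv_index = 0
 * (k0 = 0) and e = 5000: parity_offset = 20 * 128 = 2560, deRmOutSize =
 * min(0 + 5000 + 0, 8448) = 5000, numRows = ceil(5000 / 128) = 40, so the
 * HARQ output length is 40 * 128 = 5120 LLRs.
 */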
2129 
2130 static inline int
2131 validate_op_harq_chain(struct rte_bbdev_op_data *op,
2132 		struct op_data_entries *orig_op,
2133 		struct rte_bbdev_op_ldpc_dec *ops_ld)
2134 {
2135 	uint8_t i;
2136 	uint32_t j, jj, k;
2137 	struct rte_mbuf *m = op->data;
2138 	uint8_t nb_dst_segments = orig_op->nb_segments;
2139 	uint32_t total_data_size = 0;
2140 	int8_t *harq_orig, *harq_out, abs_harq_origin;
2141 	uint32_t byte_error = 0, cum_error = 0, error;
2142 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
2143 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
2144 	uint16_t parity_offset;
2145 
2146 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2147 			"Number of segments differ in original (%u) and filled (%u) op",
2148 			nb_dst_segments, m->nb_segs);
2149 
2150 	/* Validate each mbuf segment length */
2151 	for (i = 0; i < nb_dst_segments; ++i) {
2152 		/* Apply offset to the first mbuf segment */
2153 		uint16_t offset = (i == 0) ? op->offset : 0;
2154 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2155 		total_data_size += orig_op->segments[i].length;
2156 
2157 		TEST_ASSERT(orig_op->segments[i].length < (uint32_t)(data_len + HARQ_MEM_TOLERANCE),
2158 				"Length of segment differ in original (%u) and filled (%u) op",
2159 				orig_op->segments[i].length, data_len);
2160 		harq_orig = (int8_t *) orig_op->segments[i].addr;
2161 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2162 
2163 		/* Cannot compare HARQ output data for such cases */
2164 		if ((ldpc_llr_decimals > 1) && ((ops_ld->op_flags & RTE_BBDEV_LDPC_LLR_COMPRESSION)
2165 				|| (ops_ld->op_flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)))
2166 			break;
2167 
2168 		if (!(ldpc_cap_flags &
2169 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
2170 				) || (ops_ld->op_flags &
2171 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2172 			data_len -= ops_ld->z_c;
2173 			parity_offset = data_len;
2174 		} else {
2175 			/* Compute RM out size and number of rows */
2176 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2177 					* ops_ld->z_c - ops_ld->n_filler;
2178 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
2179 					ops_ld->n_filler;
2180 			if (data_len > deRmOutSize)
2181 				data_len = deRmOutSize;
2182 		}
2183 		if (data_len > orig_op->segments[i].length)
2184 			data_len = orig_op->segments[i].length;
2185 		/*
2186 		 * HARQ output can have minor differences
2187 		 * due to integer representation and related scaling
2188 		 */
2189 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2190 			if (j == parity_offset) {
2191 				/* Special Handling of the filler bits */
2192 				for (k = 0; k < ops_ld->n_filler; k++) {
2193 					if (harq_out[jj] !=
2194 							llr_max_pre_scaling) {
2195 						printf("HARQ filler issue %d: got %d, expected %d\n",
2196 							jj, harq_out[jj],
2197 							llr_max_pre_scaling);
2198 						byte_error++;
2199 					}
2200 					jj++;
2201 				}
2202 			}
2203 			if (!(ops_ld->op_flags &
2204 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2205 				if (ldpc_llr_decimals > 1)
2206 					harq_out[jj] = (harq_out[jj] + 1)
2207 						>> (ldpc_llr_decimals - 1);
2208 				/* Saturated to S7 */
2209 				if (harq_orig[j] > llr_max)
2210 					harq_orig[j] = llr_max;
2211 				if (harq_orig[j] < -llr_max)
2212 					harq_orig[j] = -llr_max;
2213 			}
2214 			if (harq_orig[j] != harq_out[jj]) {
2215 				error = (harq_orig[j] > harq_out[jj]) ?
2216 						harq_orig[j] - harq_out[jj] :
2217 						harq_out[jj] - harq_orig[j];
2218 				abs_harq_origin = harq_orig[j] > 0 ?
2219 							harq_orig[j] :
2220 							-harq_orig[j];
2221 				/* Residual quantization error */
2222 				if ((error > 8 && (abs_harq_origin <
2223 						(llr_max - 16))) ||
2224 						(error > 16)) {
2225 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2226 							j, harq_orig[j],
2227 							harq_out[jj], error);
2228 					byte_error++;
2229 					cum_error += error;
2230 				}
2231 			}
2232 		}
2233 		m = m->next;
2234 	}
2235 
2236 	if (byte_error)
2237 		TEST_ASSERT(byte_error <= 1,
2238 				"HARQ output mismatch: %d bytes differ, cumulative error %d",
2239 				byte_error, cum_error);
2240 
2241 	/* Validate total mbuf pkt length */
2242 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2243 	TEST_ASSERT(total_data_size < pkt_len + HARQ_MEM_TOLERANCE,
2244 			"Length of data differ in original (%u) and filled (%u) op",
2245 			total_data_size, pkt_len);
2246 
2247 	return TEST_SUCCESS;
2248 }
2249 
2250 
2251 static inline int
2252 validate_op_so_chain(struct rte_bbdev_op_data *op,
2253 		struct op_data_entries *orig_op)
2254 {
2255 	struct rte_mbuf *m = op->data;
2256 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2257 	uint32_t j, jj;
2258 	int8_t *so_orig, *so_out;
2259 	uint32_t byte_error = 0, error, margin_error = 0;
2260 
2261 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2262 			"Number of segments differ in original (%u) and filled (%u) op",
2263 			nb_dst_segments, m->nb_segs);
2264 
2265 	/* Validate each mbuf segment length. */
2266 	for (i = 0; i < nb_dst_segments; ++i) {
2267 		/* Apply offset to the first mbuf segment. */
2268 		uint16_t offset = (i == 0) ? op->offset : 0;
2269 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2270 
2271 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2272 				"Length of segment differ in original (%u) and filled (%u) op",
2273 				orig_op->segments[i].length, data_len);
2274 		so_orig = (int8_t *) orig_op->segments[i].addr;
2275 		so_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2276 		margin_error += data_len / 8; /* Allow errors on up to 1/8 of the bytes. */
2277 
2278 		/* SO output can have minor differences due to algorithm variations. */
2279 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2280 			if (so_orig[j] != so_out[jj]) {
2281 				error = (so_orig[j] > so_out[jj]) ? so_orig[j] - so_out[jj] :
2282 						so_out[jj] - so_orig[j];
2283 				/* Residual quantization error. */
2284 				if (error > 32) {
2285 					printf("Warning: Soft mismatch %d: exp %d act %d => %d\n",
2286 							j, so_orig[j], so_out[jj], error);
2287 					byte_error++;
2288 				}
2289 			}
2290 		}
2291 		m = m->next;
2292 	}
2293 
2294 	if (byte_error > margin_error)
2295 		TEST_ASSERT(byte_error <= 1, "Soft output mismatch: %d errors, margin %d",
2296 				byte_error, margin_error);
2297 
2298 	return TEST_SUCCESS;
2299 }
2300 
2301 static int
2302 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2303 		struct rte_bbdev_dec_op *ref_op)
2304 {
2305 	unsigned int i;
2306 	int ret;
2307 	struct op_data_entries *hard_data_orig =
2308 			&test_vector.entries[DATA_HARD_OUTPUT];
2309 	struct op_data_entries *soft_data_orig =
2310 			&test_vector.entries[DATA_SOFT_OUTPUT];
2311 	struct rte_bbdev_op_turbo_dec *ops_td;
2312 	struct rte_bbdev_op_data *hard_output;
2313 	struct rte_bbdev_op_data *soft_output;
2314 
2315 	for (i = 0; i < n; ++i) {
2316 		ops_td = &ops[i]->turbo_dec;
2317 		hard_output = &ops_td->hard_output;
2318 		soft_output = &ops_td->soft_output;
2319 
2320 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2321 		TEST_ASSERT_SUCCESS(ret,
2322 				"Checking status and ordering for decoder failed");
2323 
2324 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2325 				hard_data_orig),
2326 				"Hard output buffers (CB=%u) are not equal",
2327 				i);
2328 
2329 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2330 			TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output,
2331 					soft_data_orig),
2332 					"Soft output buffers (CB=%u) are not equal",
2333 					i);
2334 	}
2335 
2336 	return TEST_SUCCESS;
2337 }
2338 
2339 /* Check the number of code block errors */
2340 static int
2341 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2342 {
2343 	unsigned int i;
2344 	struct op_data_entries *hard_data_orig =
2345 			&test_vector.entries[DATA_HARD_OUTPUT];
2346 	struct rte_bbdev_op_ldpc_dec *ops_td;
2347 	struct rte_bbdev_op_data *hard_output;
2348 	int errors = 0;
2349 	struct rte_mbuf *m;
2350 
2351 	for (i = 0; i < n; ++i) {
2352 		ops_td = &ops[i]->ldpc_dec;
2353 		hard_output = &ops_td->hard_output;
2354 		m = hard_output->data;
2355 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2356 				hard_data_orig->segments[0].addr,
2357 				hard_data_orig->segments[0].length))
2358 			errors++;
2359 	}
2360 	return errors;
2361 }
2362 
2363 /* Check the number of code block errors */
2364 static int
2365 validate_turbo_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2366 {
2367 	unsigned int i;
2368 	struct op_data_entries *hard_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
2369 	struct rte_bbdev_op_turbo_dec *ops_td;
2370 	struct rte_bbdev_op_data *hard_output;
2371 	int errors = 0;
2372 	struct rte_mbuf *m;
2373 
2374 	for (i = 0; i < n; ++i) {
2375 		ops_td = &ops[i]->turbo_dec;
2376 		hard_output = &ops_td->hard_output;
2377 		m = hard_output->data;
2378 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2379 				hard_data_orig->segments[0].addr,
2380 				hard_data_orig->segments[0].length))
2381 			errors++;
2382 	}
2383 	return errors;
2384 }
2385 
2386 
2387 static int
2388 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2389 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2390 {
2391 	unsigned int i;
2392 	int ret;
2393 	struct op_data_entries *hard_data_orig =
2394 			&test_vector.entries[DATA_HARD_OUTPUT];
2395 	struct op_data_entries *soft_data_orig =
2396 			&test_vector.entries[DATA_SOFT_OUTPUT];
2397 	struct op_data_entries *harq_data_orig =
2398 				&test_vector.entries[DATA_HARQ_OUTPUT];
2399 	struct rte_bbdev_op_ldpc_dec *ops_td;
2400 	struct rte_bbdev_op_data *hard_output;
2401 	struct rte_bbdev_op_data *harq_output;
2402 	struct rte_bbdev_op_data *soft_output;
2403 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2404 
2405 	for (i = 0; i < n; ++i) {
2406 		ops_td = &ops[i]->ldpc_dec;
2407 		hard_output = &ops_td->hard_output;
2408 		harq_output = &ops_td->harq_combined_output;
2409 		soft_output = &ops_td->soft_output;
2410 
2411 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2412 		TEST_ASSERT_SUCCESS(ret,
2413 				"Checking status and ordering for decoder failed");
2414 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2415 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2416 					"Returned iter_count (%d) > expected iter_count (%d)",
2417 					ops_td->iter_count, ref_td->iter_count);
2418 		/*
2419 		 * We can ignore output data when the decoding failed to
2420 		 * converge or for loop-back cases
2421 		 */
2422 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2423 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2424 				) && (
2425 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2426 						)) == 0)
2427 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2428 					hard_data_orig),
2429 					"Hard output buffers (CB=%u) are not equal",
2430 					i);
2431 
2432 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2433 			TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output,
2434 					soft_data_orig),
2435 					"Soft output buffers (CB=%u) are not equal",
2436 					i);
2437 		if (ref_op->ldpc_dec.op_flags &
2438 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2439 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2440 					harq_data_orig, ops_td),
2441 					"HARQ output buffers (CB=%u) are not equal",
2442 					i);
2443 		}
2444 		if (ref_op->ldpc_dec.op_flags &
2445 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2446 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2447 					harq_data_orig, ops_td),
2448 					"HARQ output buffers (CB=%u) are not equal",
2449 					i);
2450 
2451 	}
2452 
2453 	return TEST_SUCCESS;
2454 }
2455 
2456 
2457 static int
2458 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2459 		struct rte_bbdev_enc_op *ref_op)
2460 {
2461 	unsigned int i;
2462 	int ret;
2463 	struct op_data_entries *hard_data_orig =
2464 			&test_vector.entries[DATA_HARD_OUTPUT];
2465 
2466 	for (i = 0; i < n; ++i) {
2467 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2468 		TEST_ASSERT_SUCCESS(ret,
2469 				"Checking status and ordering for encoder failed");
2470 		TEST_ASSERT_SUCCESS(validate_op_chain(
2471 				&ops[i]->turbo_enc.output,
2472 				hard_data_orig),
2473 				"Output buffers (CB=%u) are not equal",
2474 				i);
2475 	}
2476 
2477 	return TEST_SUCCESS;
2478 }
2479 
2480 static int
2481 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2482 		struct rte_bbdev_enc_op *ref_op)
2483 {
2484 	unsigned int i;
2485 	int ret;
2486 	struct op_data_entries *hard_data_orig =
2487 			&test_vector.entries[DATA_HARD_OUTPUT];
2488 
2489 	for (i = 0; i < n; ++i) {
2490 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2491 		TEST_ASSERT_SUCCESS(ret,
2492 				"Checking status and ordering for encoder failed");
2493 		TEST_ASSERT_SUCCESS(validate_op_chain(
2494 				&ops[i]->ldpc_enc.output,
2495 				hard_data_orig),
2496 				"Output buffers (CB=%u) are not equal",
2497 				i);
2498 	}
2499 
2500 	return TEST_SUCCESS;
2501 }
2502 
2503 static inline int
2504 validate_op_fft_chain(struct rte_bbdev_op_data *op, struct op_data_entries *orig_op)
2505 {
2506 	struct rte_mbuf *m = op->data;
2507 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2508 	int16_t delt, abs_delt, threshold = 3;
2509 	uint32_t j, data_len_iq, error_num;
2510 	int16_t *ref_out, *op_out;
2511 
2512 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2513 			"Number of segments differ in original (%u) and filled (%u) op fft",
2514 			nb_dst_segments, m->nb_segs);
2515 
2516 	/* Due to mbuf size limitations, FFT data does not rely on the real mbuf length; op->length is used instead. */
2517 	for (i = 0; i < nb_dst_segments; ++i) {
2518 		uint16_t offset = (i == 0) ? op->offset : 0;
2519 		uint32_t data_len = op->length;
2520 
2521 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2522 				"Length of segment differ in original (%u) and filled (%u) op fft",
2523 				orig_op->segments[i].length, data_len);
2524 		/* Divide by 2 to get the number of 16-bit samples. */
2525 		data_len_iq = data_len >> 1;
2526 		ref_out = (int16_t *)(orig_op->segments[i].addr);
2527 		op_out = rte_pktmbuf_mtod_offset(m, int16_t *, offset);
2528 		error_num = 0;
2529 		for (j = 0; j < data_len_iq; j++) {
2530 			delt = ref_out[j] - op_out[j];
2531 			abs_delt = delt > 0 ? delt : -delt;
2532 			error_num += (abs_delt > threshold ? 1 : 0);
2533 		}
2534 		if (error_num > 0) {
2535 			rte_memdump(stdout, "Buffer A", ref_out, data_len);
2536 			rte_memdump(stdout, "Buffer B", op_out, data_len);
2537 			TEST_ASSERT(error_num == 0,
2538 				"FFT Output are not matched total (%u) errors (%u)",
2539 				"FFT output does not match: total samples (%u), errors (%u)",
2540 		}
2541 
2542 		m = m->next;
2543 	}
2544 
2545 	return TEST_SUCCESS;
2546 }
2547 
2548 static int
2549 validate_fft_op(struct rte_bbdev_fft_op **ops, const uint16_t n,
2550 		struct rte_bbdev_fft_op *ref_op)
2551 {
2552 	unsigned int i;
2553 	int ret;
2554 	struct op_data_entries *fft_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
2555 	struct op_data_entries *fft_pwr_orig = &test_vector.entries[DATA_SOFT_OUTPUT];
2556 
2557 	for (i = 0; i < n; ++i) {
2558 		ret = check_fft_status_and_ordering(ops[i], i, ref_op->status);
2559 		TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for FFT failed");
2560 		TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2561 				&ops[i]->fft.base_output, fft_data_orig),
2562 				"FFT Output buffers (op=%u) are not matched", i);
2563 		if (check_bit(ops[i]->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS))
2564 			TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2565 				&ops[i]->fft.power_meas_output, fft_pwr_orig),
2566 				"FFT Power Output buffers (op=%u) are not matched", i);
2567 	}
2568 
2569 	return TEST_SUCCESS;
2570 }
2571 
2572 static void
2573 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2574 {
2575 	unsigned int i;
2576 	struct op_data_entries *entry;
2577 
2578 	op->turbo_dec = test_vector.turbo_dec;
2579 	entry = &test_vector.entries[DATA_INPUT];
2580 	for (i = 0; i < entry->nb_segments; ++i)
2581 		op->turbo_dec.input.length +=
2582 				entry->segments[i].length;
2583 }
2584 
2585 static void
2586 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2587 {
2588 	unsigned int i;
2589 	struct op_data_entries *entry;
2590 
2591 	op->ldpc_dec = test_vector.ldpc_dec;
2592 	entry = &test_vector.entries[DATA_INPUT];
2593 	for (i = 0; i < entry->nb_segments; ++i)
2594 		op->ldpc_dec.input.length +=
2595 				entry->segments[i].length;
2596 	if (test_vector.ldpc_dec.op_flags &
2597 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2598 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2599 		for (i = 0; i < entry->nb_segments; ++i)
2600 			op->ldpc_dec.harq_combined_input.length +=
2601 				entry->segments[i].length;
2602 	}
2603 }
2604 
2605 static void
2606 create_reference_fft_op(struct rte_bbdev_fft_op *op)
2607 {
2608 	unsigned int i;
2609 	struct op_data_entries *entry;
2610 	op->fft = test_vector.fft;
2611 	entry = &test_vector.entries[DATA_INPUT];
2612 	for (i = 0; i < entry->nb_segments; ++i)
2613 		op->fft.base_input.length += entry->segments[i].length;
2614 }
2615 
2616 static void
2617 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2618 {
2619 	unsigned int i;
2620 	struct op_data_entries *entry;
2621 
2622 	op->turbo_enc = test_vector.turbo_enc;
2623 	entry = &test_vector.entries[DATA_INPUT];
2624 	for (i = 0; i < entry->nb_segments; ++i)
2625 		op->turbo_enc.input.length +=
2626 				entry->segments[i].length;
2627 }
2628 
2629 static void
2630 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2631 {
2632 	unsigned int i;
2633 	struct op_data_entries *entry;
2634 
2635 	op->ldpc_enc = test_vector.ldpc_enc;
2636 	entry = &test_vector.entries[DATA_INPUT];
2637 	for (i = 0; i < entry->nb_segments; ++i)
2638 		op->ldpc_enc.input.length +=
2639 				entry->segments[i].length;
2640 }
2641 
2642 static uint32_t
2643 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2644 {
2645 	uint8_t i;
2646 	uint32_t c, r, tb_size = 0;
2647 
2648 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2649 		tb_size = op->turbo_dec.tb_params.k_neg;
2650 	} else {
2651 		c = op->turbo_dec.tb_params.c;
2652 		r = op->turbo_dec.tb_params.r;
2653 		for (i = 0; i < c-r; i++)
2654 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2655 				op->turbo_dec.tb_params.k_neg :
2656 				op->turbo_dec.tb_params.k_pos;
2657 	}
2658 	return tb_size;
2659 }
2660 
2661 static uint32_t
2662 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2663 {
2664 	uint8_t num_cbs = 0;
2665 	uint32_t tb_size = 0;
2666 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2667 
2668 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
2669 		num_cbs = 1;
2670 	else
2671 		num_cbs = op->ldpc_dec.tb_params.c - op->ldpc_dec.tb_params.r;
2672 
2673 	tb_size = (sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler) * num_cbs;
2674 	return tb_size;
2675 }
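/*
 * Editorial note (worked example for calc_ldpc_dec_TB_size() above):
 * BG1 has 22 systematic columns, so a single code block with Zc = 384 and
 * n_filler = 0 gives tb_size = 22 * 384 = 8448 bits; BG2 uses 10 systematic
 * columns instead.
 */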
2676 
2677 static uint32_t
2678 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2679 {
2680 	uint8_t i;
2681 	uint32_t c, r, tb_size = 0;
2682 
2683 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2684 		tb_size = op->turbo_enc.tb_params.k_neg;
2685 	} else {
2686 		c = op->turbo_enc.tb_params.c;
2687 		r = op->turbo_enc.tb_params.r;
2688 		for (i = 0; i < c-r; i++)
2689 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2690 				op->turbo_enc.tb_params.k_neg :
2691 				op->turbo_enc.tb_params.k_pos;
2692 	}
2693 	return tb_size;
2694 }
2695 
2696 static uint32_t
2697 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2698 {
2699 	uint8_t num_cbs = 0;
2700 	uint32_t tb_size = 0;
2701 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2702 
2703 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK)
2704 		num_cbs = 1;
2705 	else
2706 		num_cbs = op->ldpc_enc.tb_params.c - op->ldpc_enc.tb_params.r;
2707 
2708 	tb_size = (sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler) * num_cbs;
2709 	return tb_size;
2710 }
2711 
2712 static uint32_t
2713 calc_fft_size(struct rte_bbdev_fft_op *op)
2714 {
2715 	uint32_t output_size;
2716 	int num_cs = 0, i;
2717 	for (i = 0; i < 12; i++)
2718 		if (check_bit(op->fft.cs_bitmap, 1 << i))
2719 			num_cs++;
2720 	output_size = (num_cs * op->fft.output_sequence_size * 4) << op->fft.num_antennas_log2;
2721 	return output_size;
2722 }
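/*
 * Editorial note (worked example for calc_fft_size() above): with 4 bits set
 * in cs_bitmap, output_sequence_size = 1024 and num_antennas_log2 = 2
 * (4 antennas), the output size is (4 * 1024 * 4) << 2 = 65536 bytes,
 * consistent with 4 bytes (16-bit I/Q) per sample per cyclic shift per
 * antenna.
 */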
2723 
2724 static int
2725 init_test_op_params(struct test_op_params *op_params,
2726 		enum rte_bbdev_op_type op_type, const int expected_status,
2727 		const int vector_mask, struct rte_mempool *ops_mp,
2728 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2729 {
2730 	int ret = 0;
2731 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2732 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2733 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2734 				&op_params->ref_dec_op, 1);
2735 	else if (op_type == RTE_BBDEV_OP_FFT)
2736 		ret = rte_bbdev_fft_op_alloc_bulk(ops_mp,
2737 				&op_params->ref_fft_op, 1);
2738 	else
2739 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2740 				&op_params->ref_enc_op, 1);
2741 
2742 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2743 
2744 	op_params->mp = ops_mp;
2745 	op_params->burst_sz = burst_sz;
2746 	op_params->num_to_process = num_to_process;
2747 	op_params->num_lcores = num_lcores;
2748 	op_params->vector_mask = vector_mask;
2749 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2750 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2751 		op_params->ref_dec_op->status = expected_status;
2752 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2753 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2754 		op_params->ref_enc_op->status = expected_status;
2755 	else if (op_type == RTE_BBDEV_OP_FFT)
2756 		op_params->ref_fft_op->status = expected_status;
2757 	return 0;
2758 }
2759 
2760 static int
2761 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2762 		struct test_op_params *op_params)
2763 {
2764 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2765 	unsigned int i;
2766 	struct active_device *ad;
2767 	unsigned int burst_sz = get_burst_sz();
2768 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2769 	const struct rte_bbdev_op_cap *capabilities = NULL;
2770 
2771 	ad = &active_devs[dev_id];
2772 
2773 	/* Check if device supports op_type */
2774 	if (!is_avail_op(ad, test_vector.op_type))
2775 		return TEST_SUCCESS;
2776 
2777 	struct rte_bbdev_info info;
2778 	rte_bbdev_info_get(ad->dev_id, &info);
2779 	socket_id = GET_SOCKET(info.socket_id);
2780 
2781 	f_ret = create_mempools(ad, socket_id, op_type,
2782 			get_num_ops());
2783 	if (f_ret != TEST_SUCCESS) {
2784 		printf("Couldn't create mempools\n");
2785 		goto fail;
2786 	}
2787 	if (op_type == RTE_BBDEV_OP_NONE)
2788 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2789 
2790 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2791 			test_vector.expected_status,
2792 			test_vector.mask,
2793 			ad->ops_mempool,
2794 			burst_sz,
2795 			get_num_ops(),
2796 			get_num_lcores());
2797 	if (f_ret != TEST_SUCCESS) {
2798 		printf("Couldn't init test op params\n");
2799 		goto fail;
2800 	}
2801 
2802 
2803 	/* Find capabilities */
2804 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2805 	do {
2806 		if (cap->type == test_vector.op_type) {
2807 			capabilities = cap;
2808 			break;
2809 		}
2810 		cap++;
2811 	} while (cap->type != RTE_BBDEV_OP_NONE);
2812 	TEST_ASSERT_NOT_NULL(capabilities,
2813 			"Couldn't find capabilities");
2814 
2815 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2816 		create_reference_dec_op(op_params->ref_dec_op);
2817 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2818 		create_reference_enc_op(op_params->ref_enc_op);
2819 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2820 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2821 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2822 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2823 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
2824 		create_reference_fft_op(op_params->ref_fft_op);
2825 
2826 	for (i = 0; i < ad->nb_queues; ++i) {
2827 		f_ret = fill_queue_buffers(op_params,
2828 				ad->in_mbuf_pool,
2829 				ad->hard_out_mbuf_pool,
2830 				ad->soft_out_mbuf_pool,
2831 				ad->harq_in_mbuf_pool,
2832 				ad->harq_out_mbuf_pool,
2833 				ad->queue_ids[i],
2834 				capabilities,
2835 				info.drv.min_alignment,
2836 				socket_id);
2837 		if (f_ret != TEST_SUCCESS) {
2838 			printf("Couldn't init queue buffers\n");
2839 			goto fail;
2840 		}
2841 	}
2842 
2843 	/* Run test case function */
2844 	t_ret = test_case_func(ad, op_params);
2845 
2846 	/* Free active device resources and return */
2847 	free_buffers(ad, op_params);
2848 	return t_ret;
2849 
2850 fail:
2851 	free_buffers(ad, op_params);
2852 	return TEST_FAILED;
2853 }
2854 
2855 /* Run given test function per active device per supported op type
2856  * per burst size.
2857  */
2858 static int
2859 run_test_case(test_case_function *test_case_func)
2860 {
2861 	int ret = 0;
2862 	uint8_t dev;
2863 
2864 	/* Alloc op_params */
2865 	struct test_op_params *op_params = rte_zmalloc(NULL,
2866 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2867 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2868 			RTE_ALIGN(sizeof(struct test_op_params),
2869 				RTE_CACHE_LINE_SIZE));
2870 
2871 	/* For each device run test case function */
2872 	for (dev = 0; dev < nb_active_devs; ++dev)
2873 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2874 
2875 	rte_free(op_params);
2876 
2877 	return ret;
2878 }
2879 
2880 
2881 /* Push back the HARQ output from DDR to host */
2882 static void
2883 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2884 		struct rte_bbdev_dec_op **ops,
2885 		const uint16_t n)
2886 {
2887 	uint16_t j;
2888 	int save_status, ret;
2889 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2890 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2891 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2892 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2893 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2894 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2895 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2896 	for (j = 0; j < n; ++j) {
2897 		if ((loopback && mem_out) || hc_out) {
2898 			save_status = ops[j]->status;
2899 			ops[j]->ldpc_dec.op_flags =
2900 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2901 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2902 			if (h_comp)
2903 				ops[j]->ldpc_dec.op_flags +=
2904 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2905 			ops[j]->ldpc_dec.harq_combined_input.offset =
2906 					harq_offset;
2907 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2908 			harq_offset += HARQ_INCR;
2909 			if (!loopback)
2910 				ops[j]->ldpc_dec.harq_combined_input.length =
2911 				ops[j]->ldpc_dec.harq_combined_output.length;
2912 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2913 					&ops[j], 1);
2914 			ret = 0;
2915 			while (ret == 0)
2916 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2917 						dev_id, queue_id,
2918 						&ops_deq[j], 1);
2919 			ops[j]->ldpc_dec.op_flags = flags;
2920 			ops[j]->status = save_status;
2921 		}
2922 	}
2923 }
2924 
2925 /*
2926  * Preload the HARQ memory input into HW DDR (through a loopback enqueue)
2927  * and adjust the HARQ offsets accordingly
2928  */
2929 static void
2930 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2931 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2932 		bool preload)
2933 {
2934 	uint16_t j;
2935 	int deq;
2936 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2937 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2938 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2939 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2940 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2941 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2942 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2943 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2944 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2945 	if ((mem_in || hc_in) && preload) {
2946 		for (j = 0; j < n; ++j) {
2947 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2948 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2949 			ops[j]->ldpc_dec.op_flags =
2950 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2951 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2952 			if (h_comp)
2953 				ops[j]->ldpc_dec.op_flags +=
2954 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2955 			ops[j]->ldpc_dec.harq_combined_output.offset =
2956 					harq_offset;
2957 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2958 			harq_offset += HARQ_INCR;
2959 		}
2960 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2961 		deq = 0;
2962 		while (deq != n)
2963 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2964 					dev_id, queue_id, &ops_deq[deq],
2965 					n - deq);
2966 		/* Restore the operations */
2967 		for (j = 0; j < n; ++j) {
2968 			ops[j]->ldpc_dec.op_flags = flags;
2969 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2970 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2971 		}
2972 	}
2973 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2974 	for (j = 0; j < n; ++j) {
2975 		/* Adjust HARQ offset when we reach external DDR */
2976 		if (mem_in || hc_in)
2977 			ops[j]->ldpc_dec.harq_combined_input.offset
2978 				= harq_offset;
2979 		if (mem_out || hc_out)
2980 			ops[j]->ldpc_dec.harq_combined_output.offset
2981 				= harq_offset;
2982 		harq_offset += HARQ_INCR;
2983 	}
2984 }
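/*
 * Editorial note on the HARQ DDR layout used in retrieve_harq_ddr() and
 * preload_harq_ddr() above: each queue owns a region of MAX_OPS * HARQ_INCR
 * bytes starting at queue_id * HARQ_INCR * MAX_OPS, and operations within a
 * queue are spaced HARQ_INCR bytes apart, so per-op HARQ buffers do not
 * overlap across ops or queues provided each buffer fits within HARQ_INCR
 * bytes.
 */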
2985 
2986 static void
2987 dequeue_event_callback(uint16_t dev_id,
2988 		enum rte_bbdev_event_type event, void *cb_arg,
2989 		void *ret_param)
2990 {
2991 	int ret;
2992 	uint16_t i;
2993 	uint64_t total_time;
2994 	uint16_t deq, burst_sz, num_ops;
2995 	uint16_t queue_id = *(uint16_t *) ret_param;
2996 	struct rte_bbdev_info info;
2997 	double tb_len_bits;
2998 	struct thread_params *tp = cb_arg;
2999 
3000 	/* Find matching thread params using queue_id */
3001 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
3002 		if (tp->queue_id == queue_id)
3003 			break;
3004 
3005 	if (i == MAX_QUEUES) {
3006 		printf("%s: Queue_id from interrupt details was not found!\n",
3007 				__func__);
3008 		return;
3009 	}
3010 
3011 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
3012 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3013 		printf(
3014 			"Dequeue interrupt handler called for incorrect event!\n");
3015 		return;
3016 	}
3017 
3018 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
3019 	num_ops = tp->op_params->num_to_process;
3020 
3021 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3022 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3023 				&tp->dec_ops[
3024 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3025 				burst_sz);
3026 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3027 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3028 				&tp->dec_ops[
3029 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3030 				burst_sz);
3031 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3032 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3033 				&tp->enc_ops[
3034 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3035 				burst_sz);
3036 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
3037 		deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
3038 				&tp->fft_ops[
3039 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3040 				burst_sz);
3041 	else /*RTE_BBDEV_OP_TURBO_ENC*/
3042 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3043 				&tp->enc_ops[
3044 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3045 				burst_sz);
3046 
3047 	if (deq < burst_sz) {
3048 		printf(
3049 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
3050 			burst_sz, deq);
3051 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3052 		return;
3053 	}
3054 
3055 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
3056 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
3057 		return;
3058 	}
3059 
3060 	total_time = rte_rdtsc_precise() - tp->start_time;
3061 
3062 	rte_bbdev_info_get(dev_id, &info);
3063 
3064 	ret = TEST_SUCCESS;
3065 
3066 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
3067 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3068 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op);
3069 		/* get the max of iter_count for all dequeued ops */
3070 		for (i = 0; i < num_ops; ++i)
3071 			tp->iter_count = RTE_MAX(
3072 					tp->dec_ops[i]->turbo_dec.iter_count,
3073 					tp->iter_count);
3074 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
3075 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
3076 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3077 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
3078 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3079 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
3080 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3081 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
3082 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3083 	} else if (test_vector.op_type == RTE_BBDEV_OP_FFT) {
3084 		struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
3085 		ret = validate_fft_op(tp->fft_ops, num_ops, ref_op);
3086 		rte_bbdev_fft_op_free_bulk(tp->fft_ops, deq);
3087 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
3088 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3089 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
3090 				tp->op_params->vector_mask);
3091 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
3092 	}
3093 
3094 	if (ret) {
3095 		printf("Buffers validation failed\n");
3096 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3097 	}
3098 
3099 	switch (test_vector.op_type) {
3100 	case RTE_BBDEV_OP_TURBO_DEC:
3101 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
3102 		break;
3103 	case RTE_BBDEV_OP_TURBO_ENC:
3104 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
3105 		break;
3106 	case RTE_BBDEV_OP_LDPC_DEC:
3107 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
3108 		break;
3109 	case RTE_BBDEV_OP_FFT:
3110 		tb_len_bits = calc_fft_size(tp->op_params->ref_fft_op);
3111 		break;
3112 	case RTE_BBDEV_OP_LDPC_ENC:
3113 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
3114 		break;
3115 	case RTE_BBDEV_OP_NONE:
3116 		tb_len_bits = 0.0;
3117 		break;
3118 	default:
3119 		printf("Unknown op type: %d\n", test_vector.op_type);
3120 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3121 		return;
3122 	}
3123 
3124 	tp->ops_per_sec += ((double)num_ops) /
3125 			((double)total_time / (double)rte_get_tsc_hz());
3126 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
3127 			((double)total_time / (double)rte_get_tsc_hz());
3128 
3129 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
3130 }
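/*
 * Editorial note (worked example for the throughput figures accumulated in
 * dequeue_event_callback() above): with num_ops = 512 operations of
 * tb_len_bits = 8448 bits completed in a total_time corresponding to 1 ms,
 * ops_per_sec gains 512 / 0.001 = 512000 and mbps gains
 * (512 * 8448 / 1e6) / 0.001 = 4325.376 Mbps.
 */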
3131 
3132 static int
3133 throughput_intr_lcore_ldpc_dec(void *arg)
3134 {
3135 	struct thread_params *tp = arg;
3136 	unsigned int enqueued;
3137 	const uint16_t queue_id = tp->queue_id;
3138 	const uint16_t burst_sz = tp->op_params->burst_sz;
3139 	const uint16_t num_to_process = tp->op_params->num_to_process;
3140 	struct rte_bbdev_dec_op *ops[num_to_process];
3141 	struct test_buffers *bufs = NULL;
3142 	struct rte_bbdev_info info;
3143 	int ret, i, j;
3144 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3145 	uint16_t num_to_enq, enq;
3146 
3147 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3148 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3149 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3150 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3151 
3152 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3153 			"BURST_SIZE should be <= %u", MAX_BURST);
3154 
3155 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3156 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3157 			tp->dev_id, queue_id);
3158 
3159 	rte_bbdev_info_get(tp->dev_id, &info);
3160 
3161 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3162 			"NUM_OPS cannot exceed %u for this device",
3163 			info.drv.queue_size_lim);
3164 
3165 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3166 
3167 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3168 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3169 
3170 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3171 
3172 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3173 				num_to_process);
3174 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3175 			num_to_process);
3176 	ref_op->ldpc_dec.iter_max = get_iter_max();
3177 
3178 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3179 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
3180 				bufs->hard_outputs, bufs->soft_outputs,
3181 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3182 
3183 	/* Set counter to validate the ordering */
3184 	for (j = 0; j < num_to_process; ++j)
3185 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3186 
3187 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3188 		for (i = 0; i < num_to_process; ++i) {
3189 			if (!loopback)
3190 				mbuf_reset(ops[i]->ldpc_dec.hard_output.data);
3191 			if (hc_out || loopback)
3192 				mbuf_reset(ops[i]->ldpc_dec.harq_combined_output.data);
3193 			if (ops[i]->ldpc_dec.soft_output.data != NULL)
3194 				mbuf_reset(ops[i]->ldpc_dec.soft_output.data);
3195 		}
3196 
3197 		tp->start_time = rte_rdtsc_precise();
3198 		for (enqueued = 0; enqueued < num_to_process;) {
3199 			num_to_enq = burst_sz;
3200 
3201 			if (unlikely(num_to_process - enqueued < num_to_enq))
3202 				num_to_enq = num_to_process - enqueued;
3203 
3204 			enq = 0;
3205 			do {
3206 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
3207 						tp->dev_id,
3208 						queue_id, &ops[enqueued],
3209 						num_to_enq);
3210 			} while (unlikely(num_to_enq != enq));
3211 			enqueued += enq;
3212 
3213 			/* Write to thread burst_sz current number of enqueued
3214 			 * descriptors. It ensures that proper number of
3215 			 * descriptors will be dequeued in callback
3216 			 * function - needed for last batch in case where
3217 			 * the number of operations is not a multiple of
3218 			 * burst size.
3219 			 */
3220 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3221 
3222 			/* Wait until processing of previous batch is
3223 			 * completed
3224 			 */
3225 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3226 		}
3227 		if (j != TEST_REPETITIONS - 1)
3228 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3229 	}
3230 
3231 	return TEST_SUCCESS;
3232 }
3233 
3234 static int
3235 throughput_intr_lcore_dec(void *arg)
3236 {
3237 	struct thread_params *tp = arg;
3238 	unsigned int enqueued;
3239 	const uint16_t queue_id = tp->queue_id;
3240 	const uint16_t burst_sz = tp->op_params->burst_sz;
3241 	const uint16_t num_to_process = tp->op_params->num_to_process;
3242 	struct rte_bbdev_dec_op *ops[num_to_process];
3243 	struct test_buffers *bufs = NULL;
3244 	struct rte_bbdev_info info;
3245 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3246 	int ret, i, j;
3247 	uint16_t num_to_enq, enq;
3248 
3249 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3250 			"BURST_SIZE should be <= %u", MAX_BURST);
3251 
3252 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3253 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3254 			tp->dev_id, queue_id);
3255 
3256 	rte_bbdev_info_get(tp->dev_id, &info);
3257 
3258 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3259 			"NUM_OPS cannot exceed %u for this device",
3260 			info.drv.queue_size_lim);
3261 
3262 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3263 
3264 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3265 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3266 
3267 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3268 
3269 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3270 				num_to_process);
3271 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
3272 	ref_op->turbo_dec.iter_max = get_iter_max();
3273 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3274 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
3275 				bufs->hard_outputs, bufs->soft_outputs,
3276 				tp->op_params->ref_dec_op);
3277 
3278 	/* Set counter to validate the ordering. */
3279 	for (j = 0; j < num_to_process; ++j)
3280 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3281 
3282 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3283 		for (i = 0; i < num_to_process; ++i) {
3284 			mbuf_reset(ops[i]->turbo_dec.hard_output.data);
3285 			if (ops[i]->turbo_dec.soft_output.data != NULL)
3286 				mbuf_reset(ops[i]->turbo_dec.soft_output.data);
3287 		}
3288 
3289 		tp->start_time = rte_rdtsc_precise();
3290 		for (enqueued = 0; enqueued < num_to_process;) {
3291 			num_to_enq = burst_sz;
3292 
3293 			if (unlikely(num_to_process - enqueued < num_to_enq))
3294 				num_to_enq = num_to_process - enqueued;
3295 
3296 			enq = 0;
3297 			do {
3298 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3299 						queue_id, &ops[enqueued],
3300 						num_to_enq);
3301 			} while (unlikely(num_to_enq != enq));
3302 			enqueued += enq;
3303 
3304 			/* Write the current number of enqueued descriptors
3305 			 * to the thread's burst_sz. This ensures that the
3306 			 * proper number of descriptors is dequeued in the
3307 			 * callback function - needed for the last batch
3308 			 * when the number of operations is not a multiple
3309 			 * of the burst size.
3310 			 */
3311 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3312 
3313 			/* Wait until processing of previous batch is
3314 			 * completed
3315 			 */
3316 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3317 		}
3318 		if (j != TEST_REPETITIONS - 1)
3319 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3320 	}
3321 
3322 	return TEST_SUCCESS;
3323 }
3324 
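/* Interrupt-mode throughput test routine for a Turbo encoder queue, dequeue done in the event callback */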
3325 static int
3326 throughput_intr_lcore_enc(void *arg)
3327 {
3328 	struct thread_params *tp = arg;
3329 	unsigned int enqueued;
3330 	const uint16_t queue_id = tp->queue_id;
3331 	const uint16_t burst_sz = tp->op_params->burst_sz;
3332 	const uint16_t num_to_process = tp->op_params->num_to_process;
3333 	struct rte_bbdev_enc_op *ops[num_to_process];
3334 	struct test_buffers *bufs = NULL;
3335 	struct rte_bbdev_info info;
3336 	int ret, i, j;
3337 	uint16_t num_to_enq, enq;
3338 
3339 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3340 			"BURST_SIZE should be <= %u", MAX_BURST);
3341 
3342 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3343 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3344 			tp->dev_id, queue_id);
3345 
3346 	rte_bbdev_info_get(tp->dev_id, &info);
3347 
3348 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3349 			"NUM_OPS cannot exceed %u for this device",
3350 			info.drv.queue_size_lim);
3351 
3352 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3353 
3354 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3355 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3356 
3357 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3358 
3359 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3360 			num_to_process);
3361 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3362 			num_to_process);
3363 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3364 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
3365 				bufs->hard_outputs, tp->op_params->ref_enc_op);
3366 
3367 	/* Set counter to validate the ordering */
3368 	for (j = 0; j < num_to_process; ++j)
3369 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3370 
3371 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3372 		for (i = 0; i < num_to_process; ++i)
3373 			mbuf_reset(ops[i]->turbo_enc.output.data);
3374 
3375 		tp->start_time = rte_rdtsc_precise();
3376 		for (enqueued = 0; enqueued < num_to_process;) {
3377 			num_to_enq = burst_sz;
3378 
3379 			if (unlikely(num_to_process - enqueued < num_to_enq))
3380 				num_to_enq = num_to_process - enqueued;
3381 
3382 			enq = 0;
3383 			do {
3384 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3385 						queue_id, &ops[enqueued],
3386 						num_to_enq);
3387 			} while (unlikely(enq != num_to_enq));
3388 			enqueued += enq;
3389 
3390 			/* Write the current number of enqueued descriptors
3391 			 * to the thread's burst_sz. This ensures that the
3392 			 * proper number of descriptors is dequeued in the
3393 			 * callback function - needed for the last batch
3394 			 * when the number of operations is not a multiple
3395 			 * of the burst size.
3396 			 */
3397 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3398 
3399 			/* Wait until processing of previous batch is
3400 			 * completed
3401 			 */
3402 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3403 		}
3404 		if (j != TEST_REPETITIONS - 1)
3405 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3406 	}
3407 
3408 	return TEST_SUCCESS;
3409 }
3410 
3411 
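/* Interrupt-mode throughput test routine for an LDPC encoder queue, dequeue done in the event callback */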
3412 static int
3413 throughput_intr_lcore_ldpc_enc(void *arg)
3414 {
3415 	struct thread_params *tp = arg;
3416 	unsigned int enqueued;
3417 	const uint16_t queue_id = tp->queue_id;
3418 	const uint16_t burst_sz = tp->op_params->burst_sz;
3419 	const uint16_t num_to_process = tp->op_params->num_to_process;
3420 	struct rte_bbdev_enc_op *ops[num_to_process];
3421 	struct test_buffers *bufs = NULL;
3422 	struct rte_bbdev_info info;
3423 	int ret, i, j;
3424 	uint16_t num_to_enq, enq;
3425 
3426 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3427 			"BURST_SIZE should be <= %u", MAX_BURST);
3428 
3429 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3430 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3431 			tp->dev_id, queue_id);
3432 
3433 	rte_bbdev_info_get(tp->dev_id, &info);
3434 
3435 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3436 			"NUM_OPS cannot exceed %u for this device",
3437 			info.drv.queue_size_lim);
3438 
3439 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3440 
3441 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3442 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3443 
3444 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3445 
3446 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3447 			num_to_process);
3448 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3449 			num_to_process);
3450 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3451 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3452 				bufs->inputs, bufs->hard_outputs,
3453 				tp->op_params->ref_enc_op);
3454 
3455 	/* Set counter to validate the ordering */
3456 	for (j = 0; j < num_to_process; ++j)
3457 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3458 
3459 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3460 		for (i = 0; i < num_to_process; ++i)
3461 			mbuf_reset(ops[i]->turbo_enc.output.data);
3462 
3463 		tp->start_time = rte_rdtsc_precise();
3464 		for (enqueued = 0; enqueued < num_to_process;) {
3465 			num_to_enq = burst_sz;
3466 
3467 			if (unlikely(num_to_process - enqueued < num_to_enq))
3468 				num_to_enq = num_to_process - enqueued;
3469 
3470 			enq = 0;
3471 			do {
3472 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3473 						tp->dev_id,
3474 						queue_id, &ops[enqueued],
3475 						num_to_enq);
3476 			} while (unlikely(enq != num_to_enq));
3477 			enqueued += enq;
3478 
3479 			/* Write the current number of enqueued descriptors
3480 			 * to the thread's burst_sz. This ensures that the
3481 			 * proper number of descriptors is dequeued in the
3482 			 * callback function - needed for the last batch
3483 			 * when the number of operations is not a multiple
3484 			 * of the burst size.
3485 			 */
3486 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3487 
3488 			/* Wait until processing of previous batch is
3489 			 * completed
3490 			 */
3491 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3492 		}
3493 		if (j != TEST_REPETITIONS - 1)
3494 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3495 	}
3496 
3497 	return TEST_SUCCESS;
3498 }
3499 
3500 
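/* Interrupt-mode throughput test routine for an FFT queue, dequeue done in the event callback */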
3501 static int
3502 throughput_intr_lcore_fft(void *arg)
3503 {
3504 	struct thread_params *tp = arg;
3505 	unsigned int enqueued;
3506 	const uint16_t queue_id = tp->queue_id;
3507 	const uint16_t burst_sz = tp->op_params->burst_sz;
3508 	const uint16_t num_to_process = tp->op_params->num_to_process;
3509 	struct rte_bbdev_fft_op *ops[num_to_process];
3510 	struct test_buffers *bufs = NULL;
3511 	struct rte_bbdev_info info;
3512 	int ret, i, j;
3513 	uint16_t num_to_enq, enq;
3514 
3515 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3516 			"BURST_SIZE should be <= %u", MAX_BURST);
3517 
3518 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3519 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3520 			tp->dev_id, queue_id);
3521 
3522 	rte_bbdev_info_get(tp->dev_id, &info);
3523 
3524 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3525 			"NUM_OPS cannot exceed %u for this device",
3526 			info.drv.queue_size_lim);
3527 
3528 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3529 
3530 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3531 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3532 
3533 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3534 
3535 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops,
3536 			num_to_process);
3537 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3538 			num_to_process);
3539 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3540 		copy_reference_fft_op(ops, num_to_process, 0, bufs->inputs,
3541 				bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_fft_op);
3542 
3543 	/* Set counter to validate the ordering */
3544 	for (j = 0; j < num_to_process; ++j)
3545 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3546 
3547 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3548 		for (i = 0; i < num_to_process; ++i)
3549 			mbuf_reset(ops[i]->fft.base_output.data);
3550 
3551 		tp->start_time = rte_rdtsc_precise();
3552 		for (enqueued = 0; enqueued < num_to_process;) {
3553 			num_to_enq = burst_sz;
3554 
3555 			if (unlikely(num_to_process - enqueued < num_to_enq))
3556 				num_to_enq = num_to_process - enqueued;
3557 
3558 			enq = 0;
3559 			do {
3560 				enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
3561 						queue_id, &ops[enqueued],
3562 						num_to_enq);
3563 			} while (unlikely(enq != num_to_enq));
3564 			enqueued += enq;
3565 
3566 			/* Write the current number of enqueued descriptors
3567 			 * to the thread's burst_sz. This ensures that the
3568 			 * proper number of descriptors is dequeued in the
3569 			 * callback function - needed for the last batch
3570 			 * when the number of operations is not a multiple
3571 			 * of the burst size.
3572 			 */
3573 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3574 
3575 			/* Wait until processing of previous batch is
3576 			 * completed
3577 			 */
3578 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3579 		}
3580 		if (j != TEST_REPETITIONS - 1)
3581 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3582 	}
3583 
3584 	return TEST_SUCCESS;
3585 }
3586 
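/* PMD (polling) mode throughput test routine for a Turbo decoder queue */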
3587 static int
3588 throughput_pmd_lcore_dec(void *arg)
3589 {
3590 	struct thread_params *tp = arg;
3591 	uint16_t enq, deq;
3592 	uint64_t total_time = 0, start_time;
3593 	const uint16_t queue_id = tp->queue_id;
3594 	const uint16_t burst_sz = tp->op_params->burst_sz;
3595 	const uint16_t num_ops = tp->op_params->num_to_process;
3596 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3597 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3598 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3599 	struct test_buffers *bufs = NULL;
3600 	int i, j, ret;
3601 	struct rte_bbdev_info info;
3602 	uint16_t num_to_enq;
3603 	bool so_enable;
3604 
3605 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3606 			"BURST_SIZE should be <= %u", MAX_BURST);
3607 
3608 	rte_bbdev_info_get(tp->dev_id, &info);
3609 
3610 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3611 			"NUM_OPS cannot exceed %u for this device",
3612 			info.drv.queue_size_lim);
3613 
3614 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3615 
3616 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3617 
3618 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3619 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3620 	ref_op->turbo_dec.iter_max = get_iter_max();
3621 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3622 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3623 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3624 
3625 	so_enable = check_bit(ops_enq[0]->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT);
3626 
3627 	/* Set counter to validate the ordering */
3628 	for (j = 0; j < num_ops; ++j)
3629 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3630 
3631 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3632 		uint32_t time_out = 0;
3633 		for (j = 0; j < num_ops; ++j)
3634 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3635 		if (so_enable)
3636 			for (j = 0; j < num_ops; ++j)
3637 				mbuf_reset(ops_enq[j]->turbo_dec.soft_output.data);
3638 
3639 		start_time = rte_rdtsc_precise();
3640 
3641 		for (enq = 0, deq = 0; enq < num_ops;) {
3642 			num_to_enq = burst_sz;
3643 
3644 			if (unlikely(num_ops - enq < num_to_enq))
3645 				num_to_enq = num_ops - enq;
3646 
3647 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3648 					queue_id, &ops_enq[enq], num_to_enq);
3649 
3650 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3651 					queue_id, &ops_deq[deq], enq - deq);
3652 			time_out++;
3653 			if (time_out >= TIME_OUT_POLL) {
3654 				timeout_exit(tp->dev_id);
3655 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3656 			}
3657 		}
3658 
3659 		/* dequeue the remaining */
3660 		time_out = 0;
3661 		while (deq < enq) {
3662 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3663 					queue_id, &ops_deq[deq], enq - deq);
3664 			time_out++;
3665 			if (time_out >= TIME_OUT_POLL) {
3666 				timeout_exit(tp->dev_id);
3667 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3668 			}
3669 		}
3670 
3671 		total_time += rte_rdtsc_precise() - start_time;
3672 	}
3673 
3674 	tp->iter_count = 0;
3675 	/* get the max of iter_count for all dequeued ops */
3676 	for (i = 0; i < num_ops; ++i) {
3677 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3678 				tp->iter_count);
3679 	}
3680 
3681 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3682 		ret = validate_dec_op(ops_deq, num_ops, ref_op);
3683 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3684 	}
3685 
3686 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3687 
3688 	double tb_len_bits = calc_dec_TB_size(ref_op);
3689 
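	/* Derive per-core metrics from the TSC measurements:
	 * ops_per_sec = (num_ops * TEST_REPETITIONS) / elapsed_sec and
	 * mbps = ops_per_sec * tb_len_bits / 1e6,
	 * where elapsed_sec = total_time / rte_get_tsc_hz().
	 */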
3690 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3691 			((double)total_time / (double)rte_get_tsc_hz());
3692 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3693 			1000000.0) / ((double)total_time /
3694 			(double)rte_get_tsc_hz());
3695 
3696 	return TEST_SUCCESS;
3697 }
3698 
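/* BLER test routine for an LDPC decoder queue, run in PMD (polling) mode */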
3699 static int
3700 bler_pmd_lcore_ldpc_dec(void *arg)
3701 {
3702 	struct thread_params *tp = arg;
3703 	uint16_t enq, deq;
3704 	uint64_t total_time = 0, start_time;
3705 	const uint16_t queue_id = tp->queue_id;
3706 	const uint16_t burst_sz = tp->op_params->burst_sz;
3707 	const uint16_t num_ops = tp->op_params->num_to_process;
3708 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3709 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3710 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3711 	struct test_buffers *bufs = NULL;
3712 	int i, j, ret;
3713 	float parity_bler = 0;
3714 	struct rte_bbdev_info info;
3715 	uint16_t num_to_enq;
3716 	bool extDdr = check_bit(ldpc_cap_flags,
3717 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3718 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3719 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3720 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3721 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3722 
3723 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3724 			"BURST_SIZE should be <= %u", MAX_BURST);
3725 	TEST_ASSERT_SUCCESS((num_ops == 0), "NUM_OPS must be greater than 0");
3726 
3727 	rte_bbdev_info_get(tp->dev_id, &info);
3728 
3729 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3730 			"NUM_OPS cannot exceed %u for this device",
3731 			info.drv.queue_size_lim);
3732 
3733 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3734 
3735 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3736 
3737 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3738 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3739 
3740 	/* For BLER tests we need to enable early termination */
3741 	if (!check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3742 		ref_op->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3743 
3744 	ref_op->ldpc_dec.iter_max = get_iter_max();
3745 
3746 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3747 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3748 				bufs->hard_outputs, bufs->soft_outputs,
3749 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3750 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3751 
3752 	/* Set counter to validate the ordering */
3753 	for (j = 0; j < num_ops; ++j)
3754 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3755 
3756 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3757 		uint32_t time_out = 0;
3758 		for (j = 0; j < num_ops; ++j) {
3759 			if (!loopback)
3760 				mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
3761 			if (hc_out || loopback)
3762 				mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
3763 			if (ops_enq[j]->ldpc_dec.soft_output.data != NULL)
3764 				mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data);
3765 		}
3766 		if (extDdr)
3767 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3768 					num_ops, true);
3769 		start_time = rte_rdtsc_precise();
3770 
3771 		for (enq = 0, deq = 0; enq < num_ops;) {
3772 			num_to_enq = burst_sz;
3773 
3774 			if (unlikely(num_ops - enq < num_to_enq))
3775 				num_to_enq = num_ops - enq;
3776 
3777 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3778 					queue_id, &ops_enq[enq], num_to_enq);
3779 
3780 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3781 					queue_id, &ops_deq[deq], enq - deq);
3782 			time_out++;
3783 			if (time_out >= TIME_OUT_POLL) {
3784 				timeout_exit(tp->dev_id);
3785 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3786 			}
3787 		}
3788 
3789 		/* dequeue the remaining */
3790 		time_out = 0;
3791 		while (deq < enq) {
3792 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3793 					queue_id, &ops_deq[deq], enq - deq);
3794 			time_out++;
3795 			if (time_out >= TIME_OUT_POLL) {
3796 				timeout_exit(tp->dev_id);
3797 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3798 			}
3799 		}
3800 
3801 		total_time += rte_rdtsc_precise() - start_time;
3802 	}
3803 
3804 	tp->iter_count = 0;
3805 	tp->iter_average = 0;
3806 	/* get the max of iter_count for all dequeued ops */
3807 	for (i = 0; i < num_ops; ++i) {
3808 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3809 				tp->iter_count);
3810 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3811 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3812 			parity_bler += 1.0;
3813 	}
3814 
3815 	parity_bler /= num_ops; /* BLER based on the syndrome (SYND) error status */
3816 	tp->iter_average /= num_ops;
3817 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3818 
3819 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3820 			&& tp->bler == 0
3821 			&& parity_bler == 0
3822 			&& !hc_out) {
3823 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3824 				tp->op_params->vector_mask);
3825 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3826 	}
3827 
3828 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3829 
3830 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3831 	tp->ops_per_sec = ((double)num_ops * 1) /
3832 			((double)total_time / (double)rte_get_tsc_hz());
3833 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3834 			1000000.0) / ((double)total_time /
3835 			(double)rte_get_tsc_hz());
3836 
3837 	return TEST_SUCCESS;
3838 }
3839 
3840 
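/* BLER test routine for a Turbo decoder queue, run in PMD (polling) mode */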
3841 static int
3842 bler_pmd_lcore_turbo_dec(void *arg)
3843 {
3844 	struct thread_params *tp = arg;
3845 	uint16_t enq, deq;
3846 	uint64_t total_time = 0, start_time;
3847 	const uint16_t queue_id = tp->queue_id;
3848 	const uint16_t burst_sz = tp->op_params->burst_sz;
3849 	const uint16_t num_ops = tp->op_params->num_to_process;
3850 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3851 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3852 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3853 	struct test_buffers *bufs = NULL;
3854 	int i, j, ret;
3855 	struct rte_bbdev_info info;
3856 	uint16_t num_to_enq;
3857 
3858 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3859 			"BURST_SIZE should be <= %u", MAX_BURST);
3860 	TEST_ASSERT_SUCCESS((num_ops == 0), "NUM_OPS must be greater than 0");
3861 
3862 	rte_bbdev_info_get(tp->dev_id, &info);
3863 
3864 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3865 			"NUM_OPS cannot exceed %u for this device",
3866 			info.drv.queue_size_lim);
3867 
3868 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3869 
3870 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3871 
3872 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3873 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3874 
3875 	/* For BLER tests we need to enable early termination */
3876 	if (!check_bit(ref_op->turbo_dec.op_flags, RTE_BBDEV_TURBO_EARLY_TERMINATION))
3877 		ref_op->turbo_dec.op_flags |= RTE_BBDEV_TURBO_EARLY_TERMINATION;
3878 
3879 	ref_op->turbo_dec.iter_max = get_iter_max();
3880 
3881 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3882 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3883 				bufs->hard_outputs, bufs->soft_outputs,
3884 				ref_op);
3885 	generate_turbo_llr_input(num_ops, bufs->inputs, ref_op);
3886 
3887 	/* Set counter to validate the ordering */
3888 	for (j = 0; j < num_ops; ++j)
3889 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3890 
3891 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3892 		uint32_t time_out = 0;
3893 		for (j = 0; j < num_ops; ++j) {
3894 			mbuf_reset(
3895 					ops_enq[j]->turbo_dec.hard_output.data);
3896 		}
3897 
3898 		start_time = rte_rdtsc_precise();
3899 
3900 		for (enq = 0, deq = 0; enq < num_ops;) {
3901 			num_to_enq = burst_sz;
3902 
3903 			if (unlikely(num_ops - enq < num_to_enq))
3904 				num_to_enq = num_ops - enq;
3905 
3906 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3907 					queue_id, &ops_enq[enq], num_to_enq);
3908 
3909 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3910 					queue_id, &ops_deq[deq], enq - deq);
3911 			time_out++;
3912 			if (time_out >= TIME_OUT_POLL) {
3913 				timeout_exit(tp->dev_id);
3914 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3915 			}
3916 		}
3917 
3918 		/* dequeue the remaining */
3919 		time_out = 0;
3920 		while (deq < enq) {
3921 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3922 					queue_id, &ops_deq[deq], enq - deq);
3923 			time_out++;
3924 			if (time_out >= TIME_OUT_POLL) {
3925 				timeout_exit(tp->dev_id);
3926 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3927 			}
3928 		}
3929 
3930 		total_time += rte_rdtsc_precise() - start_time;
3931 	}
3932 
3933 	tp->iter_count = 0;
3934 	tp->iter_average = 0;
3935 	/* get the max of iter_count for all dequeued ops */
3936 	for (i = 0; i < num_ops; ++i) {
3937 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3938 				tp->iter_count);
3939 		tp->iter_average += (double) ops_enq[i]->turbo_dec.iter_count;
3940 	}
3941 
3942 	tp->iter_average /= num_ops;
3943 	tp->bler = (double) validate_turbo_bler(ops_deq, num_ops) / num_ops;
3944 
3945 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3946 
3947 	double tb_len_bits = calc_dec_TB_size(ref_op);
3948 	tp->ops_per_sec = ((double)num_ops * 1) /
3949 			((double)total_time / (double)rte_get_tsc_hz());
3950 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3951 			1000000.0) / ((double)total_time /
3952 			(double)rte_get_tsc_hz());
3953 	printf("TBS %.0f bits, Time %.0f us\n", tb_len_bits, 1000000.0 *
3954 			((double)total_time / (double)rte_get_tsc_hz()));
3955 
3956 	return TEST_SUCCESS;
3957 }
3958 
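/* PMD (polling) mode throughput test routine for an LDPC decoder queue */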
3959 static int
3960 throughput_pmd_lcore_ldpc_dec(void *arg)
3961 {
3962 	struct thread_params *tp = arg;
3963 	uint16_t enq, deq;
3964 	uint64_t total_time = 0, start_time;
3965 	const uint16_t queue_id = tp->queue_id;
3966 	const uint16_t burst_sz = tp->op_params->burst_sz;
3967 	const uint16_t num_ops = tp->op_params->num_to_process;
3968 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3969 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3970 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3971 	struct test_buffers *bufs = NULL;
3972 	int i, j, ret;
3973 	struct rte_bbdev_info info;
3974 	uint16_t num_to_enq;
3975 	bool extDdr = check_bit(ldpc_cap_flags,
3976 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3977 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3978 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3979 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3980 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3981 
3982 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3983 			"BURST_SIZE should be <= %u", MAX_BURST);
3984 
3985 	rte_bbdev_info_get(tp->dev_id, &info);
3986 
3987 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3988 			"NUM_OPS cannot exceed %u for this device",
3989 			info.drv.queue_size_lim);
3990 
3991 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3992 
3993 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3994 
3995 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3996 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3997 
3998 	/* For throughput tests we need to disable early termination */
3999 	if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4000 		ref_op->ldpc_dec.op_flags &= ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4001 
4002 	ref_op->ldpc_dec.iter_max = get_iter_max();
4003 	/* Since ET is disabled, the expected iter_count is iter_max */
4004 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4005 
4006 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4007 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
4008 				bufs->hard_outputs, bufs->soft_outputs,
4009 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
4010 
4011 	/* Set counter to validate the ordering */
4012 	for (j = 0; j < num_ops; ++j)
4013 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4014 
4015 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4016 		uint32_t time_out = 0;
4017 		for (j = 0; j < num_ops; ++j) {
4018 			if (!loopback)
4019 				mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
4020 			if (hc_out || loopback)
4021 				mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
4022 			if (ops_enq[j]->ldpc_dec.soft_output.data != NULL)
4023 				mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data);
4024 		}
4025 		if (extDdr)
4026 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
4027 					num_ops, true);
4028 		start_time = rte_rdtsc_precise();
4029 
4030 		for (enq = 0, deq = 0; enq < num_ops;) {
4031 			num_to_enq = burst_sz;
4032 
4033 			if (unlikely(num_ops - enq < num_to_enq))
4034 				num_to_enq = num_ops - enq;
4035 
4036 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
4037 					queue_id, &ops_enq[enq], num_to_enq);
4038 
4039 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
4040 					queue_id, &ops_deq[deq], enq - deq);
4041 			time_out++;
4042 			if (time_out >= TIME_OUT_POLL) {
4043 				timeout_exit(tp->dev_id);
4044 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4045 			}
4046 		}
4047 
4048 		/* dequeue the remaining */
4049 		time_out = 0;
4050 		while (deq < enq) {
4051 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
4052 					queue_id, &ops_deq[deq], enq - deq);
4053 			time_out++;
4054 			if (time_out >= TIME_OUT_POLL) {
4055 				timeout_exit(tp->dev_id);
4056 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4057 			}
4058 		}
4059 
4060 		total_time += rte_rdtsc_precise() - start_time;
4061 	}
4062 
4063 	tp->iter_count = 0;
4064 	/* get the max of iter_count for all dequeued ops */
4065 	for (i = 0; i < num_ops; ++i) {
4066 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
4067 				tp->iter_count);
4068 	}
4069 	if (extDdr) {
4070 		/* Read loopback is not thread safe */
4071 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
4072 	}
4073 
4074 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4075 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
4076 				tp->op_params->vector_mask);
4077 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4078 	}
4079 
4080 	ret = rte_bbdev_queue_stop(tp->dev_id, queue_id);
4081 	if (ret != 0)
4082 		printf("Failed to stop queue on dev %u q_id: %u\n", tp->dev_id, queue_id);
4083 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
4084 
4085 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
4086 
4087 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4088 			((double)total_time / (double)rte_get_tsc_hz());
4089 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
4090 			1000000.0) / ((double)total_time /
4091 			(double)rte_get_tsc_hz());
4092 
4093 	return TEST_SUCCESS;
4094 }
4095 
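/* PMD (polling) mode throughput test routine for a Turbo encoder queue */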
4096 static int
4097 throughput_pmd_lcore_enc(void *arg)
4098 {
4099 	struct thread_params *tp = arg;
4100 	uint16_t enq, deq;
4101 	uint64_t total_time = 0, start_time;
4102 	const uint16_t queue_id = tp->queue_id;
4103 	const uint16_t burst_sz = tp->op_params->burst_sz;
4104 	const uint16_t num_ops = tp->op_params->num_to_process;
4105 	struct rte_bbdev_enc_op *ops_enq[num_ops];
4106 	struct rte_bbdev_enc_op *ops_deq[num_ops];
4107 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
4108 	struct test_buffers *bufs = NULL;
4109 	int i, j, ret;
4110 	struct rte_bbdev_info info;
4111 	uint16_t num_to_enq;
4112 
4113 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4114 			"BURST_SIZE should be <= %u", MAX_BURST);
4115 
4116 	rte_bbdev_info_get(tp->dev_id, &info);
4117 
4118 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4119 			"NUM_OPS cannot exceed %u for this device",
4120 			info.drv.queue_size_lim);
4121 
4122 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4123 
4124 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4125 
4126 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
4127 			num_ops);
4128 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
4129 			num_ops);
4130 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4131 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
4132 				bufs->hard_outputs, ref_op);
4133 
4134 	/* Set counter to validate the ordering */
4135 	for (j = 0; j < num_ops; ++j)
4136 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4137 
4138 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4139 		uint32_t time_out = 0;
4140 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4141 			for (j = 0; j < num_ops; ++j)
4142 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
4143 
4144 		start_time = rte_rdtsc_precise();
4145 
4146 		for (enq = 0, deq = 0; enq < num_ops;) {
4147 			num_to_enq = burst_sz;
4148 
4149 			if (unlikely(num_ops - enq < num_to_enq))
4150 				num_to_enq = num_ops - enq;
4151 
4152 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
4153 					queue_id, &ops_enq[enq], num_to_enq);
4154 
4155 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
4156 					queue_id, &ops_deq[deq], enq - deq);
4157 			time_out++;
4158 			if (time_out >= TIME_OUT_POLL) {
4159 				timeout_exit(tp->dev_id);
4160 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4161 			}
4162 		}
4163 
4164 		/* dequeue the remaining */
4165 		time_out = 0;
4166 		while (deq < enq) {
4167 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
4168 					queue_id, &ops_deq[deq], enq - deq);
4169 			time_out++;
4170 			if (time_out >= TIME_OUT_POLL) {
4171 				timeout_exit(tp->dev_id);
4172 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4173 			}
4174 		}
4175 
4176 		total_time += rte_rdtsc_precise() - start_time;
4177 	}
4178 
4179 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4180 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
4181 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4182 	}
4183 
4184 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
4185 
4186 	double tb_len_bits = calc_enc_TB_size(ref_op);
4187 
4188 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4189 			((double)total_time / (double)rte_get_tsc_hz());
4190 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
4191 			/ 1000000.0) / ((double)total_time /
4192 			(double)rte_get_tsc_hz());
4193 
4194 	return TEST_SUCCESS;
4195 }
4196 
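/* PMD (polling) mode throughput test routine for an LDPC encoder queue */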
4197 static int
4198 throughput_pmd_lcore_ldpc_enc(void *arg)
4199 {
4200 	struct thread_params *tp = arg;
4201 	uint16_t enq, deq;
4202 	uint64_t total_time = 0, start_time;
4203 	const uint16_t queue_id = tp->queue_id;
4204 	const uint16_t burst_sz = tp->op_params->burst_sz;
4205 	const uint16_t num_ops = tp->op_params->num_to_process;
4206 	struct rte_bbdev_enc_op *ops_enq[num_ops];
4207 	struct rte_bbdev_enc_op *ops_deq[num_ops];
4208 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
4209 	struct test_buffers *bufs = NULL;
4210 	int i, j, ret;
4211 	struct rte_bbdev_info info;
4212 	uint16_t num_to_enq;
4213 
4214 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4215 			"BURST_SIZE should be <= %u", MAX_BURST);
4216 
4217 	rte_bbdev_info_get(tp->dev_id, &info);
4218 
4219 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4220 			"NUM_OPS cannot exceed %u for this device",
4221 			info.drv.queue_size_lim);
4222 
4223 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4224 
4225 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4226 
4227 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
4228 			num_ops);
4229 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
4230 			num_ops);
4231 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4232 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
4233 				bufs->hard_outputs, ref_op);
4234 
4235 	/* Set counter to validate the ordering */
4236 	for (j = 0; j < num_ops; ++j)
4237 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4238 
4239 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4240 		uint32_t time_out = 0;
4241 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4242 			for (j = 0; j < num_ops; ++j)
4243 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
4244 
4245 		start_time = rte_rdtsc_precise();
4246 
4247 		for (enq = 0, deq = 0; enq < num_ops;) {
4248 			num_to_enq = burst_sz;
4249 
4250 			if (unlikely(num_ops - enq < num_to_enq))
4251 				num_to_enq = num_ops - enq;
4252 
4253 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
4254 					queue_id, &ops_enq[enq], num_to_enq);
4255 
4256 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4257 					queue_id, &ops_deq[deq], enq - deq);
4258 			time_out++;
4259 			if (time_out >= TIME_OUT_POLL) {
4260 				timeout_exit(tp->dev_id);
4261 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4262 			}
4263 		}
4264 
4265 		/* dequeue the remaining */
4266 		time_out = 0;
4267 		while (deq < enq) {
4268 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4269 					queue_id, &ops_deq[deq], enq - deq);
4270 			time_out++;
4271 			if (time_out >= TIME_OUT_POLL) {
4272 				timeout_exit(tp->dev_id);
4273 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4274 			}
4275 		}
4276 
4277 		total_time += rte_rdtsc_precise() - start_time;
4278 	}
4279 
4280 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4281 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
4282 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4283 	}
4284 
4285 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
4286 
4287 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
4288 
4289 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4290 			((double)total_time / (double)rte_get_tsc_hz());
4291 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
4292 			/ 1000000.0) / ((double)total_time /
4293 			(double)rte_get_tsc_hz());
4294 
4295 	return TEST_SUCCESS;
4296 }
4297 
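/* PMD (polling) mode throughput test routine for an FFT queue */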
4298 static int
4299 throughput_pmd_lcore_fft(void *arg)
4300 {
4301 	struct thread_params *tp = arg;
4302 	uint16_t enq, deq;
4303 	uint64_t total_time = 0, start_time;
4304 	const uint16_t queue_id = tp->queue_id;
4305 	const uint16_t burst_sz = tp->op_params->burst_sz;
4306 	const uint16_t num_ops = tp->op_params->num_to_process;
4307 	struct rte_bbdev_fft_op *ops_enq[num_ops];
4308 	struct rte_bbdev_fft_op *ops_deq[num_ops];
4309 	struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
4310 	struct test_buffers *bufs = NULL;
4311 	int i, j, ret;
4312 	struct rte_bbdev_info info;
4313 	uint16_t num_to_enq;
4314 
4315 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4316 			"BURST_SIZE should be <= %u", MAX_BURST);
4317 
4318 	rte_bbdev_info_get(tp->dev_id, &info);
4319 
4320 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4321 			"NUM_OPS cannot exceed %u for this device",
4322 			info.drv.queue_size_lim);
4323 
4324 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4325 
4326 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4327 
4328 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
4329 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
4330 
4331 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4332 		copy_reference_fft_op(ops_enq, num_ops, 0, bufs->inputs,
4333 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
4334 
4335 	/* Set counter to validate the ordering */
4336 	for (j = 0; j < num_ops; ++j)
4337 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4338 
4339 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4340 		uint32_t time_out = 0;
4341 		for (j = 0; j < num_ops; ++j)
4342 			mbuf_reset(ops_enq[j]->fft.base_output.data);
4343 
4344 		start_time = rte_rdtsc_precise();
4345 
4346 		for (enq = 0, deq = 0; enq < num_ops;) {
4347 			num_to_enq = burst_sz;
4348 
4349 			if (unlikely(num_ops - enq < num_to_enq))
4350 				num_to_enq = num_ops - enq;
4351 
4352 			enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
4353 					queue_id, &ops_enq[enq], num_to_enq);
4354 
4355 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4356 					queue_id, &ops_deq[deq], enq - deq);
4357 			time_out++;
4358 			if (time_out >= TIME_OUT_POLL) {
4359 				timeout_exit(tp->dev_id);
4360 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4361 			}
4362 		}
4363 
4364 		/* dequeue the remaining */
4365 		time_out = 0;
4366 		while (deq < enq) {
4367 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4368 					queue_id, &ops_deq[deq], enq - deq);
4369 			time_out++;
4370 			if (time_out >= TIME_OUT_POLL) {
4371 				timeout_exit(tp->dev_id);
4372 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4373 			}
4374 		}
4375 
4376 		total_time += rte_rdtsc_precise() - start_time;
4377 	}
4378 
4379 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4380 		ret = validate_fft_op(ops_deq, num_ops, ref_op);
4381 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4382 	}
4383 
4384 	rte_bbdev_fft_op_free_bulk(ops_enq, num_ops);
4385 
4386 	double tb_len_bits = calc_fft_size(ref_op);
4387 
4388 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4389 			((double)total_time / (double)rte_get_tsc_hz());
4390 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
4391 			1000000.0) / ((double)total_time /
4392 			(double)rte_get_tsc_hz());
4393 
4394 	return TEST_SUCCESS;
4395 }
4396 
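/* Aggregate the encoder performance results over the number of cores used */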
4397 static void
4398 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
4399 {
4400 	unsigned int iter = 0;
4401 	double total_mops = 0, total_mbps = 0;
4402 
4403 	for (iter = 0; iter < used_cores; iter++) {
4404 		printf(
4405 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
4406 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
4407 			t_params[iter].mbps);
4408 		total_mops += t_params[iter].ops_per_sec;
4409 		total_mbps += t_params[iter].mbps;
4410 	}
4411 	printf(
4412 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
4413 		used_cores, total_mops, total_mbps);
4414 }
4415 
4416 /* Aggregate the performance results over the number of cores used */
4417 static void
4418 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
4419 {
4420 	unsigned int core_idx = 0;
4421 	double total_mops = 0, total_mbps = 0;
4422 	uint8_t iter_count = 0;
4423 
4424 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4425 		printf(
4426 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
4427 			t_params[core_idx].lcore_id,
4428 			t_params[core_idx].ops_per_sec,
4429 			t_params[core_idx].mbps,
4430 			t_params[core_idx].iter_count);
4431 		total_mops += t_params[core_idx].ops_per_sec;
4432 		total_mbps += t_params[core_idx].mbps;
4433 		iter_count = RTE_MAX(iter_count,
4434 				t_params[core_idx].iter_count);
4435 	}
4436 	printf(
4437 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
4438 		used_cores, total_mops, total_mbps, iter_count);
4439 }
4440 
4441 /* Aggregate the performance results over the number of cores used */
4442 static void
4443 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
4444 {
4445 	unsigned int core_idx = 0;
4446 	double total_mbps = 0, total_bler = 0, total_iter = 0;
4447 	double snr = get_snr();
4448 
4449 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4450 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
4451 				t_params[core_idx].lcore_id,
4452 				t_params[core_idx].bler * 100,
4453 				t_params[core_idx].iter_average,
4454 				t_params[core_idx].mbps,
4455 				get_vector_filename());
4456 		total_mbps += t_params[core_idx].mbps;
4457 		total_bler += t_params[core_idx].bler;
4458 		total_iter += t_params[core_idx].iter_average;
4459 	}
4460 	total_bler /= used_cores;
4461 	total_iter /= used_cores;
4462 
4463 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.3f Mbps %s\n",
4464 			snr, total_bler * 100, total_iter, get_iter_max(),
4465 			total_mbps, get_vector_filename());
4466 }
4467 
4468 /*
4469  * Test function that determines BLER wireless performance
4470  */
4471 static int
4472 bler_test(struct active_device *ad,
4473 		struct test_op_params *op_params)
4474 {
4475 	int ret;
4476 	unsigned int lcore_id, used_cores = 0;
4477 	struct thread_params *t_params;
4478 	struct rte_bbdev_info info;
4479 	lcore_function_t *bler_function;
4480 	uint16_t num_lcores;
4481 	const char *op_type_str;
4482 
4483 	rte_bbdev_info_get(ad->dev_id, &info);
4484 
4485 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4486 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4487 			test_vector.op_type);
4488 
4489 	printf("+ ------------------------------------------------------- +\n");
4490 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4491 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4492 			op_params->num_to_process, op_params->num_lcores,
4493 			op_type_str,
4494 			intr_enabled ? "Interrupt mode" : "PMD mode",
4495 			(double)rte_get_tsc_hz() / 1000000000.0);
4496 
4497 	/* Set number of lcores */
4498 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4499 			? ad->nb_queues
4500 			: op_params->num_lcores;
4501 
4502 	/* Allocate memory for thread parameters structure */
4503 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4504 			RTE_CACHE_LINE_SIZE);
4505 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4506 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4507 				RTE_CACHE_LINE_SIZE));
4508 
4509 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
4510 			!check_bit(test_vector.ldpc_dec.op_flags,
4511 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
4512 			&& !check_bit(test_vector.ldpc_dec.op_flags,
4513 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
4514 		bler_function = bler_pmd_lcore_ldpc_dec;
4515 	else if ((test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) &&
4516 			!check_bit(test_vector.turbo_dec.op_flags,
4517 			RTE_BBDEV_TURBO_SOFT_OUTPUT))
4518 		bler_function = bler_pmd_lcore_turbo_dec;
4519 	else
4520 		return TEST_SKIPPED;
4521 
4522 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4523 
4524 	/* Main core is set at first entry */
4525 	t_params[0].dev_id = ad->dev_id;
4526 	t_params[0].lcore_id = rte_lcore_id();
4527 	t_params[0].op_params = op_params;
4528 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4529 	t_params[0].iter_count = 0;
4530 
4531 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4532 		if (used_cores >= num_lcores)
4533 			break;
4534 
4535 		t_params[used_cores].dev_id = ad->dev_id;
4536 		t_params[used_cores].lcore_id = lcore_id;
4537 		t_params[used_cores].op_params = op_params;
4538 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4539 		t_params[used_cores].iter_count = 0;
4540 
4541 		rte_eal_remote_launch(bler_function,
4542 				&t_params[used_cores++], lcore_id);
4543 	}
4544 
4545 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4546 	ret = bler_function(&t_params[0]);
4547 
4548 	/* Main core is always used */
4549 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4550 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4551 
4552 	print_dec_bler(t_params, num_lcores);
4553 
4554 	/* Return if test failed */
4555 	if (ret) {
4556 		rte_free(t_params);
4557 		return ret;
4558 	}
4559 
4560 	/* Additional result reporting could be added here. */
4561 	rte_free(t_params);
4562 	return ret;
4563 }
4564 
4565 /*
4566  * Test function that determines how long an enqueue + dequeue of a burst
4567  * takes on available lcores.
4568  */
4569 static int
4570 throughput_test(struct active_device *ad,
4571 		struct test_op_params *op_params)
4572 {
4573 	int ret;
4574 	unsigned int lcore_id, used_cores = 0;
4575 	struct thread_params *t_params, *tp;
4576 	struct rte_bbdev_info info;
4577 	lcore_function_t *throughput_function;
4578 	uint16_t num_lcores;
4579 	const char *op_type_str;
4580 
4581 	rte_bbdev_info_get(ad->dev_id, &info);
4582 
4583 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4584 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4585 			test_vector.op_type);
4586 
4587 	printf("+ ------------------------------------------------------- +\n");
4588 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4589 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4590 			op_params->num_to_process, op_params->num_lcores,
4591 			op_type_str,
4592 			intr_enabled ? "Interrupt mode" : "PMD mode",
4593 			(double)rte_get_tsc_hz() / 1000000000.0);
4594 
4595 	/* Set number of lcores */
4596 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4597 			? ad->nb_queues
4598 			: op_params->num_lcores;
4599 
4600 	/* Allocate memory for thread parameters structure */
4601 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4602 			RTE_CACHE_LINE_SIZE);
4603 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4604 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4605 				RTE_CACHE_LINE_SIZE));
4606 
4607 	if (intr_enabled) {
4608 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4609 			throughput_function = throughput_intr_lcore_dec;
4610 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4611 			throughput_function = throughput_intr_lcore_ldpc_dec;
4612 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4613 			throughput_function = throughput_intr_lcore_enc;
4614 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4615 			throughput_function = throughput_intr_lcore_ldpc_enc;
4616 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4617 			throughput_function = throughput_intr_lcore_fft;
4618 		else
4619 			throughput_function = throughput_intr_lcore_enc;
4620 
4621 		/* Dequeue interrupt callback registration */
4622 		ret = rte_bbdev_callback_register(ad->dev_id,
4623 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
4624 				t_params);
4625 		if (ret < 0) {
4626 			rte_free(t_params);
4627 			return ret;
4628 		}
4629 	} else {
4630 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4631 			throughput_function = throughput_pmd_lcore_dec;
4632 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4633 			throughput_function = throughput_pmd_lcore_ldpc_dec;
4634 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4635 			throughput_function = throughput_pmd_lcore_enc;
4636 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4637 			throughput_function = throughput_pmd_lcore_ldpc_enc;
4638 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4639 			throughput_function = throughput_pmd_lcore_fft;
4640 		else
4641 			throughput_function = throughput_pmd_lcore_enc;
4642 	}
4643 
4644 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4645 
4646 	/* Main core is set at first entry */
4647 	t_params[0].dev_id = ad->dev_id;
4648 	t_params[0].lcore_id = rte_lcore_id();
4649 	t_params[0].op_params = op_params;
4650 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4651 	t_params[0].iter_count = 0;
4652 
4653 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4654 		if (used_cores >= num_lcores)
4655 			break;
4656 
4657 		t_params[used_cores].dev_id = ad->dev_id;
4658 		t_params[used_cores].lcore_id = lcore_id;
4659 		t_params[used_cores].op_params = op_params;
4660 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4661 		t_params[used_cores].iter_count = 0;
4662 
4663 		rte_eal_remote_launch(throughput_function,
4664 				&t_params[used_cores++], lcore_id);
4665 	}
4666 
4667 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4668 	ret = throughput_function(&t_params[0]);
4669 
4670 	/* Main core is always used */
4671 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4672 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4673 
4674 	/* Return if test failed */
4675 	if (ret) {
4676 		rte_free(t_params);
4677 		return ret;
4678 	}
4679 
4680 	/* Print throughput if interrupts are disabled and test passed */
4681 	if (!intr_enabled) {
4682 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4683 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4684 			print_dec_throughput(t_params, num_lcores);
4685 		else
4686 			print_enc_throughput(t_params, num_lcores);
4687 		rte_free(t_params);
4688 		return ret;
4689 	}
4690 
4691 	/* In the interrupt TC we need to wait for the interrupt callback to dequeue
4692 	 * all pending operations. Skip waiting for queues which reported an
4693 	 * error via the processing_status variable.
4694 	 * Wait for main lcore operations.
4695 	 */
4696 	tp = &t_params[0];
4697 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4698 		op_params->num_to_process) &&
4699 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4700 		TEST_FAILED))
4701 		rte_pause();
4702 
4703 	tp->ops_per_sec /= TEST_REPETITIONS;
4704 	tp->mbps /= TEST_REPETITIONS;
4705 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4706 
4707 	/* Wait for worker lcores operations */
4708 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
4709 		tp = &t_params[used_cores];
4710 
4711 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4712 			op_params->num_to_process) &&
4713 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4714 			TEST_FAILED))
4715 			rte_pause();
4716 
4717 		tp->ops_per_sec /= TEST_REPETITIONS;
4718 		tp->mbps /= TEST_REPETITIONS;
4719 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4720 	}
4721 
4722 	/* Print throughput if test passed */
4723 	if (!ret) {
4724 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4725 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4726 			print_dec_throughput(t_params, num_lcores);
4727 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
4728 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4729 			print_enc_throughput(t_params, num_lcores);
4730 	}
4731 
4732 	rte_free(t_params);
4733 	return ret;
4734 }
4735 
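/* Test case for latency/validation for Turbo Decoder */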
4736 static int
4737 latency_test_dec(struct rte_mempool *mempool,
4738 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4739 		uint16_t dev_id, uint16_t queue_id,
4740 		const uint16_t num_to_process, uint16_t burst_sz,
4741 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, bool disable_et)
4742 {
4743 	int ret = TEST_SUCCESS;
4744 	uint16_t i, j, dequeued;
4745 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4746 	uint64_t start_time = 0, last_time = 0;
4747 
4748 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4749 		uint16_t enq = 0, deq = 0;
4750 		uint32_t time_out = 0;
4751 		bool first_time = true;
4752 		last_time = 0;
4753 
4754 		if (unlikely(num_to_process - dequeued < burst_sz))
4755 			burst_sz = num_to_process - dequeued;
4756 
4757 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4758 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");
4759 
4760 		ref_op->turbo_dec.iter_max = get_iter_max();
4761 		/* For validation tests we want to enable early termination */
4762 		if (!disable_et && !check_bit(ref_op->turbo_dec.op_flags,
4763 				RTE_BBDEV_TURBO_EARLY_TERMINATION))
4764 			ref_op->turbo_dec.op_flags |= RTE_BBDEV_TURBO_EARLY_TERMINATION;
4765 
4766 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4767 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4768 					bufs->inputs,
4769 					bufs->hard_outputs,
4770 					bufs->soft_outputs,
4771 					ref_op);
4772 
4773 		/* Set counter to validate the ordering */
4774 		for (j = 0; j < burst_sz; ++j)
4775 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4776 
4777 		start_time = rte_rdtsc_precise();
4778 
4779 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4780 				burst_sz);
4781 		TEST_ASSERT(enq == burst_sz,
4782 				"Error enqueueing burst, expected %u, got %u",
4783 				burst_sz, enq);
4784 
4785 		/* Dequeue */
4786 		do {
4787 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4788 					&ops_deq[deq], burst_sz - deq);
4789 			if (likely(first_time && (deq > 0))) {
4790 				last_time = rte_rdtsc_precise() - start_time;
4791 				first_time = false;
4792 			}
4793 			time_out++;
4794 			if (time_out >= TIME_OUT_POLL) {
4795 				timeout_exit(dev_id);
4796 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4797 			}
4798 		} while (unlikely(burst_sz != deq));
4799 
4800 		*max_time = RTE_MAX(*max_time, last_time);
4801 		*min_time = RTE_MIN(*min_time, last_time);
4802 		*total_time += last_time;
4803 
4804 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4805 			ret = validate_dec_op(ops_deq, burst_sz, ref_op);
4806 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4807 		}
4808 
4809 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4810 		dequeued += deq;
4811 	}
4812 
4813 	return i;
4814 }
4815 
4816 /* Test case for latency/validation for LDPC Decoder */
4817 static int
4818 latency_test_ldpc_dec(struct rte_mempool *mempool,
4819 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4820 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4821 		const uint16_t num_to_process, uint16_t burst_sz,
4822 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4823 		bool disable_et)
4824 {
4825 	int ret = TEST_SUCCESS;
4826 	uint16_t i, j, dequeued;
4827 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4828 	uint64_t start_time = 0, last_time = 0;
4829 	bool extDdr = ldpc_cap_flags &
4830 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4831 
4832 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4833 		uint16_t enq = 0, deq = 0;
4834 		uint32_t time_out = 0;
4835 		bool first_time = true;
4836 		last_time = 0;
4837 
4838 		if (unlikely(num_to_process - dequeued < burst_sz))
4839 			burst_sz = num_to_process - dequeued;
4840 
4841 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4842 		TEST_ASSERT_SUCCESS(ret,
4843 				"rte_bbdev_dec_op_alloc_bulk() failed");
4844 
4845 		/* For latency tests we need to disable early termination */
4846 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4847 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4848 			ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4849 
4850 		ref_op->ldpc_dec.iter_max = get_iter_max();
4851 		/* When ET is disabled, the expected iter_count is iter_max */
4852 		if (disable_et)
4853 			ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4854 
4855 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4856 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4857 					bufs->inputs,
4858 					bufs->hard_outputs,
4859 					bufs->soft_outputs,
4860 					bufs->harq_inputs,
4861 					bufs->harq_outputs,
4862 					ref_op);
4863 
4864 		if (extDdr)
4865 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4866 					burst_sz, true);
4867 
4868 		/* Set counter to validate the ordering */
4869 		for (j = 0; j < burst_sz; ++j)
4870 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4871 
4872 		start_time = rte_rdtsc_precise();
4873 
4874 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4875 				&ops_enq[enq], burst_sz);
4876 		TEST_ASSERT(enq == burst_sz,
4877 				"Error enqueueing burst, expected %u, got %u",
4878 				burst_sz, enq);
4879 
4880 		/* Dequeue */
4881 		do {
4882 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4883 					&ops_deq[deq], burst_sz - deq);
4884 			if (likely(first_time && (deq > 0))) {
4885 				last_time = rte_rdtsc_precise() - start_time;
4886 				first_time = false;
4887 			}
4888 			time_out++;
4889 			if (time_out >= TIME_OUT_POLL) {
4890 				timeout_exit(dev_id);
4891 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4892 			}
4893 		} while (unlikely(burst_sz != deq));
4894 
4895 		*max_time = RTE_MAX(*max_time, last_time);
4896 		*min_time = RTE_MIN(*min_time, last_time);
4897 		*total_time += last_time;
4898 
4899 		if (extDdr)
4900 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4901 
4902 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4903 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, vector_mask);
4904 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4905 		}
4906 
4907 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4908 		dequeued += deq;
4909 	}
4910 	return i;
4911 }
4912 
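/* Test case for latency/validation for Turbo Encoder */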
4913 static int
4914 latency_test_enc(struct rte_mempool *mempool,
4915 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4916 		uint16_t dev_id, uint16_t queue_id,
4917 		const uint16_t num_to_process, uint16_t burst_sz,
4918 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4919 {
4920 	int ret = TEST_SUCCESS;
4921 	uint16_t i, j, dequeued;
4922 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4923 	uint64_t start_time = 0, last_time = 0;
4924 
4925 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4926 		uint16_t enq = 0, deq = 0;
4927 		uint32_t time_out = 0;
4928 		bool first_time = true;
4929 		last_time = 0;
4930 
4931 		if (unlikely(num_to_process - dequeued < burst_sz))
4932 			burst_sz = num_to_process - dequeued;
4933 
4934 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4935 		TEST_ASSERT_SUCCESS(ret,
4936 				"rte_bbdev_enc_op_alloc_bulk() failed");
4937 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4938 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4939 					bufs->inputs,
4940 					bufs->hard_outputs,
4941 					ref_op);
4942 
4943 		/* Set counter to validate the ordering */
4944 		for (j = 0; j < burst_sz; ++j)
4945 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4946 
4947 		start_time = rte_rdtsc_precise();
4948 
4949 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4950 				burst_sz);
4951 		TEST_ASSERT(enq == burst_sz,
4952 				"Error enqueueing burst, expected %u, got %u",
4953 				burst_sz, enq);
4954 
4955 		/* Dequeue */
4956 		do {
4957 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4958 					&ops_deq[deq], burst_sz - deq);
4959 			if (likely(first_time && (deq > 0))) {
4960 				last_time += rte_rdtsc_precise() - start_time;
4961 				first_time = false;
4962 			}
4963 			time_out++;
4964 			if (time_out >= TIME_OUT_POLL) {
4965 				timeout_exit(dev_id);
4966 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4967 			}
4968 		} while (unlikely(burst_sz != deq));
4969 
4970 		*max_time = RTE_MAX(*max_time, last_time);
4971 		*min_time = RTE_MIN(*min_time, last_time);
4972 		*total_time += last_time;
4973 
4974 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4975 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4976 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4977 		}
4978 
4979 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4980 		dequeued += deq;
4981 	}
4982 
4983 	return i;
4984 }
4985 
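/* Test case for latency/validation for LDPC Encoder */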
4986 static int
4987 latency_test_ldpc_enc(struct rte_mempool *mempool,
4988 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4989 		uint16_t dev_id, uint16_t queue_id,
4990 		const uint16_t num_to_process, uint16_t burst_sz,
4991 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4992 {
4993 	int ret = TEST_SUCCESS;
4994 	uint16_t i, j, dequeued;
4995 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4996 	uint64_t start_time = 0, last_time = 0;
4997 
4998 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4999 		uint16_t enq = 0, deq = 0;
5000 		uint32_t time_out = 0;
5001 		bool first_time = true;
5002 		last_time = 0;
5003 
5004 		if (unlikely(num_to_process - dequeued < burst_sz))
5005 			burst_sz = num_to_process - dequeued;
5006 
5007 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5008 		TEST_ASSERT_SUCCESS(ret,
5009 				"rte_bbdev_enc_op_alloc_bulk() failed");
5010 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5011 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
5012 					bufs->inputs,
5013 					bufs->hard_outputs,
5014 					ref_op);
5015 
5016 		/* Set counter to validate the ordering */
5017 		for (j = 0; j < burst_sz; ++j)
5018 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
5019 
5020 		start_time = rte_rdtsc_precise();
5021 
5022 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
5023 				&ops_enq[enq], burst_sz);
5024 		TEST_ASSERT(enq == burst_sz,
5025 				"Error enqueueing burst, expected %u, got %u",
5026 				burst_sz, enq);
5027 
5028 		/* Dequeue */
5029 		do {
5030 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5031 					&ops_deq[deq], burst_sz - deq);
5032 			if (likely(first_time && (deq > 0))) {
5033 				last_time += rte_rdtsc_precise() - start_time;
5034 				first_time = false;
5035 			}
5036 			time_out++;
5037 			if (time_out >= TIME_OUT_POLL) {
5038 				timeout_exit(dev_id);
5039 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
5040 			}
5041 		} while (unlikely(burst_sz != deq));
5042 
5043 		*max_time = RTE_MAX(*max_time, last_time);
5044 		*min_time = RTE_MIN(*min_time, last_time);
5045 		*total_time += last_time;
5046 
5047 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
5048 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
5049 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
5050 		}
5051 
5052 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5053 		dequeued += deq;
5054 	}
5055 
5056 	return i;
5057 }
5058 
5059 
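/* Test case for latency/validation for FFT */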
5060 static int
5061 latency_test_fft(struct rte_mempool *mempool,
5062 		struct test_buffers *bufs, struct rte_bbdev_fft_op *ref_op,
5063 		uint16_t dev_id, uint16_t queue_id,
5064 		const uint16_t num_to_process, uint16_t burst_sz,
5065 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
5066 {
5067 	int ret = TEST_SUCCESS;
5068 	uint16_t i, j, dequeued;
5069 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5070 	uint64_t start_time = 0, last_time = 0;
5071 
5072 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5073 		uint16_t enq = 0, deq = 0;
5074 		uint32_t time_out = 0;
5075 		bool first_time = true;
5076 		last_time = 0;
5077 
5078 		if (unlikely(num_to_process - dequeued < burst_sz))
5079 			burst_sz = num_to_process - dequeued;
5080 
5081 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
5082 		TEST_ASSERT_SUCCESS(ret,
5083 				"rte_bbdev_fft_op_alloc_bulk() failed");
5084 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5085 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
5086 					bufs->inputs,
5087 					bufs->hard_outputs, bufs->soft_outputs,
5088 					ref_op);
5089 
5090 		/* Set counter to validate the ordering */
5091 		for (j = 0; j < burst_sz; ++j)
5092 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
5093 
5094 		start_time = rte_rdtsc_precise();
5095 
5096 		enq = rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
5097 				&ops_enq[enq], burst_sz);
5098 		TEST_ASSERT(enq == burst_sz,
5099 				"Error enqueueing burst, expected %u, got %u",
5100 				burst_sz, enq);
5101 
5102 		/* Dequeue */
5103 		do {
5104 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5105 					&ops_deq[deq], burst_sz - deq);
5106 			if (likely(first_time && (deq > 0))) {
5107 				last_time += rte_rdtsc_precise() - start_time;
5108 				first_time = false;
5109 			}
5110 			time_out++;
5111 			if (time_out >= TIME_OUT_POLL) {
5112 				timeout_exit(dev_id);
5113 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
5114 			}
5115 		} while (unlikely(burst_sz != deq));
5116 
5117 		*max_time = RTE_MAX(*max_time, last_time);
5118 		*min_time = RTE_MIN(*min_time, last_time);
5119 		*total_time += last_time;
5120 
5121 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
5122 			ret = validate_fft_op(ops_deq, burst_sz, ref_op);
5123 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
5124 		}
5125 
5126 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
5127 		dequeued += deq;
5128 	}
5129 
5130 	return i;
5131 }
5132 
5133 /* Common function for running validation and latency test cases */
5134 static int
5135 validation_latency_test(struct active_device *ad,
5136 		struct test_op_params *op_params, bool latency_flag)
5137 {
5138 	int iter;
5139 	uint16_t burst_sz = op_params->burst_sz;
5140 	const uint16_t num_to_process = op_params->num_to_process;
5141 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5142 	const uint16_t queue_id = ad->queue_ids[0];
5143 	struct test_buffers *bufs = NULL;
5144 	struct rte_bbdev_info info;
5145 	uint64_t total_time, min_time, max_time;
5146 	const char *op_type_str;
5147 
5148 	total_time = max_time = 0;
5149 	min_time = UINT64_MAX;
5150 
5151 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5152 			"BURST_SIZE should be <= %u", MAX_BURST);
5153 
5154 	rte_bbdev_info_get(ad->dev_id, &info);
5155 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
5156 
5157 	op_type_str = rte_bbdev_op_type_str(op_type);
5158 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5159 
5160 	printf("+ ------------------------------------------------------- +\n");
5161 	if (latency_flag)
5162 		printf("== test: latency\ndev:");
5163 	else
5164 		printf("== test: validation\ndev:");
5165 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
5166 			info.dev_name, burst_sz, num_to_process, op_type_str);
5167 
5168 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
5169 		iter = latency_test_dec(op_params->mp, bufs,
5170 				op_params->ref_dec_op, ad->dev_id, queue_id,
5171 				num_to_process, burst_sz, &total_time,
5172 				&min_time, &max_time, latency_flag);
5173 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5174 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
5175 				op_params->ref_enc_op, ad->dev_id, queue_id,
5176 				num_to_process, burst_sz, &total_time,
5177 				&min_time, &max_time);
5178 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5179 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
5180 				op_params->ref_dec_op, op_params->vector_mask,
5181 				ad->dev_id, queue_id, num_to_process,
5182 				burst_sz, &total_time, &min_time, &max_time,
5183 				latency_flag);
5184 	else if (op_type == RTE_BBDEV_OP_FFT)
5185 		iter = latency_test_fft(op_params->mp, bufs,
5186 				op_params->ref_fft_op,
5187 				ad->dev_id, queue_id,
5188 				num_to_process, burst_sz, &total_time,
5189 				&min_time, &max_time);
5190 	else /* RTE_BBDEV_OP_TURBO_ENC */
5191 		iter = latency_test_enc(op_params->mp, bufs,
5192 				op_params->ref_enc_op,
5193 				ad->dev_id, queue_id,
5194 				num_to_process, burst_sz, &total_time,
5195 				&min_time, &max_time);
5196 
5197 	if (iter <= 0)
5198 		return TEST_FAILED;
5199 
5200 	printf("Operation latency:\n"
5201 			"\tavg: %lg cycles, %lg us\n"
5202 			"\tmin: %lg cycles, %lg us\n"
5203 			"\tmax: %lg cycles, %lg us\n",
5204 			(double)total_time / (double)iter,
5205 			(double)(total_time * 1000000) / (double)iter /
5206 			(double)rte_get_tsc_hz(), (double)min_time,
5207 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
5208 			(double)max_time, (double)(max_time * 1000000) /
5209 			(double)rte_get_tsc_hz());
5210 
5211 	return TEST_SUCCESS;
5212 }
5213 
5214 static int
5215 latency_test(struct active_device *ad, struct test_op_params *op_params)
5216 {
5217 	return validation_latency_test(ad, op_params, true);
5218 }
5219 
5220 static int
5221 validation_test(struct active_device *ad, struct test_op_params *op_params)
5222 {
5223 	return validation_latency_test(ad, op_params, false);
5224 }
5225 
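/* Read a snapshot of the queue statistics directly from the device data */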
5226 static int
5227 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
5228 		struct rte_bbdev_stats *stats)
5229 {
5230 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
5231 	struct rte_bbdev_stats *q_stats;
5232 
5233 	if (queue_id >= dev->data->num_queues)
5234 		return -1;
5235 
5236 	q_stats = &dev->data->queues[queue_id].queue_stats;
5237 
5238 	stats->enqueued_count = q_stats->enqueued_count;
5239 	stats->dequeued_count = q_stats->dequeued_count;
5240 	stats->enqueue_err_count = q_stats->enqueue_err_count;
5241 	stats->dequeue_err_count = q_stats->dequeue_err_count;
5242 	stats->enqueue_warn_count = q_stats->enqueue_warn_count;
5243 	stats->dequeue_warn_count = q_stats->dequeue_warn_count;
5244 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
5245 
5246 	return 0;
5247 }
5248 
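/* Measure enqueue and dequeue offload latency for FFT operations */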
5249 static int
5250 offload_latency_test_fft(struct rte_mempool *mempool, struct test_buffers *bufs,
5251 		struct rte_bbdev_fft_op *ref_op, uint16_t dev_id,
5252 		uint16_t queue_id, const uint16_t num_to_process,
5253 		uint16_t burst_sz, struct test_time_stats *time_st)
5254 {
5255 	int i, dequeued, ret;
5256 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5257 	uint64_t enq_start_time, deq_start_time;
5258 	uint64_t enq_sw_last_time, deq_last_time;
5259 	struct rte_bbdev_stats stats;
5260 
5261 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5262 		uint16_t enq = 0, deq = 0;
5263 
5264 		if (unlikely(num_to_process - dequeued < burst_sz))
5265 			burst_sz = num_to_process - dequeued;
5266 
5267 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
5268 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5269 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5270 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
5271 					bufs->inputs,
5272 					bufs->hard_outputs, bufs->soft_outputs,
5273 					ref_op);
5274 
5275 		/* Start time meas for enqueue function offload latency */
5276 		enq_start_time = rte_rdtsc_precise();
5277 		do {
5278 			enq += rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
5279 					&ops_enq[enq], burst_sz - enq);
5280 		} while (unlikely(burst_sz != enq));
5281 
5282 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5283 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5284 		TEST_ASSERT_SUCCESS(ret,
5285 				"Failed to get stats for queue (%u) of device (%u)",
5286 				queue_id, dev_id);
5287 
5288 		enq_sw_last_time -= stats.acc_offload_cycles;
5289 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5290 				enq_sw_last_time);
5291 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5292 				enq_sw_last_time);
5293 		time_st->enq_sw_total_time += enq_sw_last_time;
5294 
5295 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5296 				stats.acc_offload_cycles);
5297 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5298 				stats.acc_offload_cycles);
5299 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5300 
5301 		/* give time for device to process ops */
5302 		rte_delay_us(WAIT_OFFLOAD_US);
5303 
5304 		/* Start time meas for dequeue function offload latency */
5305 		deq_start_time = rte_rdtsc_precise();
5306 		/* Dequeue one operation */
5307 		do {
5308 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5309 					&ops_deq[deq], enq);
5310 		} while (unlikely(deq == 0));
5311 
5312 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5313 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5314 				deq_last_time);
5315 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5316 				deq_last_time);
5317 		time_st->deq_total_time += deq_last_time;
5318 
5319 		/* Dequeue remaining operations if needed */
5320 		while (burst_sz != deq)
5321 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5322 					&ops_deq[deq], burst_sz - deq);
5323 
5324 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
5325 		dequeued += deq;
5326 	}
5327 
5328 	return i;
5329 }
5330 
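/* Measure enqueue and dequeue offload latency for Turbo Decoder operations */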
5331 static int
5332 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
5333 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5334 		uint16_t queue_id, const uint16_t num_to_process,
5335 		uint16_t burst_sz, struct test_time_stats *time_st)
5336 {
5337 	int i, dequeued, ret;
5338 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5339 	uint64_t enq_start_time, deq_start_time;
5340 	uint64_t enq_sw_last_time, deq_last_time;
5341 	struct rte_bbdev_stats stats;
5342 
5343 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5344 		uint16_t enq = 0, deq = 0;
5345 
5346 		if (unlikely(num_to_process - dequeued < burst_sz))
5347 			burst_sz = num_to_process - dequeued;
5348 
5349 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5350 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5351 		ref_op->turbo_dec.iter_max = get_iter_max();
5352 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5353 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
5354 					bufs->inputs,
5355 					bufs->hard_outputs,
5356 					bufs->soft_outputs,
5357 					ref_op);
5358 
5359 		/* Start time meas for enqueue function offload latency */
5360 		enq_start_time = rte_rdtsc_precise();
5361 		do {
5362 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
5363 					&ops_enq[enq], burst_sz - enq);
5364 		} while (unlikely(burst_sz != enq));
5365 
5366 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5367 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5368 		TEST_ASSERT_SUCCESS(ret,
5369 				"Failed to get stats for queue (%u) of device (%u)",
5370 				queue_id, dev_id);
5371 
5372 		enq_sw_last_time -= stats.acc_offload_cycles;
5373 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5374 				enq_sw_last_time);
5375 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5376 				enq_sw_last_time);
5377 		time_st->enq_sw_total_time += enq_sw_last_time;
5378 
5379 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5380 				stats.acc_offload_cycles);
5381 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5382 				stats.acc_offload_cycles);
5383 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5384 
5385 		/* give time for device to process ops */
5386 		rte_delay_us(WAIT_OFFLOAD_US);
5387 
5388 		/* Start time meas for dequeue function offload latency */
5389 		deq_start_time = rte_rdtsc_precise();
5390 		/* Dequeue one operation */
5391 		do {
5392 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5393 					&ops_deq[deq], enq);
5394 		} while (unlikely(deq == 0));
5395 
5396 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5397 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5398 				deq_last_time);
5399 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5400 				deq_last_time);
5401 		time_st->deq_total_time += deq_last_time;
5402 
5403 		/* Dequeue remaining operations if needed */
5404 		while (burst_sz != deq)
5405 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5406 					&ops_deq[deq], burst_sz - deq);
5407 
5408 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5409 		dequeued += deq;
5410 	}
5411 
5412 	return i;
5413 }
5414 
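/* Measure enqueue and dequeue offload latency for LDPC Decoder operations */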
5415 static int
5416 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
5417 		struct test_buffers *bufs,
5418 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5419 		uint16_t queue_id, const uint16_t num_to_process,
5420 		uint16_t burst_sz, struct test_time_stats *time_st)
5421 {
5422 	int i, dequeued, ret;
5423 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5424 	uint64_t enq_start_time, deq_start_time;
5425 	uint64_t enq_sw_last_time, deq_last_time;
5426 	struct rte_bbdev_stats stats;
5427 	bool extDdr = ldpc_cap_flags &
5428 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
5429 
5430 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5431 		uint16_t enq = 0, deq = 0;
5432 
5433 		if (unlikely(num_to_process - dequeued < burst_sz))
5434 			burst_sz = num_to_process - dequeued;
5435 
5436 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5437 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5438 		ref_op->ldpc_dec.iter_max = get_iter_max();
5439 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5440 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
5441 					bufs->inputs,
5442 					bufs->hard_outputs,
5443 					bufs->soft_outputs,
5444 					bufs->harq_inputs,
5445 					bufs->harq_outputs,
5446 					ref_op);
5447 
5448 		if (extDdr)
5449 			preload_harq_ddr(dev_id, queue_id, ops_enq,
5450 					burst_sz, true);
5451 
5452 		/* Start time meas for enqueue function offload latency */
5453 		enq_start_time = rte_rdtsc_precise();
5454 		do {
5455 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
5456 					&ops_enq[enq], burst_sz - enq);
5457 		} while (unlikely(burst_sz != enq));
5458 
5459 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5460 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5461 		TEST_ASSERT_SUCCESS(ret,
5462 				"Failed to get stats for queue (%u) of device (%u)",
5463 				queue_id, dev_id);
5464 
5465 		enq_sw_last_time -= stats.acc_offload_cycles;
5466 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5467 				enq_sw_last_time);
5468 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5469 				enq_sw_last_time);
5470 		time_st->enq_sw_total_time += enq_sw_last_time;
5471 
5472 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5473 				stats.acc_offload_cycles);
5474 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5475 				stats.acc_offload_cycles);
5476 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5477 
5478 		/* give time for device to process ops */
5479 		rte_delay_us(WAIT_OFFLOAD_US);
5480 
5481 		/* Start time meas for dequeue function offload latency */
5482 		deq_start_time = rte_rdtsc_precise();
5483 		/* Dequeue one operation */
5484 		do {
5485 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5486 					&ops_deq[deq], enq);
5487 		} while (unlikely(deq == 0));
5488 
5489 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5490 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5491 				deq_last_time);
5492 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5493 				deq_last_time);
5494 		time_st->deq_total_time += deq_last_time;
5495 
5496 		/* Dequeue remaining operations if needed*/
5497 		while (burst_sz != deq)
5498 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5499 					&ops_deq[deq], burst_sz - deq);
5500 
5501 		if (extDdr) {
5502 			/* Read loopback is not thread safe */
5503 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
5504 		}
5505 
5506 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5507 		dequeued += deq;
5508 	}
5509 
5510 	return i;
5511 }
5512 
5513 static int
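/* Measure enqueue and dequeue offload latency for Turbo Encoder operations */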
5514 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
5515 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5516 		uint16_t queue_id, const uint16_t num_to_process,
5517 		uint16_t burst_sz, struct test_time_stats *time_st)
5518 {
5519 	int i, dequeued, ret;
5520 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5521 	uint64_t enq_start_time, deq_start_time;
5522 	uint64_t enq_sw_last_time, deq_last_time;
5523 	struct rte_bbdev_stats stats;
5524 
5525 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5526 		uint16_t enq = 0, deq = 0;
5527 
5528 		if (unlikely(num_to_process - dequeued < burst_sz))
5529 			burst_sz = num_to_process - dequeued;
5530 
5531 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5532 		TEST_ASSERT_SUCCESS(ret,
5533 				"rte_bbdev_enc_op_alloc_bulk() failed");
5534 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5535 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
5536 					bufs->inputs,
5537 					bufs->hard_outputs,
5538 					ref_op);
5539 
5540 		/* Start time meas for enqueue function offload latency */
5541 		enq_start_time = rte_rdtsc_precise();
5542 		do {
5543 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
5544 					&ops_enq[enq], burst_sz - enq);
5545 		} while (unlikely(burst_sz != enq));
5546 
5547 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5548 
5549 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5550 		TEST_ASSERT_SUCCESS(ret,
5551 				"Failed to get stats for queue (%u) of device (%u)",
5552 				queue_id, dev_id);
5553 		enq_sw_last_time -= stats.acc_offload_cycles;
5554 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5555 				enq_sw_last_time);
5556 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5557 				enq_sw_last_time);
5558 		time_st->enq_sw_total_time += enq_sw_last_time;
5559 
5560 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5561 				stats.acc_offload_cycles);
5562 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5563 				stats.acc_offload_cycles);
5564 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5565 
5566 		/* give time for device to process ops */
5567 		rte_delay_us(WAIT_OFFLOAD_US);
5568 
5569 		/* Start time meas for dequeue function offload latency */
5570 		deq_start_time = rte_rdtsc_precise();
5571 		/* Dequeue one operation */
5572 		do {
5573 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5574 					&ops_deq[deq], enq);
5575 		} while (unlikely(deq == 0));
5576 
5577 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5578 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5579 				deq_last_time);
5580 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5581 				deq_last_time);
5582 		time_st->deq_total_time += deq_last_time;
5583 
5584 		while (burst_sz != deq)
5585 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5586 					&ops_deq[deq], burst_sz - deq);
5587 
5588 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5589 		dequeued += deq;
5590 	}
5591 
5592 	return i;
5593 }
5594 
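/* Measure enqueue and dequeue offload latency for LDPC Encoder operations */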
5595 static int
5596 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
5597 		struct test_buffers *bufs,
5598 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5599 		uint16_t queue_id, const uint16_t num_to_process,
5600 		uint16_t burst_sz, struct test_time_stats *time_st)
5601 {
5602 	int i, dequeued, ret;
5603 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5604 	uint64_t enq_start_time, deq_start_time;
5605 	uint64_t enq_sw_last_time, deq_last_time;
5606 	struct rte_bbdev_stats stats;
5607 
5608 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5609 		uint16_t enq = 0, deq = 0;
5610 
5611 		if (unlikely(num_to_process - dequeued < burst_sz))
5612 			burst_sz = num_to_process - dequeued;
5613 
5614 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5615 		TEST_ASSERT_SUCCESS(ret,
5616 				"rte_bbdev_enc_op_alloc_bulk() failed");
5617 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5618 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
5619 					bufs->inputs,
5620 					bufs->hard_outputs,
5621 					ref_op);
5622 
5623 		/* Start time meas for enqueue function offload latency */
5624 		enq_start_time = rte_rdtsc_precise();
5625 		do {
5626 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
5627 					&ops_enq[enq], burst_sz - enq);
5628 		} while (unlikely(burst_sz != enq));
5629 
5630 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5631 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5632 		TEST_ASSERT_SUCCESS(ret,
5633 				"Failed to get stats for queue (%u) of device (%u)",
5634 				queue_id, dev_id);
5635 
5636 		enq_sw_last_time -= stats.acc_offload_cycles;
5637 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5638 				enq_sw_last_time);
5639 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5640 				enq_sw_last_time);
5641 		time_st->enq_sw_total_time += enq_sw_last_time;
5642 
5643 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5644 				stats.acc_offload_cycles);
5645 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5646 				stats.acc_offload_cycles);
5647 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5648 
5649 		/* give time for device to process ops */
5650 		rte_delay_us(WAIT_OFFLOAD_US);
5651 
5652 		/* Start time meas for dequeue function offload latency */
5653 		deq_start_time = rte_rdtsc_precise();
5654 		/* Dequeue one operation */
5655 		do {
5656 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5657 					&ops_deq[deq], enq);
5658 		} while (unlikely(deq == 0));
5659 
5660 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5661 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5662 				deq_last_time);
5663 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5664 				deq_last_time);
5665 		time_st->deq_total_time += deq_last_time;
5666 
5667 		while (burst_sz != deq)
5668 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5669 					&ops_deq[deq], burst_sz - deq);
5670 
5671 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5672 		dequeued += deq;
5673 	}
5674 
5675 	return i;
5676 }
5677 
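/* Common function for running the offload cost test and checking queue statistics */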
5678 static int
5679 offload_cost_test(struct active_device *ad,
5680 		struct test_op_params *op_params)
5681 {
5682 	int iter, ret;
5683 	uint16_t burst_sz = op_params->burst_sz;
5684 	const uint16_t num_to_process = op_params->num_to_process;
5685 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5686 	const uint16_t queue_id = ad->queue_ids[0];
5687 	struct test_buffers *bufs = NULL;
5688 	struct rte_bbdev_info info;
5689 	const char *op_type_str;
5690 	struct test_time_stats time_st;
5691 
5692 	memset(&time_st, 0, sizeof(struct test_time_stats));
5693 	time_st.enq_sw_min_time = UINT64_MAX;
5694 	time_st.enq_acc_min_time = UINT64_MAX;
5695 	time_st.deq_min_time = UINT64_MAX;
5696 
5697 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5698 			"BURST_SIZE should be <= %u", MAX_BURST);
5699 
5700 	rte_bbdev_info_get(ad->dev_id, &info);
5701 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
5702 
5703 	op_type_str = rte_bbdev_op_type_str(op_type);
5704 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5705 
5706 	printf("+ ------------------------------------------------------- +\n");
5707 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5708 			info.dev_name, burst_sz, num_to_process, op_type_str);
5709 
5710 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
5711 		iter = offload_latency_test_dec(op_params->mp, bufs,
5712 				op_params->ref_dec_op, ad->dev_id, queue_id,
5713 				num_to_process, burst_sz, &time_st);
5714 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
5715 		iter = offload_latency_test_enc(op_params->mp, bufs,
5716 				op_params->ref_enc_op, ad->dev_id, queue_id,
5717 				num_to_process, burst_sz, &time_st);
5718 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5719 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
5720 				op_params->ref_enc_op, ad->dev_id, queue_id,
5721 				num_to_process, burst_sz, &time_st);
5722 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5723 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
5724 			op_params->ref_dec_op, ad->dev_id, queue_id,
5725 			num_to_process, burst_sz, &time_st);
5726 	else if (op_type == RTE_BBDEV_OP_FFT)
5727 		iter = offload_latency_test_fft(op_params->mp, bufs,
5728 			op_params->ref_fft_op, ad->dev_id, queue_id,
5729 			num_to_process, burst_sz, &time_st);
5730 	else
5731 		iter = offload_latency_test_enc(op_params->mp, bufs,
5732 				op_params->ref_enc_op, ad->dev_id, queue_id,
5733 				num_to_process, burst_sz, &time_st);
5734 
5735 	if (iter <= 0)
5736 		return TEST_FAILED;
5737 
5738 	printf("Enqueue driver offload cost latency:\n"
5739 			"\tavg: %lg cycles, %lg us\n"
5740 			"\tmin: %lg cycles, %lg us\n"
5741 			"\tmax: %lg cycles, %lg us\n"
5742 			"Enqueue accelerator offload cost latency:\n"
5743 			"\tavg: %lg cycles, %lg us\n"
5744 			"\tmin: %lg cycles, %lg us\n"
5745 			"\tmax: %lg cycles, %lg us\n",
5746 			(double)time_st.enq_sw_total_time / (double)iter,
5747 			(double)(time_st.enq_sw_total_time * 1000000) /
5748 			(double)iter / (double)rte_get_tsc_hz(),
5749 			(double)time_st.enq_sw_min_time,
5750 			(double)(time_st.enq_sw_min_time * 1000000) /
5751 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
5752 			(double)(time_st.enq_sw_max_time * 1000000) /
5753 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
5754 			(double)iter,
5755 			(double)(time_st.enq_acc_total_time * 1000000) /
5756 			(double)iter / (double)rte_get_tsc_hz(),
5757 			(double)time_st.enq_acc_min_time,
5758 			(double)(time_st.enq_acc_min_time * 1000000) /
5759 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
5760 			(double)(time_st.enq_acc_max_time * 1000000) /
5761 			rte_get_tsc_hz());
5762 
5763 	printf("Dequeue offload cost latency - one op:\n"
5764 			"\tavg: %lg cycles, %lg us\n"
5765 			"\tmin: %lg cycles, %lg us\n"
5766 			"\tmax: %lg cycles, %lg us\n",
5767 			(double)time_st.deq_total_time / (double)iter,
5768 			(double)(time_st.deq_total_time * 1000000) /
5769 			(double)iter / (double)rte_get_tsc_hz(),
5770 			(double)time_st.deq_min_time,
5771 			(double)(time_st.deq_min_time * 1000000) /
5772 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
5773 			(double)(time_st.deq_max_time * 1000000) /
5774 			rte_get_tsc_hz());
5775 
5776 	struct rte_bbdev_stats stats = {0};
5777 	ret = get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
5778 	TEST_ASSERT_SUCCESS(ret,
5779 			"Failed to get stats for queue (%u) of device (%u)",
5780 			queue_id, ad->dev_id);
5781 	if (stats.enqueue_warn_count > 0)
5782 		printf("Warning reported on the queue : %10"PRIu64"\n",
5783 			stats.enqueue_warn_count);
5784 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
5785 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
5786 				"Mismatch in enqueue count %10"PRIu64" %d",
5787 				stats.enqueued_count, num_to_process);
5788 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
5789 				"Mismatch in dequeue count %10"PRIu64" %d",
5790 				stats.dequeued_count, num_to_process);
5791 	}
5792 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
5793 			"Enqueue count Error %10"PRIu64"",
5794 			stats.enqueue_err_count);
5795 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
5796 			"Dequeue count Error (%10"PRIu64"",
5797 			stats.dequeue_err_count);
5798 
5799 	return TEST_SUCCESS;
5800 }
5801 
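/* Measure the cost of dequeuing from an empty queue for decoder operations */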
5802 static int
5803 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
5804 		const uint16_t num_to_process, uint16_t burst_sz,
5805 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5806 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5807 {
5808 	int i, deq_total;
5809 	struct rte_bbdev_dec_op *ops[MAX_BURST];
5810 	uint64_t deq_start_time, deq_last_time;
5811 
5812 	/* Test deq offload latency from an empty queue */
5813 
5814 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5815 			++i, deq_total += burst_sz) {
5816 		deq_start_time = rte_rdtsc_precise();
5817 
5818 		if (unlikely(num_to_process - deq_total < burst_sz))
5819 			burst_sz = num_to_process - deq_total;
5820 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5821 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
5822 					burst_sz);
5823 		else
5824 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
5825 					burst_sz);
5826 
5827 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5828 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5829 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5830 		*deq_total_time += deq_last_time;
5831 	}
5832 
5833 	return i;
5834 }
5835 
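/* Measure the cost of dequeuing from an empty queue for encoder operations */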
5836 static int
5837 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
5838 		const uint16_t num_to_process, uint16_t burst_sz,
5839 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5840 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5841 {
5842 	int i, deq_total;
5843 	struct rte_bbdev_enc_op *ops[MAX_BURST];
5844 	uint64_t deq_start_time, deq_last_time;
5845 
5846 	/* Test deq offload latency from an empty queue */
5847 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5848 			++i, deq_total += burst_sz) {
5849 		deq_start_time = rte_rdtsc_precise();
5850 
5851 		if (unlikely(num_to_process - deq_total < burst_sz))
5852 			burst_sz = num_to_process - deq_total;
5853 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5854 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
5855 					burst_sz);
5856 		else
5857 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
5858 					burst_sz);
5859 
5860 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5861 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5862 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5863 		*deq_total_time += deq_last_time;
5864 	}
5865 
5866 	return i;
5867 }
5868 
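/* Common function for running the empty-queue dequeue offload latency test */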
5869 static int
5870 offload_latency_empty_q_test(struct active_device *ad,
5871 		struct test_op_params *op_params)
5872 {
5873 	int iter;
5874 	uint64_t deq_total_time, deq_min_time, deq_max_time;
5875 	uint16_t burst_sz = op_params->burst_sz;
5876 	const uint16_t num_to_process = op_params->num_to_process;
5877 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5878 	const uint16_t queue_id = ad->queue_ids[0];
5879 	struct rte_bbdev_info info;
5880 	const char *op_type_str;
5881 
5882 	deq_total_time = deq_max_time = 0;
5883 	deq_min_time = UINT64_MAX;
5884 
5885 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5886 			"BURST_SIZE should be <= %u", MAX_BURST);
5887 
5888 	rte_bbdev_info_get(ad->dev_id, &info);
5889 
5890 	op_type_str = rte_bbdev_op_type_str(op_type);
5891 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5892 
5893 	printf("+ ------------------------------------------------------- +\n");
5894 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5895 			info.dev_name, burst_sz, num_to_process, op_type_str);
5896 
5897 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
5898 			op_type == RTE_BBDEV_OP_LDPC_DEC)
5899 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
5900 				num_to_process, burst_sz, &deq_total_time,
5901 				&deq_min_time, &deq_max_time, op_type);
5902 	else
5903 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
5904 				num_to_process, burst_sz, &deq_total_time,
5905 				&deq_min_time, &deq_max_time, op_type);
5906 
5907 	if (iter <= 0)
5908 		return TEST_FAILED;
5909 
5910 	printf("Empty dequeue offload:\n"
5911 			"\tavg: %lg cycles, %lg us\n"
5912 			"\tmin: %lg cycles, %lg us\n"
5913 			"\tmax: %lg cycles, %lg us\n",
5914 			(double)deq_total_time / (double)iter,
5915 			(double)(deq_total_time * 1000000) / (double)iter /
5916 			(double)rte_get_tsc_hz(), (double)deq_min_time,
5917 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
5918 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
5919 			rte_get_tsc_hz());
5920 
5921 	return TEST_SUCCESS;
5922 }
5923 
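/* Test case wrappers executed through the common run_test_case() helper */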
5924 static int
5925 bler_tc(void)
5926 {
5927 	return run_test_case(bler_test);
5928 }
5929 
5930 static int
5931 throughput_tc(void)
5932 {
5933 	return run_test_case(throughput_test);
5934 }
5935 
5936 static int
5937 offload_cost_tc(void)
5938 {
5939 	return run_test_case(offload_cost_test);
5940 }
5941 
5942 static int
5943 offload_latency_empty_q_tc(void)
5944 {
5945 	return run_test_case(offload_latency_empty_q_test);
5946 }
5947 
5948 static int
5949 latency_tc(void)
5950 {
5951 	return run_test_case(latency_test);
5952 }
5953 
5954 static int
5955 validation_tc(void)
5956 {
5957 	return run_test_case(validation_test);
5958 }
5959 
5960 static int
5961 interrupt_tc(void)
5962 {
5963 	return run_test_case(throughput_test);
5964 }
5965 
5966 static struct unit_test_suite bbdev_bler_testsuite = {
5967 	.suite_name = "BBdev BLER Tests",
5968 	.setup = testsuite_setup,
5969 	.teardown = testsuite_teardown,
5970 	.unit_test_cases = {
5971 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5972 		TEST_CASES_END() /**< NULL terminate unit test array */
5973 	}
5974 };
5975 
5976 static struct unit_test_suite bbdev_throughput_testsuite = {
5977 	.suite_name = "BBdev Throughput Tests",
5978 	.setup = testsuite_setup,
5979 	.teardown = testsuite_teardown,
5980 	.unit_test_cases = {
5981 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5982 		TEST_CASES_END() /**< NULL terminate unit test array */
5983 	}
5984 };
5985 
5986 static struct unit_test_suite bbdev_validation_testsuite = {
5987 	.suite_name = "BBdev Validation Tests",
5988 	.setup = testsuite_setup,
5989 	.teardown = testsuite_teardown,
5990 	.unit_test_cases = {
5991 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5992 		TEST_CASES_END() /**< NULL terminate unit test array */
5993 	}
5994 };
5995 
5996 static struct unit_test_suite bbdev_latency_testsuite = {
5997 	.suite_name = "BBdev Latency Tests",
5998 	.setup = testsuite_setup,
5999 	.teardown = testsuite_teardown,
6000 	.unit_test_cases = {
6001 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
6002 		TEST_CASES_END() /**< NULL terminate unit test array */
6003 	}
6004 };
6005 
6006 static struct unit_test_suite bbdev_offload_cost_testsuite = {
6007 	.suite_name = "BBdev Offload Cost Tests",
6008 	.setup = testsuite_setup,
6009 	.teardown = testsuite_teardown,
6010 	.unit_test_cases = {
6011 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
6012 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
6013 		TEST_CASES_END() /**< NULL terminate unit test array */
6014 	}
6015 };
6016 
6017 static struct unit_test_suite bbdev_interrupt_testsuite = {
6018 	.suite_name = "BBdev Interrupt Tests",
6019 	.setup = interrupt_testsuite_setup,
6020 	.teardown = testsuite_teardown,
6021 	.unit_test_cases = {
6022 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
6023 		TEST_CASES_END() /**< NULL terminate unit test array */
6024 	}
6025 };
6026 
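/*
 * Register the test suites with the test application framework.
 * These command names are assumed to map to the test-case selection
 * option parsed in main.c (e.g. the -c/--test-cases argument).
 */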
6027 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
6028 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
6029 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
6030 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
6031 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
6032 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
6033