xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision a131d9ec3f4367719ca6b82bfefae8e98cea74c4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <inttypes.h>
8 #include <math.h>
9 
10 #include <rte_eal.h>
11 #include <rte_common.h>
12 #include <rte_dev.h>
13 #include <rte_launch.h>
14 #include <rte_bbdev.h>
15 #include <rte_cycles.h>
16 #include <rte_lcore.h>
17 #include <rte_malloc.h>
18 #include <rte_random.h>
19 #include <rte_hexdump.h>
20 #include <rte_interrupts.h>
21 
22 #include "main.h"
23 #include "test_bbdev_vector.h"
24 
25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
26 
27 #define MAX_QUEUES RTE_MAX_LCORE
28 #define TEST_REPETITIONS 100
29 #define TIME_OUT_POLL 1e8
30 #define WAIT_OFFLOAD_US 1000
31 
32 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
33 #include <fpga_lte_fec.h>
34 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
35 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
36 #define VF_UL_4G_QUEUE_VALUE 4
37 #define VF_DL_4G_QUEUE_VALUE 4
38 #define UL_4G_BANDWIDTH 3
39 #define DL_4G_BANDWIDTH 3
40 #define UL_4G_LOAD_BALANCE 128
41 #define DL_4G_LOAD_BALANCE 128
42 #define FLR_4G_TIMEOUT 610
43 #endif
44 
45 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
46 #include <rte_pmd_fpga_5gnr_fec.h>
47 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
48 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
49 #define VF_UL_5G_QUEUE_VALUE 4
50 #define VF_DL_5G_QUEUE_VALUE 4
51 #define UL_5G_BANDWIDTH 3
52 #define DL_5G_BANDWIDTH 3
53 #define UL_5G_LOAD_BALANCE 128
54 #define DL_5G_LOAD_BALANCE 128
55 #endif
56 
57 #ifdef RTE_BASEBAND_ACC
58 #include <rte_acc_cfg.h>
59 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
60 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
61 #define ACC100_QMGR_NUM_AQS 16
62 #define ACC100_QMGR_NUM_QGS 2
63 #define ACC100_QMGR_AQ_DEPTH 5
64 #define ACC100_QMGR_INVALID_IDX -1
65 #define ACC100_QMGR_RR 1
66 #define ACC100_QOS_GBR 0
67 #define ACC200PF_DRIVER_NAME   ("intel_acc200_pf")
68 #define ACC200VF_DRIVER_NAME   ("intel_acc200_vf")
69 #define ACC200_QMGR_NUM_AQS 16
70 #define ACC200_QMGR_NUM_QGS 2
71 #define ACC200_QMGR_AQ_DEPTH 5
72 #define ACC200_QMGR_INVALID_IDX -1
73 #define ACC200_QMGR_RR 1
74 #define ACC200_QOS_GBR 0
75 #endif
76 
77 #define OPS_CACHE_SIZE 256U
78 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
79 
80 #define SYNC_WAIT 0
81 #define SYNC_START 1
82 
83 #define INVALID_QUEUE_ID -1
84 /* Increment for next code block in external HARQ memory */
85 #define HARQ_INCR 32768
86 /* Headroom for filler LLRs insertion in HARQ buffer */
87 #define FILLER_HEADROOM 2048
88 /* Constants for K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
89 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
90 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
91 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
92 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
93 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
94 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
95 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
96 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
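/*
 * Illustrative note (assumption based on 3GPP 38.212 clause 5.4.2.1, not a
 * definitive reference): these numerators are expected to be combined with
 * the circular buffer length n_cb and lifting size z_c roughly as, using
 * integer division,
 *   k0 = ((K0_x_y * n_cb) / (N_ZC_y * z_c)) * z_c
 * e.g. BG1 with rv_index == 1 gives k0 = ((K0_1_1 * n_cb) / (N_ZC_1 * z_c)) * z_c,
 * while rv_index == 0 gives k0 == 0.
 */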
97 
98 #define HARQ_MEM_TOLERANCE 256
99 static struct test_bbdev_vector test_vector;
100 
101 /* Switch between polling (PMD) and interrupt mode for the throughput test cases */
102 static bool intr_enabled;
103 
104 /* LLR arithmetic representation for numerical conversion */
105 static int ldpc_llr_decimals;
106 static int ldpc_llr_size;
107 /* Keep track of the LDPC decoder device capability flag */
108 static uint32_t ldpc_cap_flags;
109 
110 /* Represents tested active devices */
111 static struct active_device {
112 	const char *driver_name;
113 	uint8_t dev_id;
114 	uint16_t supported_ops;
115 	uint16_t queue_ids[MAX_QUEUES];
116 	uint16_t nb_queues;
117 	struct rte_mempool *ops_mempool;
118 	struct rte_mempool *in_mbuf_pool;
119 	struct rte_mempool *hard_out_mbuf_pool;
120 	struct rte_mempool *soft_out_mbuf_pool;
121 	struct rte_mempool *harq_in_mbuf_pool;
122 	struct rte_mempool *harq_out_mbuf_pool;
123 } active_devs[RTE_BBDEV_MAX_DEVS];
124 
125 static uint8_t nb_active_devs;
126 
127 /* Data buffers used by BBDEV ops */
128 struct test_buffers {
129 	struct rte_bbdev_op_data *inputs;
130 	struct rte_bbdev_op_data *hard_outputs;
131 	struct rte_bbdev_op_data *soft_outputs;
132 	struct rte_bbdev_op_data *harq_inputs;
133 	struct rte_bbdev_op_data *harq_outputs;
134 };
135 
136 /* Operation parameters specific for given test case */
137 struct test_op_params {
138 	struct rte_mempool *mp;
139 	struct rte_bbdev_dec_op *ref_dec_op;
140 	struct rte_bbdev_enc_op *ref_enc_op;
141 	struct rte_bbdev_fft_op *ref_fft_op;
142 	uint16_t burst_sz;
143 	uint16_t num_to_process;
144 	uint16_t num_lcores;
145 	int vector_mask;
146 	uint16_t sync;
147 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
148 };
149 
150 /* Contains per lcore params */
151 struct thread_params {
152 	uint8_t dev_id;
153 	uint16_t queue_id;
154 	uint32_t lcore_id;
155 	uint64_t start_time;
156 	double ops_per_sec;
157 	double mbps;
158 	uint8_t iter_count;
159 	double iter_average;
160 	double bler;
161 	uint16_t nb_dequeued;
162 	int16_t processing_status;
163 	uint16_t burst_sz;
164 	struct test_op_params *op_params;
165 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
166 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
167 	struct rte_bbdev_fft_op *fft_ops[MAX_BURST];
168 };
169 
170 /* Stores time statistics */
171 struct test_time_stats {
172 	/* Stores software enqueue total working time */
173 	uint64_t enq_sw_total_time;
174 	/* Stores minimum value of software enqueue working time */
175 	uint64_t enq_sw_min_time;
176 	/* Stores maximum value of software enqueue working time */
177 	uint64_t enq_sw_max_time;
178 	/* Stores turbo enqueue total working time */
179 	uint64_t enq_acc_total_time;
180 	/* Stores minimum value of accelerator enqueue working time */
181 	uint64_t enq_acc_min_time;
182 	/* Stores maximum value of accelerator enqueue working time */
183 	uint64_t enq_acc_max_time;
184 	/* Stores dequeue total working time */
185 	uint64_t deq_total_time;
186 	/* Stores minimum value of dequeue working time */
187 	uint64_t deq_min_time;
188 	/* Stores maximum value of dequeue working time */
189 	uint64_t deq_max_time;
190 };
191 
192 typedef int (test_case_function)(struct active_device *ad,
193 		struct test_op_params *op_params);
194 
195 /* Get device status before timeout exit */
196 static inline void
197 timeout_exit(uint8_t dev_id)
198 {
199 	struct rte_bbdev_info info;
200 	rte_bbdev_info_get(dev_id, &info);
201 	printf("Device Status %s\n", rte_bbdev_device_status_str(info.drv.device_status));
202 }
203 
204 static inline void
205 mbuf_reset(struct rte_mbuf *m)
206 {
207 	m->pkt_len = 0;
208 
209 	do {
210 		m->data_len = 0;
211 		m = m->next;
212 	} while (m != NULL);
213 }
214 
215 /* Read flag value 0/1 from bitmap */
216 static inline bool
217 check_bit(uint32_t bitmap, uint32_t bitmask)
218 {
219 	return bitmap & bitmask;
220 }
221 
222 static inline void
223 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
224 {
225 	ad->supported_ops |= (1 << op_type);
226 }
227 
228 static inline bool
229 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
230 {
231 	return ad->supported_ops & (1 << op_type);
232 }
233 
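/*
 * Illustrative examples (hypothetical flag values, not from a test vector):
 *   flags_match(0x3, 0x7) == true   - all requested flags are present
 *   flags_match(0x5, 0x3) == false  - bit 0x4 is requested but missing
 */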
234 static inline bool
235 flags_match(uint32_t flags_req, uint32_t flags_present)
236 {
237 	return (flags_req & flags_present) == flags_req;
238 }
239 
240 static void
241 clear_soft_out_cap(uint32_t *op_flags)
242 {
243 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
244 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
245 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
246 }
247 
248 /* Convert all the test vector op data entries to big-endian
249  * format. This is used when the device expects its input
250  * in big-endian format.
251  */
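/*
 * Worked example (hypothetical segment, for illustration only): for a
 * 10-byte segment, the first two u32 words are byte-swapped with
 * rte_bswap32() and the remaining 2 bytes are reversed in place.
 */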
252 static inline void
253 convert_op_data_to_be(void)
254 {
255 	struct op_data_entries *op;
256 	enum op_data_type type;
257 	uint8_t nb_segs, *rem_data, temp;
258 	uint32_t *data, len;
259 	int complete, rem, i, j;
260 
261 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
262 		nb_segs = test_vector.entries[type].nb_segments;
263 		op = &test_vector.entries[type];
264 
265 		/* Invert byte endianness for all the segments */
266 		for (i = 0; i < nb_segs; ++i) {
267 			len = op->segments[i].length;
268 			data = op->segments[i].addr;
269 
270 			/* Swap complete u32 bytes */
271 			complete = len / 4;
272 			for (j = 0; j < complete; j++)
273 				data[j] = rte_bswap32(data[j]);
274 
275 			/* Swap any remaining bytes */
276 			rem = len % 4;
277 			rem_data = (uint8_t *)&data[j];
278 			for (j = 0; j < rem/2; j++) {
279 				temp = rem_data[j];
280 				rem_data[j] = rem_data[rem - j - 1];
281 				rem_data[rem - j - 1] = temp;
282 			}
283 		}
284 	}
285 }
286 
287 static int
288 check_dev_cap(const struct rte_bbdev_info *dev_info)
289 {
290 	unsigned int i;
291 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
292 		nb_harq_inputs, nb_harq_outputs;
293 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
294 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
295 
296 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
297 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
298 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
299 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
300 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
301 
302 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
303 		if (op_cap->type != test_vector.op_type)
304 			continue;
305 
306 		if (dev_data_endianness == RTE_BIG_ENDIAN)
307 			convert_op_data_to_be();
308 
309 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
310 			const struct rte_bbdev_op_cap_turbo_dec *cap =
311 					&op_cap->cap.turbo_dec;
312 			/* Ignore lack of soft output capability, just skip
313 			 * checking if soft output is valid.
314 			 */
315 			if ((test_vector.turbo_dec.op_flags &
316 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
317 					!(cap->capability_flags &
318 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319 				printf(
320 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
321 					dev_info->dev_name);
322 				clear_soft_out_cap(
323 					&test_vector.turbo_dec.op_flags);
324 			}
325 
326 			if (!flags_match(test_vector.turbo_dec.op_flags,
327 					cap->capability_flags))
328 				return TEST_FAILED;
329 			if (nb_inputs > cap->num_buffers_src) {
330 				printf("Too many inputs defined: %u, max: %u\n",
331 					nb_inputs, cap->num_buffers_src);
332 				return TEST_FAILED;
333 			}
334 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
335 					(test_vector.turbo_dec.op_flags &
336 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
337 				printf(
338 					"Too many soft outputs defined: %u, max: %u\n",
339 						nb_soft_outputs,
340 						cap->num_buffers_soft_out);
341 				return TEST_FAILED;
342 			}
343 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
344 				printf(
345 					"Too many hard outputs defined: %u, max: %u\n",
346 						nb_hard_outputs,
347 						cap->num_buffers_hard_out);
348 				return TEST_FAILED;
349 			}
350 			if (intr_enabled && !(cap->capability_flags &
351 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
352 				printf(
353 					"Dequeue interrupts are not supported!\n");
354 				return TEST_FAILED;
355 			}
356 
357 			return TEST_SUCCESS;
358 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
359 			const struct rte_bbdev_op_cap_turbo_enc *cap =
360 					&op_cap->cap.turbo_enc;
361 
362 			if (!flags_match(test_vector.turbo_enc.op_flags,
363 					cap->capability_flags))
364 				return TEST_FAILED;
365 			if (nb_inputs > cap->num_buffers_src) {
366 				printf("Too many inputs defined: %u, max: %u\n",
367 					nb_inputs, cap->num_buffers_src);
368 				return TEST_FAILED;
369 			}
370 			if (nb_hard_outputs > cap->num_buffers_dst) {
371 				printf(
372 					"Too many hard outputs defined: %u, max: %u\n",
373 					nb_hard_outputs, cap->num_buffers_dst);
374 				return TEST_FAILED;
375 			}
376 			if (intr_enabled && !(cap->capability_flags &
377 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
378 				printf(
379 					"Dequeue interrupts are not supported!\n");
380 				return TEST_FAILED;
381 			}
382 
383 			return TEST_SUCCESS;
384 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
385 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
386 					&op_cap->cap.ldpc_enc;
387 
388 			if (!flags_match(test_vector.ldpc_enc.op_flags,
389 					cap->capability_flags)){
390 				printf("Flag Mismatch\n");
391 				return TEST_FAILED;
392 			}
393 			if (nb_inputs > cap->num_buffers_src) {
394 				printf("Too many inputs defined: %u, max: %u\n",
395 					nb_inputs, cap->num_buffers_src);
396 				return TEST_FAILED;
397 			}
398 			if (nb_hard_outputs > cap->num_buffers_dst) {
399 				printf(
400 					"Too many hard outputs defined: %u, max: %u\n",
401 					nb_hard_outputs, cap->num_buffers_dst);
402 				return TEST_FAILED;
403 			}
404 			if (intr_enabled && !(cap->capability_flags &
405 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
406 				printf(
407 					"Dequeue interrupts are not supported!\n");
408 				return TEST_FAILED;
409 			}
410 
411 			return TEST_SUCCESS;
412 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
413 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
414 					&op_cap->cap.ldpc_dec;
415 
416 			if (!flags_match(test_vector.ldpc_dec.op_flags,
417 					cap->capability_flags)){
418 				printf("Flag Mismatch\n");
419 				return TEST_FAILED;
420 			}
421 			if (nb_inputs > cap->num_buffers_src) {
422 				printf("Too many inputs defined: %u, max: %u\n",
423 					nb_inputs, cap->num_buffers_src);
424 				return TEST_FAILED;
425 			}
426 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
427 				printf(
428 					"Too many hard outputs defined: %u, max: %u\n",
429 					nb_hard_outputs,
430 					cap->num_buffers_hard_out);
431 				return TEST_FAILED;
432 			}
433 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
434 				printf(
435 					"Too many HARQ inputs defined: %u, max: %u\n",
436 					nb_harq_inputs,
437 					cap->num_buffers_hard_out);
438 				return TEST_FAILED;
439 			}
440 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
441 				printf(
442 					"Too many HARQ outputs defined: %u, max: %u\n",
443 					nb_harq_outputs,
444 					cap->num_buffers_hard_out);
445 				return TEST_FAILED;
446 			}
447 			if (intr_enabled && !(cap->capability_flags &
448 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
449 				printf(
450 					"Dequeue interrupts are not supported!\n");
451 				return TEST_FAILED;
452 			}
453 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
454 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
455 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
456 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
457 					))) {
458 				printf("Skip loop-back with interrupt\n");
459 				return TEST_FAILED;
460 			}
461 			return TEST_SUCCESS;
462 		} else if (op_cap->type == RTE_BBDEV_OP_FFT) {
463 			const struct rte_bbdev_op_cap_fft *cap = &op_cap->cap.fft;
464 
465 			if (!flags_match(test_vector.fft.op_flags, cap->capability_flags)) {
466 				printf("Flag Mismatch\n");
467 				return TEST_FAILED;
468 			}
469 			if (nb_inputs > cap->num_buffers_src) {
470 				printf("Too many inputs defined: %u, max: %u\n",
471 					nb_inputs, cap->num_buffers_src);
472 				return TEST_FAILED;
473 			}
474 			return TEST_SUCCESS;
475 		}
476 	}
477 
478 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
479 		return TEST_SUCCESS; /* Special case for NULL device */
480 
481 	return TEST_FAILED;
482 }
483 
484 /* Calculate the optimal mempool size, not smaller than val */
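/*
 * Example (illustrative numbers): optimal_mempool_size(600) returns 1023,
 * i.e. the next (2^n - 1) value, which is the size recommended for best
 * rte_mempool memory utilization.
 */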
485 static unsigned int
486 optimal_mempool_size(unsigned int val)
487 {
488 	return rte_align32pow2(val + 1) - 1;
489 }
490 
491 /* allocates mbuf mempool for inputs and outputs */
492 static struct rte_mempool *
493 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
494 		int socket_id, unsigned int mbuf_pool_size,
495 		const char *op_type_str)
496 {
497 	unsigned int i;
498 	uint32_t max_seg_sz = 0;
499 	char pool_name[RTE_MEMPOOL_NAMESIZE];
500 
501 	/* find max input segment size */
502 	for (i = 0; i < entries->nb_segments; ++i)
503 		if (entries->segments[i].length > max_seg_sz)
504 			max_seg_sz = entries->segments[i].length;
505 
506 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
507 			dev_id);
508 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
509 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
510 					+ FILLER_HEADROOM,
511 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
512 }
513 
514 static int
515 create_mempools(struct active_device *ad, int socket_id,
516 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
517 {
518 	struct rte_mempool *mp;
519 	unsigned int ops_pool_size, mbuf_pool_size = 0;
520 	char pool_name[RTE_MEMPOOL_NAMESIZE];
521 	const char *op_type_str;
522 	enum rte_bbdev_op_type op_type = org_op_type;
523 
524 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
525 	struct op_data_entries *hard_out =
526 			&test_vector.entries[DATA_HARD_OUTPUT];
527 	struct op_data_entries *soft_out =
528 			&test_vector.entries[DATA_SOFT_OUTPUT];
529 	struct op_data_entries *harq_in =
530 			&test_vector.entries[DATA_HARQ_INPUT];
531 	struct op_data_entries *harq_out =
532 			&test_vector.entries[DATA_HARQ_OUTPUT];
533 
534 	/* allocate ops mempool */
535 	ops_pool_size = optimal_mempool_size(RTE_MAX(
536 			/* Ops used plus 1 reference op */
537 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
538 			/* Minimal cache size plus 1 reference op */
539 			(unsigned int)(1.5 * rte_lcore_count() *
540 					OPS_CACHE_SIZE + 1)),
541 			OPS_POOL_SIZE_MIN));
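	/*
	 * Sizing example (hypothetical numbers): with 2 queues, num_ops = 2047
	 * and 4 lcores this is RTE_MAX(2 * 2047 + 1, 1.5 * 4 * 256 + 1) =
	 * RTE_MAX(4095, 1537), then rounded by optimal_mempool_size() to 4095.
	 */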
542 
543 	if (org_op_type == RTE_BBDEV_OP_NONE)
544 		op_type = RTE_BBDEV_OP_TURBO_ENC;
545 
546 	op_type_str = rte_bbdev_op_type_str(op_type);
547 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
548 
549 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
550 			ad->dev_id);
551 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
552 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
553 	TEST_ASSERT_NOT_NULL(mp,
554 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
555 			ops_pool_size,
556 			ad->dev_id,
557 			socket_id);
558 	ad->ops_mempool = mp;
559 
560 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
561 	if (org_op_type == RTE_BBDEV_OP_NONE)
562 		return TEST_SUCCESS;
563 
564 	/* Inputs */
565 	if (in->nb_segments > 0) {
566 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
567 				in->nb_segments);
568 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
569 				mbuf_pool_size, "in");
570 		TEST_ASSERT_NOT_NULL(mp,
571 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
572 				mbuf_pool_size,
573 				ad->dev_id,
574 				socket_id);
575 		ad->in_mbuf_pool = mp;
576 	}
577 
578 	/* Hard outputs */
579 	if (hard_out->nb_segments > 0) {
580 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
581 				hard_out->nb_segments);
582 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
583 				mbuf_pool_size,
584 				"hard_out");
585 		TEST_ASSERT_NOT_NULL(mp,
586 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
587 				mbuf_pool_size,
588 				ad->dev_id,
589 				socket_id);
590 		ad->hard_out_mbuf_pool = mp;
591 	}
592 
593 	/* Soft outputs */
594 	if (soft_out->nb_segments > 0) {
595 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
596 				soft_out->nb_segments);
597 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
598 				mbuf_pool_size,
599 				"soft_out");
600 		TEST_ASSERT_NOT_NULL(mp,
601 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
602 				mbuf_pool_size,
603 				ad->dev_id,
604 				socket_id);
605 		ad->soft_out_mbuf_pool = mp;
606 	}
607 
608 	/* HARQ inputs */
609 	if (harq_in->nb_segments > 0) {
610 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
611 				harq_in->nb_segments);
612 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
613 				mbuf_pool_size,
614 				"harq_in");
615 		TEST_ASSERT_NOT_NULL(mp,
616 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
617 				mbuf_pool_size,
618 				ad->dev_id,
619 				socket_id);
620 		ad->harq_in_mbuf_pool = mp;
621 	}
622 
623 	/* HARQ outputs */
624 	if (harq_out->nb_segments > 0) {
625 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
626 				harq_out->nb_segments);
627 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
628 				mbuf_pool_size,
629 				"harq_out");
630 		TEST_ASSERT_NOT_NULL(mp,
631 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
632 				mbuf_pool_size,
633 				ad->dev_id,
634 				socket_id);
635 		ad->harq_out_mbuf_pool = mp;
636 	}
637 
638 	return TEST_SUCCESS;
639 }
640 
641 static int
642 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
643 		struct test_bbdev_vector *vector)
644 {
645 	int ret;
646 	unsigned int queue_id;
647 	struct rte_bbdev_queue_conf qconf;
648 	struct active_device *ad = &active_devs[nb_active_devs];
649 	unsigned int nb_queues;
650 	enum rte_bbdev_op_type op_type = vector->op_type;
651 
652 /* Configure the FPGA LTE FEC device with PF & VF values
653  * if the '-i' flag is set and an FPGA device is used
654  */
655 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
656 	if ((get_init_device() == true) &&
657 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
658 		struct rte_fpga_lte_fec_conf conf;
659 		unsigned int i;
660 
661 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
662 				info->drv.driver_name);
663 
664 		/* clear default configuration before initialization */
665 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
666 
667 		/* Set PF mode:
668 		 * true if the PF is used for the data plane,
669 		 * false when VFs are used
670 		 */
671 		conf.pf_mode_en = true;
672 
673 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
674 			/* Number of UL queues per VF (fpga supports 8 VFs) */
675 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
676 			/* Number of DL queues per VF (fpga supports 8 VFs) */
677 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
678 		}
679 
680 		/* UL bandwidth. Needed for schedule algorithm */
681 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
682 		/* DL bandwidth */
683 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
684 
685 		/* UL & DL load balance factor set to 128 */
686 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
687 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
688 
689 		/* FLR timeout value */
690 		conf.flr_time_out = FLR_4G_TIMEOUT;
691 
692 		/* setup FPGA PF with configuration information */
693 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
694 		TEST_ASSERT_SUCCESS(ret,
695 				"Failed to configure 4G FPGA PF for bbdev %s",
696 				info->dev_name);
697 	}
698 #endif
699 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
700 	if ((get_init_device() == true) &&
701 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
702 		struct rte_fpga_5gnr_fec_conf conf;
703 		unsigned int i;
704 
705 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
706 				info->drv.driver_name);
707 
708 		/* clear default configuration before initialization */
709 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
710 
711 		/* Set PF mode:
712 		 * true if the PF is used for the data plane,
713 		 * false when VFs are used
714 		 */
715 		conf.pf_mode_en = true;
716 
717 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
718 			/* Number of UL queues per VF (fpga supports 8 VFs) */
719 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
720 			/* Number of DL queues per VF (fpga supports 8 VFs) */
721 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
722 		}
723 
724 		/* UL bandwidth. Needed for schedule algorithm */
725 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
726 		/* DL bandwidth */
727 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
728 
729 		/* UL & DL load balance factor set to 128 */
730 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
731 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
732 
733 		/* setup FPGA PF with configuration information */
734 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
735 		TEST_ASSERT_SUCCESS(ret,
736 				"Failed to configure 5G FPGA PF for bbdev %s",
737 				info->dev_name);
738 	}
739 #endif
740 #ifdef RTE_BASEBAND_ACC
741 	if ((get_init_device() == true) &&
742 			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
743 		struct rte_acc_conf conf;
744 		unsigned int i;
745 
746 		printf("Configure ACC100/ACC101 FEC Driver %s with default values\n",
747 				info->drv.driver_name);
748 
749 		/* clear default configuration before initialization */
750 		memset(&conf, 0, sizeof(struct rte_acc_conf));
751 
752 		/* Always set in PF mode for built-in configuration */
753 		conf.pf_mode_en = true;
754 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
755 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
756 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
757 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
758 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
759 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
760 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
761 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
762 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
763 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
764 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
765 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
766 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
767 		}
768 
769 		conf.input_pos_llr_1_bit = true;
770 		conf.output_pos_llr_1_bit = true;
771 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
772 
773 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
774 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
775 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
776 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
777 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
778 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
779 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
780 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
781 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
782 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
783 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
784 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
785 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
786 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
787 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
788 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
789 
790 		/* setup PF with configuration information */
791 		ret = rte_acc_configure(info->dev_name, &conf);
792 		TEST_ASSERT_SUCCESS(ret,
793 				"Failed to configure ACC100 PF for bbdev %s",
794 				info->dev_name);
795 	}
796 	if ((get_init_device() == true) &&
797 		(!strcmp(info->drv.driver_name, ACC200PF_DRIVER_NAME))) {
798 		struct rte_acc_conf conf;
799 		unsigned int i;
800 
801 		printf("Configure ACC200 FEC Driver %s with default values\n",
802 				info->drv.driver_name);
803 
804 		/* clear default configuration before initialization */
805 		memset(&conf, 0, sizeof(struct rte_acc_conf));
806 
807 		/* Always set in PF mode for built-in configuration */
808 		conf.pf_mode_en = true;
809 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
810 			conf.arb_dl_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
811 			conf.arb_dl_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
812 			conf.arb_dl_4g[i].round_robin_weight = ACC200_QMGR_RR;
813 			conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
814 			conf.arb_ul_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
815 			conf.arb_ul_4g[i].round_robin_weight = ACC200_QMGR_RR;
816 			conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
817 			conf.arb_dl_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
818 			conf.arb_dl_5g[i].round_robin_weight = ACC200_QMGR_RR;
819 			conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
820 			conf.arb_ul_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
821 			conf.arb_ul_5g[i].round_robin_weight = ACC200_QMGR_RR;
822 			conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR;
823 			conf.arb_fft[i].gbr_threshold2 = ACC200_QOS_GBR;
824 			conf.arb_fft[i].round_robin_weight = ACC200_QMGR_RR;
825 		}
826 
827 		conf.input_pos_llr_1_bit = true;
828 		conf.output_pos_llr_1_bit = true;
829 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
830 
831 		conf.q_ul_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
832 		conf.q_ul_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
833 		conf.q_ul_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
834 		conf.q_ul_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
835 		conf.q_dl_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
836 		conf.q_dl_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
837 		conf.q_dl_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
838 		conf.q_dl_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
839 		conf.q_ul_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
840 		conf.q_ul_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
841 		conf.q_ul_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
842 		conf.q_ul_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
843 		conf.q_dl_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
844 		conf.q_dl_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
845 		conf.q_dl_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
846 		conf.q_dl_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
847 		conf.q_fft.num_qgroups = ACC200_QMGR_NUM_QGS;
848 		conf.q_fft.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
849 		conf.q_fft.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
850 		conf.q_fft.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
851 
852 		/* setup PF with configuration information */
853 		ret = rte_acc_configure(info->dev_name, &conf);
854 		TEST_ASSERT_SUCCESS(ret,
855 				"Failed to configure ACC200 PF for bbdev %s",
856 				info->dev_name);
857 	}
858 #endif
859 	/* Refresh the device info now that it has been configured */
860 	rte_bbdev_info_get(dev_id, info);
861 	if (info->drv.device_status == RTE_BBDEV_DEV_FATAL_ERR)
862 		printf("Device Status %s\n", rte_bbdev_device_status_str(info->drv.device_status));
863 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
864 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
865 
866 	/* setup device */
867 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
868 	if (ret < 0) {
869 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
870 				dev_id, nb_queues, info->socket_id, ret);
871 		return TEST_FAILED;
872 	}
873 
874 	/* configure interrupts if needed */
875 	if (intr_enabled) {
876 		ret = rte_bbdev_intr_enable(dev_id);
877 		if (ret < 0) {
878 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
879 					ret);
880 			return TEST_FAILED;
881 		}
882 	}
883 
884 	/* setup device queues */
885 	qconf.socket = info->socket_id;
886 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
887 	qconf.priority = 0;
888 	qconf.deferred_start = 0;
889 	qconf.op_type = op_type;
890 
891 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
892 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
893 		if (ret != 0) {
894 			printf(
895 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
896 					queue_id, qconf.priority, dev_id);
897 			qconf.priority++;
898 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf);
899 		}
900 		if (ret != 0) {
901 			printf("All queues on dev %u allocated: %u\n", dev_id, queue_id);
902 			break;
903 		}
904 		ret = rte_bbdev_queue_start(ad->dev_id, queue_id);
905 		if (ret != 0) {
906 			printf("Failed to start queue on dev %u q_id: %u\n", dev_id, queue_id);
907 			break;
908 		}
909 		ad->queue_ids[queue_id] = queue_id;
910 	}
911 	TEST_ASSERT(queue_id != 0,
912 			"ERROR Failed to configure any queues on dev %u\n"
913 			"\tthe device may not support the related operation capability\n"
914 			"\tor the device may not have been configured yet", dev_id);
915 	ad->nb_queues = queue_id;
916 
917 	set_avail_op(ad, op_type);
918 
919 	return TEST_SUCCESS;
920 }
921 
922 static int
923 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
924 		struct test_bbdev_vector *vector)
925 {
926 	int ret;
927 
928 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
929 	active_devs[nb_active_devs].dev_id = dev_id;
930 
931 	ret = add_bbdev_dev(dev_id, info, vector);
932 	if (ret == TEST_SUCCESS)
933 		++nb_active_devs;
934 	return ret;
935 }
936 
937 static uint8_t
938 populate_active_devices(void)
939 {
940 	int ret;
941 	uint8_t dev_id;
942 	uint8_t nb_devs_added = 0;
943 	struct rte_bbdev_info info;
944 
945 	RTE_BBDEV_FOREACH(dev_id) {
946 		rte_bbdev_info_get(dev_id, &info);
947 
948 		if (check_dev_cap(&info)) {
949 			printf(
950 				"Device %d (%s) does not support specified capabilities\n",
951 					dev_id, info.dev_name);
952 			continue;
953 		}
954 
955 		ret = add_active_device(dev_id, &info, &test_vector);
956 		if (ret != 0) {
957 			printf("Adding active bbdev %s skipped\n",
958 					info.dev_name);
959 			continue;
960 		}
961 		nb_devs_added++;
962 	}
963 
964 	return nb_devs_added;
965 }
966 
967 static int
968 read_test_vector(void)
969 {
970 	int ret;
971 
972 	memset(&test_vector, 0, sizeof(test_vector));
973 	printf("Test vector file = %s\n", get_vector_filename());
974 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
975 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
976 			get_vector_filename());
977 
978 	return TEST_SUCCESS;
979 }
980 
981 static int
982 testsuite_setup(void)
983 {
984 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
985 
986 	if (populate_active_devices() == 0) {
987 		printf("No suitable devices found!\n");
988 		return TEST_SKIPPED;
989 	}
990 
991 	return TEST_SUCCESS;
992 }
993 
994 static int
995 interrupt_testsuite_setup(void)
996 {
997 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
998 
999 	/* Enable interrupts */
1000 	intr_enabled = true;
1001 
1002 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
1003 	if (populate_active_devices() == 0 ||
1004 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
1005 		intr_enabled = false;
1006 		printf("No suitable devices found!\n");
1007 		return TEST_SKIPPED;
1008 	}
1009 
1010 	return TEST_SUCCESS;
1011 }
1012 
1013 static void
1014 testsuite_teardown(void)
1015 {
1016 	uint8_t dev_id;
1017 
1018 	/* Unconfigure devices */
1019 	RTE_BBDEV_FOREACH(dev_id)
1020 		rte_bbdev_close(dev_id);
1021 
1022 	/* Clear active devices structs. */
1023 	memset(active_devs, 0, sizeof(active_devs));
1024 	nb_active_devs = 0;
1025 
1026 	/* Disable interrupts */
1027 	intr_enabled = false;
1028 }
1029 
1030 static int
1031 ut_setup(void)
1032 {
1033 	uint8_t i, dev_id;
1034 
1035 	for (i = 0; i < nb_active_devs; i++) {
1036 		dev_id = active_devs[i].dev_id;
1037 		/* reset bbdev stats */
1038 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
1039 				"Failed to reset stats of bbdev %u", dev_id);
1040 		/* start the device */
1041 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
1042 				"Failed to start bbdev %u", dev_id);
1043 	}
1044 
1045 	return TEST_SUCCESS;
1046 }
1047 
1048 static void
1049 ut_teardown(void)
1050 {
1051 	uint8_t i, dev_id;
1052 	struct rte_bbdev_stats stats;
1053 
1054 	for (i = 0; i < nb_active_devs; i++) {
1055 		dev_id = active_devs[i].dev_id;
1056 		/* read stats and print */
1057 		rte_bbdev_stats_get(dev_id, &stats);
1058 		/* Stop the device */
1059 		rte_bbdev_stop(dev_id);
1060 	}
1061 }
1062 
1063 static int
1064 init_op_data_objs(struct rte_bbdev_op_data *bufs,
1065 		struct op_data_entries *ref_entries,
1066 		struct rte_mempool *mbuf_pool, const uint16_t n,
1067 		enum op_data_type op_type, uint16_t min_alignment)
1068 {
1069 	int ret;
1070 	unsigned int i, j;
1071 	bool large_input = false;
1072 
1073 	for (i = 0; i < n; ++i) {
1074 		char *data;
1075 		struct op_data_buf *seg = &ref_entries->segments[0];
1076 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
1077 		TEST_ASSERT_NOT_NULL(m_head,
1078 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1079 				op_type, n * ref_entries->nb_segments,
1080 				mbuf_pool->size);
1081 
1082 		if ((seg->length + RTE_PKTMBUF_HEADROOM) > RTE_BBDEV_LDPC_E_MAX_MBUF) {
1083 			/*
1084 			 * Special case when DPDK mbuf cannot handle
1085 			 * the required input size
1086 			 */
1087 			large_input = true;
1088 		}
1089 		bufs[i].data = m_head;
1090 		bufs[i].offset = 0;
1091 		bufs[i].length = 0;
1092 
1093 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1094 			if (large_input) {
1095 				/* Back the mbuf with an external buffer for oversized input */
1096 				data = rte_malloc(NULL, seg->length, 0);
1097 				TEST_ASSERT_NOT_NULL(data,
1098 					"rte malloc failed with %u bytes",
1099 					seg->length);
1100 				memcpy(data, seg->addr, seg->length);
1101 				m_head->buf_addr = data;
1102 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1103 				m_head->data_off = 0;
1104 				m_head->data_len = seg->length;
1105 			} else {
1106 				data = rte_pktmbuf_append(m_head, seg->length);
1107 				TEST_ASSERT_NOT_NULL(data,
1108 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1109 					seg->length, op_type);
1110 
1111 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1112 						data, min_alignment),
1113 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1114 					data, min_alignment);
1115 				rte_memcpy(data, seg->addr, seg->length);
1116 			}
1117 
1118 			bufs[i].length += seg->length;
1119 
1120 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1121 				struct rte_mbuf *m_tail =
1122 						rte_pktmbuf_alloc(mbuf_pool);
1123 				TEST_ASSERT_NOT_NULL(m_tail,
1124 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1125 						op_type,
1126 						n * ref_entries->nb_segments,
1127 						mbuf_pool->size);
1128 				seg += 1;
1129 
1130 				data = rte_pktmbuf_append(m_tail, seg->length);
1131 				TEST_ASSERT_NOT_NULL(data,
1132 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1133 						seg->length, op_type);
1134 
1135 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1136 						min_alignment),
1137 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1138 						data, min_alignment);
1139 				rte_memcpy(data, seg->addr, seg->length);
1140 				bufs[i].length += seg->length;
1141 
1142 				ret = rte_pktmbuf_chain(m_head, m_tail);
1143 				TEST_ASSERT_SUCCESS(ret,
1144 						"Couldn't chain mbufs from %d data type mbuf pool",
1145 						op_type);
1146 			}
1147 		} else {
1148 			if (((op_type == DATA_HARD_OUTPUT) || (op_type == DATA_SOFT_OUTPUT))
1149 					&& ((seg->length + RTE_PKTMBUF_HEADROOM)
1150 					> RTE_BBDEV_LDPC_E_MAX_MBUF)) {
1151 				/* Back the mbuf with an external buffer (plus margin) for oversized output */
1152 				data = rte_malloc(NULL, seg->length + 1024, 0);
1153 				TEST_ASSERT_NOT_NULL(data,
1154 					"rte malloc failed with %u bytes",
1155 					seg->length + 1024);
1156 				m_head->buf_addr = data;
1157 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1158 				m_head->data_off = 0;
1159 				m_head->data_len = seg->length;
1160 			} else {
1161 				/* allocate chained-mbuf for output buffer */
1162 				for (j = 1; j < ref_entries->nb_segments; ++j) {
1163 					struct rte_mbuf *m_tail =
1164 						rte_pktmbuf_alloc(mbuf_pool);
1165 					TEST_ASSERT_NOT_NULL(m_tail,
1166 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1167 						op_type,
1168 						n * ref_entries->nb_segments,
1169 						mbuf_pool->size);
1170 
1171 					ret = rte_pktmbuf_chain(m_head, m_tail);
1172 					TEST_ASSERT_SUCCESS(ret,
1173 						"Couldn't chain mbufs from %d data type mbuf pool",
1174 						op_type);
1175 				}
1176 			}
1177 			bufs[i].length += seg->length;
1178 		}
1179 	}
1180 
1181 	return 0;
1182 }
1183 
1184 static int
1185 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1186 		const int socket)
1187 {
1188 	int i;
1189 
1190 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1191 	if (*buffers == NULL) {
1192 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1193 				socket);
1194 		/* try to allocate memory on other detected sockets */
1195 		for (i = 0; i < socket; i++) {
1196 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1197 			if (*buffers != NULL)
1198 				break;
1199 		}
1200 	}
1201 
1202 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1203 }
1204 
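/*
 * Example (hypothetical values): with max_llr_modulus == 16 an input LLR of
 * 127 is rescaled to 16 and -64 to round(16 * -64 / 127) = -8, so the full
 * int8 range of the vector maps onto the device's supported LLR range.
 */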
1205 static void
1206 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1207 		const uint16_t n, const int8_t max_llr_modulus)
1208 {
1209 	uint16_t i, byte_idx;
1210 
1211 	for (i = 0; i < n; ++i) {
1212 		struct rte_mbuf *m = input_ops[i].data;
1213 		while (m != NULL) {
1214 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1215 					input_ops[i].offset);
1216 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1217 					++byte_idx)
1218 				llr[byte_idx] = round((double)max_llr_modulus *
1219 						llr[byte_idx] / INT8_MAX);
1220 
1221 			m = m->next;
1222 		}
1223 	}
1224 }
1225 
1226 /*
1227  * Filler LLRs may have to be inserted into the HARQ input
1228  * when required by the device's HARQ memory assumption
1229  */
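/*
 * Worked example (hypothetical code block, for illustration): with
 * basegraph == 1, z_c == 224 and n_filler == 80, parity_offset is
 * 20 * 224 - 80 = 4400; LLRs from index 4400 upwards are shifted up by 80
 * positions and the gap is filled with the saturated value
 * (1 << (ldpc_llr_size - 1)) - 1.
 */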
1230 static void
1231 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1232 		const uint16_t n, struct test_op_params *op_params)
1233 {
1234 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1235 
1236 	if (input_ops == NULL)
1237 		return;
1238 	/* No need to add filler if not required by device */
1239 	if (!(ldpc_cap_flags &
1240 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1241 		return;
1242 	/* No need to add filler for loopback operation */
1243 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1244 		return;
1245 
1246 	uint16_t i, j, parity_offset;
1247 	for (i = 0; i < n; ++i) {
1248 		struct rte_mbuf *m = input_ops[i].data;
1249 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1250 				input_ops[i].offset);
1251 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1252 				* dec.z_c - dec.n_filler;
1253 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1254 		m->data_len = new_hin_size;
1255 		input_ops[i].length = new_hin_size;
1256 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1257 				j--)
1258 			llr[j] = llr[j - dec.n_filler];
1259 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1260 		for (j = 0; j < dec.n_filler; j++)
1261 			llr[parity_offset + j] = llr_max_pre_scaling;
1262 	}
1263 }
1264 
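/*
 * Scaling note (assumption inferred from the factors below): the reference
 * vectors are taken to carry LLRs with 1 fractional bit, so they are scaled
 * by 8 for devices using 4 fractional bits, by 2 for 2 fractional bits and
 * halved for 0 fractional bits, then saturated to +/-((1 << (llr_size - 1)) - 1).
 */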
1265 static void
1266 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1267 		const uint16_t n, const int8_t llr_size,
1268 		const int8_t llr_decimals)
1269 {
1270 	if (input_ops == NULL)
1271 		return;
1272 
1273 	uint16_t i, byte_idx;
1274 
1275 	int16_t llr_max, llr_min, llr_tmp;
1276 	llr_max = (1 << (llr_size - 1)) - 1;
1277 	llr_min = -llr_max;
1278 	for (i = 0; i < n; ++i) {
1279 		struct rte_mbuf *m = input_ops[i].data;
1280 		while (m != NULL) {
1281 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1282 					input_ops[i].offset);
1283 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1284 					++byte_idx) {
1285 
1286 				llr_tmp = llr[byte_idx];
1287 				if (llr_decimals == 4)
1288 					llr_tmp *= 8;
1289 				else if (llr_decimals == 2)
1290 					llr_tmp *= 2;
1291 				else if (llr_decimals == 0)
1292 					llr_tmp /= 2;
1293 				llr_tmp = RTE_MIN(llr_max,
1294 						RTE_MAX(llr_min, llr_tmp));
1295 				llr[byte_idx] = (int8_t) llr_tmp;
1296 			}
1297 
1298 			m = m->next;
1299 		}
1300 	}
1301 }
1302 
1303 
1304 
1305 static int
1306 fill_queue_buffers(struct test_op_params *op_params,
1307 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1308 		struct rte_mempool *soft_out_mp,
1309 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1310 		uint16_t queue_id,
1311 		const struct rte_bbdev_op_cap *capabilities,
1312 		uint16_t min_alignment, const int socket_id)
1313 {
1314 	int ret;
1315 	enum op_data_type type;
1316 	const uint16_t n = op_params->num_to_process;
1317 
1318 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1319 		in_mp,
1320 		soft_out_mp,
1321 		hard_out_mp,
1322 		harq_in_mp,
1323 		harq_out_mp,
1324 	};
1325 
1326 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1327 		&op_params->q_bufs[socket_id][queue_id].inputs,
1328 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1329 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1330 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1331 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1332 	};
1333 
1334 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1335 		struct op_data_entries *ref_entries =
1336 				&test_vector.entries[type];
1337 		if (ref_entries->nb_segments == 0)
1338 			continue;
1339 
1340 		ret = allocate_buffers_on_socket(queue_ops[type],
1341 				n * sizeof(struct rte_bbdev_op_data),
1342 				socket_id);
1343 		TEST_ASSERT_SUCCESS(ret,
1344 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1345 
1346 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1347 				mbuf_pools[type], n, type, min_alignment);
1348 		TEST_ASSERT_SUCCESS(ret,
1349 				"Couldn't init rte_bbdev_op_data structs");
1350 	}
1351 
1352 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1353 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1354 			capabilities->cap.turbo_dec.max_llr_modulus);
1355 
1356 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1357 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1358 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1359 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1360 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1361 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1362 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1363 
1364 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1365 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1366 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1367 		if (!loopback && !llr_comp)
1368 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1369 					ldpc_llr_size, ldpc_llr_decimals);
1370 		if (!loopback && !harq_comp)
1371 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1372 					ldpc_llr_size, ldpc_llr_decimals);
1373 		if (!loopback)
1374 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1375 					op_params);
1376 	}
1377 
1378 	return 0;
1379 }
1380 
1381 static void
1382 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1383 {
1384 	unsigned int i, j;
1385 
1386 	rte_mempool_free(ad->ops_mempool);
1387 	rte_mempool_free(ad->in_mbuf_pool);
1388 	rte_mempool_free(ad->hard_out_mbuf_pool);
1389 	rte_mempool_free(ad->soft_out_mbuf_pool);
1390 	rte_mempool_free(ad->harq_in_mbuf_pool);
1391 	rte_mempool_free(ad->harq_out_mbuf_pool);
1392 
1393 	for (i = 0; i < rte_lcore_count(); ++i) {
1394 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1395 			rte_free(op_params->q_bufs[j][i].inputs);
1396 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1397 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1398 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1399 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1400 		}
1401 	}
1402 }
1403 
1404 static void
1405 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1406 		unsigned int start_idx,
1407 		struct rte_bbdev_op_data *inputs,
1408 		struct rte_bbdev_op_data *hard_outputs,
1409 		struct rte_bbdev_op_data *soft_outputs,
1410 		struct rte_bbdev_dec_op *ref_op)
1411 {
1412 	unsigned int i;
1413 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1414 
1415 	for (i = 0; i < n; ++i) {
1416 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1417 			ops[i]->turbo_dec.tb_params.ea =
1418 					turbo_dec->tb_params.ea;
1419 			ops[i]->turbo_dec.tb_params.eb =
1420 					turbo_dec->tb_params.eb;
1421 			ops[i]->turbo_dec.tb_params.k_pos =
1422 					turbo_dec->tb_params.k_pos;
1423 			ops[i]->turbo_dec.tb_params.k_neg =
1424 					turbo_dec->tb_params.k_neg;
1425 			ops[i]->turbo_dec.tb_params.c =
1426 					turbo_dec->tb_params.c;
1427 			ops[i]->turbo_dec.tb_params.c_neg =
1428 					turbo_dec->tb_params.c_neg;
1429 			ops[i]->turbo_dec.tb_params.cab =
1430 					turbo_dec->tb_params.cab;
1431 			ops[i]->turbo_dec.tb_params.r =
1432 					turbo_dec->tb_params.r;
1433 		} else {
1434 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1435 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1436 		}
1437 
1438 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1439 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1440 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1441 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1442 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1443 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1444 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1445 
1446 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1447 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1448 		if (soft_outputs != NULL)
1449 			ops[i]->turbo_dec.soft_output =
1450 				soft_outputs[start_idx + i];
1451 	}
1452 }
1453 
1454 static void
1455 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1456 		unsigned int start_idx,
1457 		struct rte_bbdev_op_data *inputs,
1458 		struct rte_bbdev_op_data *outputs,
1459 		struct rte_bbdev_enc_op *ref_op)
1460 {
1461 	unsigned int i;
1462 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1463 	for (i = 0; i < n; ++i) {
1464 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1465 			ops[i]->turbo_enc.tb_params.ea =
1466 					turbo_enc->tb_params.ea;
1467 			ops[i]->turbo_enc.tb_params.eb =
1468 					turbo_enc->tb_params.eb;
1469 			ops[i]->turbo_enc.tb_params.k_pos =
1470 					turbo_enc->tb_params.k_pos;
1471 			ops[i]->turbo_enc.tb_params.k_neg =
1472 					turbo_enc->tb_params.k_neg;
1473 			ops[i]->turbo_enc.tb_params.c =
1474 					turbo_enc->tb_params.c;
1475 			ops[i]->turbo_enc.tb_params.c_neg =
1476 					turbo_enc->tb_params.c_neg;
1477 			ops[i]->turbo_enc.tb_params.cab =
1478 					turbo_enc->tb_params.cab;
1479 			ops[i]->turbo_enc.tb_params.ncb_pos =
1480 					turbo_enc->tb_params.ncb_pos;
1481 			ops[i]->turbo_enc.tb_params.ncb_neg =
1482 					turbo_enc->tb_params.ncb_neg;
1483 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1484 		} else {
1485 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1486 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1487 			ops[i]->turbo_enc.cb_params.ncb =
1488 					turbo_enc->cb_params.ncb;
1489 		}
1490 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1491 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1492 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1493 
1494 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1495 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1496 	}
1497 }
1498 
1499 
1500 /* Returns a random number drawn from a normal distribution
1501  * with mean 0 and variance 1,
1502  * using the Marsaglia polar method
1503  */
1504 static double
1505 randn(int n)
1506 {
1507 	double S, Z, U1, U2, u, v, fac;
1508 
1509 	do {
1510 		U1 = (double)rand() / RAND_MAX;
1511 		U2 = (double)rand() / RAND_MAX;
1512 		u = 2. * U1 - 1.;
1513 		v = 2. * U2 - 1.;
1514 		S = u * u + v * v;
1515 	} while (S >= 1 || S == 0);
1516 	fac = sqrt(-2. * log(S) / S);
1517 	Z = (n % 2) ? u * fac : v * fac;
1518 	return Z;
1519 }
1520 
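/*
 * maxstar(A, B) approximates log(exp(A) + exp(B)) (the Jacobian logarithm);
 * the log1p() correction term is dropped when |A - B| > 5, where it would
 * contribute less than about 0.007.
 */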
1521 static inline double
1522 maxstar(double A, double B)
1523 {
1524 	if (fabs(A - B) > 5)
1525 		return RTE_MAX(A, B);
1526 	else
1527 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1528 }
1529 
1530 /*
1531  * Generate Qm LLRs for Qm==8 (256QAM)
1532  * Modulation, AWGN channel and LLR estimation using the max-log approximation
1533  */
1534 static void
1535 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1536 {
1537 	int qm = 8;
1538 	int qam = 256;
1539 	int m, k;
1540 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1541 	/* 5.1.4 of TS38.211 */
1542 	const double symbols_I[256] = {
1543 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1544 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1545 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1546 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1547 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1548 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1549 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1550 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1551 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1552 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1553 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1554 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1555 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1556 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1557 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1558 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1559 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1560 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1561 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1562 			-13, -13, -15, -15, -13, -13, -15, -15};
1563 	const double symbols_Q[256] = {
1564 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1565 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1566 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1567 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1568 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1569 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1570 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1571 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1572 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1573 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1574 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1575 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1576 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1577 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1578 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1579 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1580 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1581 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1582 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1583 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1584 	/* Average constellation point energy */
1585 	N0 *= 170.0;
1586 	for (k = 0; k < qm; k++)
1587 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1588 	/* 5.1.4 of TS38.211 */
1589 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1590 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1591 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1592 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1593 	/* AWGN channel */
1594 	I += sqrt(N0 / 2) * randn(0);
1595 	Q += sqrt(N0 / 2) * randn(1);
1596 	/*
1597 	 * Calculate the log of the probability that each of
1598 	 * the constellation points was transmitted
1599 	 */
1600 	for (m = 0; m < qam; m++)
1601 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1602 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1603 	/* Calculate an LLR for each of the qm bits in the set */
1604 	for (k = 0; k < qm; k++) {
1605 		p0 = -999999;
1606 		p1 = -999999;
1607 		/* For each constellation point */
1608 		for (m = 0; m < qam; m++) {
1609 			if ((m >> (qm - k - 1)) & 1)
1610 				p1 = maxstar(p1, log_syml_prob[m]);
1611 			else
1612 				p0 = maxstar(p0, log_syml_prob[m]);
1613 		}
1614 		/* Calculate the LLR */
1615 		llr_ = p0 - p1;
1616 		llr_ *= (1 << ldpc_llr_decimals);
1617 		llr_ = round(llr_);
1618 		if (llr_ > llr_max)
1619 			llr_ = llr_max;
1620 		if (llr_ < -llr_max)
1621 			llr_ = -llr_max;
1622 		llrs[qm * i + k] = (int8_t) llr_;
1623 	}
1624 }
1625 
1626 
1627 /*
1628  * Generate Qm LLRs for Qm==6 (64QAM)
1629  * Modulation, AWGN channel and LLR estimation using the max-log approximation
1630  */
1631 static void
1632 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1633 {
1634 	int qm = 6;
1635 	int qam = 64;
1636 	int m, k;
1637 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1638 	/* 5.1.4 of TS38.211 */
1639 	const double symbols_I[64] = {
1640 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1641 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1642 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1643 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1644 			-5, -5, -7, -7, -5, -5, -7, -7};
1645 	const double symbols_Q[64] = {
1646 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1647 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1648 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1649 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1650 			-3, -1, -3, -1, -5, -7, -5, -7};
1651 	/* Scale N0 by the average 64QAM constellation point energy (42) */
1652 	N0 *= 42.0;
1653 	for (k = 0; k < qm; k++)
1654 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1655 	/* 5.1.4 of TS38.211 */
1656 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1657 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1658 	/* AWGN channel */
1659 	I += sqrt(N0 / 2) * randn(0);
1660 	Q += sqrt(N0 / 2) * randn(1);
1661 	/*
1662 	 * Calculate the log of the probability that each of
1663 	 * the constellation points was transmitted
1664 	 */
1665 	for (m = 0; m < qam; m++)
1666 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1667 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1668 	/* Calculate an LLR for each of the Qm (64QAM) bits in the set */
1669 	for (k = 0; k < qm; k++) {
1670 		p0 = -999999;
1671 		p1 = -999999;
1672 		/* For each constellation point */
1673 		for (m = 0; m < qam; m++) {
1674 			if ((m >> (qm - k - 1)) & 1)
1675 				p1 = maxstar(p1, log_syml_prob[m]);
1676 			else
1677 				p0 = maxstar(p0, log_syml_prob[m]);
1678 		}
1679 		/* Calculate the LLR */
1680 		llr_ = p0 - p1;
1681 		llr_ *= (1 << ldpc_llr_decimals);
1682 		llr_ = round(llr_);
1683 		if (llr_ > llr_max)
1684 			llr_ = llr_max;
1685 		if (llr_ < -llr_max)
1686 			llr_ = -llr_max;
1687 		llrs[qm * i + k] = (int8_t) llr_;
1688 	}
1689 }
1690 
1691 /*
1692  * Generate Qm LLRs for Qm == 4
1693  * Modulation, AWGN and LLR estimation based on the max-log approximation
1694  */
1695 static void
1696 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1697 {
1698 	int qm = 4;
1699 	int qam = 16;
1700 	int m, k;
1701 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1702 	/* 5.1.4 of TS38.211 */
1703 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1704 			-1, -1, -3, -3, -1, -1, -3, -3};
1705 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1706 			1, 3, 1, 3, -1, -3, -1, -3};
1707 	/* Scale N0 by the average 16QAM constellation point energy (10) */
1708 	N0 *= 10.0;
1709 	for (k = 0; k < qm; k++)
1710 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1711 	/* 5.1.4 of TS38.211 */
1712 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1713 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1714 	/* AWGN channel */
1715 	I += sqrt(N0 / 2) * randn(0);
1716 	Q += sqrt(N0 / 2) * randn(1);
1717 	/*
1718 	 * Calculate the log of the probability that each of
1719 	 * the constellation points was transmitted
1720 	 */
1721 	for (m = 0; m < qam; m++)
1722 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1723 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1724 	/* Calculate an LLR for each of the Qm (16QAM) bits in the set */
1725 	for (k = 0; k < qm; k++) {
1726 		p0 = -999999;
1727 		p1 = -999999;
1728 		/* For each constellation point */
1729 		for (m = 0; m < qam; m++) {
1730 			if ((m >> (qm - k - 1)) & 1)
1731 				p1 = maxstar(p1, log_syml_prob[m]);
1732 			else
1733 				p0 = maxstar(p0, log_syml_prob[m]);
1734 		}
1735 		/* Calculate the LLR */
1736 		llr_ = p0 - p1;
1737 		llr_ *= (1 << ldpc_llr_decimals);
1738 		llr_ = round(llr_);
1739 		if (llr_ > llr_max)
1740 			llr_ = llr_max;
1741 		if (llr_ < -llr_max)
1742 			llr_ = -llr_max;
1743 		llrs[qm * i + k] = (int8_t) llr_;
1744 	}
1745 }
1746 
1747 static void
1748 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1749 {
1750 	double b, b1, n;
1751 	double coeff = 2.0 * sqrt(N0);
1752 
1753 	/* Ignore rare quasi-null LLRs present in vectors so they do not get saturated */
1754 	if (llrs[j] < 8 && llrs[j] > -8)
1755 		return;
1756 
1757 	/* Note: do not change the sign here */
1758 	n = randn(j % 2);
1759 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1760 			+ coeff * n) / N0;
1761 	b = b1 * (1 << ldpc_llr_decimals);
1762 	b = round(b);
1763 	if (b > llr_max)
1764 		b = llr_max;
1765 	if (b < -llr_max)
1766 		b = -llr_max;
1767 	llrs[j] = (int8_t) b;
1768 }
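
/*
 * For QPSK (Qm == 2) no constellation search is needed: each LLR is
 * regenerated directly from a nominal +/-2 amplitude plus scaled Gaussian
 * noise, normalized by N0, quantized with ldpc_llr_decimals fractional bits
 * and saturated to +/- llr_max. gen_turbo_llr() below follows the same model
 * with a fixed 4-bit fractional scaling.
 */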
1769 
1770 /* Simple LLR generation assuming AWGN and QPSK */
1771 static void
1772 gen_turbo_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1773 {
1774 	double b, b1, n;
1775 	double coeff = 2.0 * sqrt(N0);
1776 
1777 	/* Ignore null LLRs present in vectors so they do not get saturated */
1778 	if (llrs[j] == 0)
1779 		return;
1780 
1781 	/* Note: do not change the sign here */
1782 	n = randn(j % 2);
1783 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1784 			+ coeff * n) / N0;
1785 	b = b1 * (1 << 4);
1786 	b = round(b);
1787 	if (b > llr_max)
1788 		b = llr_max;
1789 	if (b < -llr_max)
1790 		b = -llr_max;
1791 	llrs[j] = (int8_t) b;
1792 }
1793 
1794 /* Generate LLRs for a given SNR */
1795 static void
1796 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1797 		struct rte_bbdev_dec_op *ref_op)
1798 {
1799 	struct rte_mbuf *m;
1800 	uint16_t qm;
1801 	uint32_t i, j, e, range;
1802 	double N0, llr_max;
1803 
1804 	e = ref_op->ldpc_dec.cb_params.e;
1805 	qm = ref_op->ldpc_dec.q_m;
1806 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1807 	range = e / qm;
1808 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1809 
1810 	for (i = 0; i < n; ++i) {
1811 		m = inputs[i].data;
1812 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1813 		if (qm == 8) {
1814 			for (j = 0; j < range; ++j)
1815 				gen_qm8_llr(llrs, j, N0, llr_max);
1816 		} else if (qm == 6) {
1817 			for (j = 0; j < range; ++j)
1818 				gen_qm6_llr(llrs, j, N0, llr_max);
1819 		} else if (qm == 4) {
1820 			for (j = 0; j < range; ++j)
1821 				gen_qm4_llr(llrs, j, N0, llr_max);
1822 		} else {
1823 			for (j = 0; j < e; ++j)
1824 				gen_qm2_llr(llrs, j, N0, llr_max);
1825 		}
1826 	}
1827 }
1828 
1829 /* Generate LLRs for the turbo decoder for a given SNR */
1830 static void
1831 generate_turbo_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1832 		struct rte_bbdev_dec_op *ref_op)
1833 {
1834 	struct rte_mbuf *m;
1835 	uint32_t i, j, range;
1836 	double N0, llr_max;
1837 
1838 	llr_max = 127;
1839 	range = ref_op->turbo_dec.input.length;
1840 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1841 
1842 	for (i = 0; i < n; ++i) {
1843 		m = inputs[i].data;
1844 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1845 		for (j = 0; j < range; ++j)
1846 			gen_turbo_llr(llrs, j, N0, llr_max);
1847 	}
1848 }
1849 
1850 static void
1851 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1852 		unsigned int start_idx,
1853 		struct rte_bbdev_op_data *inputs,
1854 		struct rte_bbdev_op_data *hard_outputs,
1855 		struct rte_bbdev_op_data *soft_outputs,
1856 		struct rte_bbdev_op_data *harq_inputs,
1857 		struct rte_bbdev_op_data *harq_outputs,
1858 		struct rte_bbdev_dec_op *ref_op)
1859 {
1860 	unsigned int i;
1861 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1862 
1863 	for (i = 0; i < n; ++i) {
1864 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1865 			ops[i]->ldpc_dec.tb_params.ea =
1866 					ldpc_dec->tb_params.ea;
1867 			ops[i]->ldpc_dec.tb_params.eb =
1868 					ldpc_dec->tb_params.eb;
1869 			ops[i]->ldpc_dec.tb_params.c =
1870 					ldpc_dec->tb_params.c;
1871 			ops[i]->ldpc_dec.tb_params.cab =
1872 					ldpc_dec->tb_params.cab;
1873 			ops[i]->ldpc_dec.tb_params.r =
1874 					ldpc_dec->tb_params.r;
1875 		} else {
1876 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1877 		}
1878 
1879 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1880 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1881 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1882 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1883 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1884 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1885 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1886 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1887 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1888 
1889 		if (hard_outputs != NULL)
1890 			ops[i]->ldpc_dec.hard_output =
1891 					hard_outputs[start_idx + i];
1892 		if (inputs != NULL)
1893 			ops[i]->ldpc_dec.input =
1894 					inputs[start_idx + i];
1895 		if (soft_outputs != NULL)
1896 			ops[i]->ldpc_dec.soft_output =
1897 					soft_outputs[start_idx + i];
1898 		if (harq_inputs != NULL)
1899 			ops[i]->ldpc_dec.harq_combined_input =
1900 					harq_inputs[start_idx + i];
1901 		if (harq_outputs != NULL)
1902 			ops[i]->ldpc_dec.harq_combined_output =
1903 					harq_outputs[start_idx + i];
1904 	}
1905 }
1906 
1907 
1908 static void
1909 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1910 		unsigned int start_idx,
1911 		struct rte_bbdev_op_data *inputs,
1912 		struct rte_bbdev_op_data *outputs,
1913 		struct rte_bbdev_enc_op *ref_op)
1914 {
1915 	unsigned int i;
1916 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1917 	for (i = 0; i < n; ++i) {
1918 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1919 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1920 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1921 			ops[i]->ldpc_enc.tb_params.cab =
1922 					ldpc_enc->tb_params.cab;
1923 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1924 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1925 		} else {
1926 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1927 		}
1928 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1929 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1930 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1931 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1932 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1933 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1934 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1935 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1936 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1937 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1938 	}
1939 }
1940 
1941 static void
1942 copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n,
1943 		unsigned int start_idx, struct rte_bbdev_op_data *inputs,
1944 		struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *pwrouts,
1945 		struct rte_bbdev_fft_op *ref_op)
1946 {
1947 	unsigned int i, j;
1948 	struct rte_bbdev_op_fft *fft = &ref_op->fft;
1949 	for (i = 0; i < n; i++) {
1950 		ops[i]->fft.input_sequence_size = fft->input_sequence_size;
1951 		ops[i]->fft.input_leading_padding = fft->input_leading_padding;
1952 		ops[i]->fft.output_sequence_size = fft->output_sequence_size;
1953 		ops[i]->fft.output_leading_depadding =
1954 				fft->output_leading_depadding;
1955 		for (j = 0; j < RTE_BBDEV_MAX_CS_2; j++)
1956 			ops[i]->fft.window_index[j] = fft->window_index[j];
1957 		ops[i]->fft.cs_bitmap = fft->cs_bitmap;
1958 		ops[i]->fft.num_antennas_log2 = fft->num_antennas_log2;
1959 		ops[i]->fft.idft_log2 = fft->idft_log2;
1960 		ops[i]->fft.dft_log2 = fft->dft_log2;
1961 		ops[i]->fft.cs_time_adjustment = fft->cs_time_adjustment;
1962 		ops[i]->fft.idft_shift = fft->idft_shift;
1963 		ops[i]->fft.dft_shift = fft->dft_shift;
1964 		ops[i]->fft.ncs_reciprocal = fft->ncs_reciprocal;
1965 		ops[i]->fft.power_shift = fft->power_shift;
1966 		ops[i]->fft.fp16_exp_adjust = fft->fp16_exp_adjust;
1967 		ops[i]->fft.base_output = outputs[start_idx + i];
1968 		ops[i]->fft.base_input = inputs[start_idx + i];
1969 		if (pwrouts != NULL)
1970 			ops[i]->fft.power_meas_output = pwrouts[start_idx + i];
1971 		ops[i]->fft.op_flags = fft->op_flags;
1972 	}
1973 }
1974 
1975 static int
1976 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1977 		unsigned int order_idx, const int expected_status)
1978 {
1979 	int status = op->status;
1980 	/* Ignore parity-mismatch false alarms when the iteration count is large */
1981 	if (get_iter_max() >= 10) {
1982 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1983 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1984 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1985 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1986 		}
1987 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1988 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1989 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1990 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1991 		}
1992 	}
1993 
1994 	TEST_ASSERT(status == expected_status,
1995 			"op_status (%d) != expected_status (%d)",
1996 			op->status, expected_status);
1997 
1998 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1999 			"Ordering error, expected %p, got %p",
2000 			(void *)(uintptr_t)order_idx, op->opaque_data);
2001 
2002 	return TEST_SUCCESS;
2003 }
2004 
2005 static int
2006 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
2007 		unsigned int order_idx, const int expected_status)
2008 {
2009 	TEST_ASSERT(op->status == expected_status,
2010 			"op_status (%d) != expected_status (%d)",
2011 			op->status, expected_status);
2012 
2013 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
2014 			"Ordering error, expected %p, got %p",
2015 			(void *)(uintptr_t)order_idx, op->opaque_data);
2016 
2017 	return TEST_SUCCESS;
2018 }
2019 
2020 static int
2021 check_fft_status_and_ordering(struct rte_bbdev_fft_op *op,
2022 		unsigned int order_idx, const int expected_status)
2023 {
2024 	TEST_ASSERT(op->status == expected_status,
2025 			"op_status (%d) != expected_status (%d)",
2026 			op->status, expected_status);
2027 
2028 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
2029 			"Ordering error, expected %p, got %p",
2030 			(void *)(uintptr_t)order_idx, op->opaque_data);
2031 
2032 	return TEST_SUCCESS;
2033 }
2034 
2035 static inline int
2036 validate_op_chain(struct rte_bbdev_op_data *op,
2037 		struct op_data_entries *orig_op)
2038 {
2039 	uint8_t i;
2040 	struct rte_mbuf *m = op->data;
2041 	uint8_t nb_dst_segments = orig_op->nb_segments;
2042 	uint32_t total_data_size = 0;
2043 	bool ignore_mbuf = false; /* ignore mbuf limitations */
2044 
2045 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2046 			"Number of segments differ in original (%u) and filled (%u) op",
2047 			nb_dst_segments, m->nb_segs);
2048 
2049 	/* Validate each mbuf segment length */
2050 	for (i = 0; i < nb_dst_segments; ++i) {
2051 		/* Apply offset to the first mbuf segment */
2052 		uint16_t offset = (i == 0) ? op->offset : 0;
2053 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2054 		total_data_size += orig_op->segments[i].length;
2055 
2056 		if (orig_op->segments[i].length > RTE_BBDEV_LDPC_E_MAX_MBUF)
2057 			ignore_mbuf = true;
2058 		if (!ignore_mbuf)
2059 			TEST_ASSERT(orig_op->segments[i].length == data_len,
2060 					"Length of segment differ in original (%u) and filled (%u) op",
2061 					orig_op->segments[i].length, data_len);
2062 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
2063 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
2064 				orig_op->segments[i].length,
2065 				"Output buffers (CB=%u) are not equal", i);
2066 		m = m->next;
2067 	}
2068 
2069 	/* Validate total mbuf pkt length */
2070 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2071 	if (!ignore_mbuf)
2072 		TEST_ASSERT(total_data_size == pkt_len,
2073 				"Length of data differ in original (%u) and filled (%u) op",
2074 				total_data_size, pkt_len);
2075 
2076 	return TEST_SUCCESS;
2077 }
2078 
2079 /*
2080  * Compute K0 for a given configuration, used for the HARQ output length
2081  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2.
2082  */
2083 static inline uint16_t
2084 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
2085 {
2086 	if (rv_index == 0)
2087 		return 0;
2088 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
2089 	if (n_cb == n) {
2090 		if (rv_index == 1)
2091 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
2092 		else if (rv_index == 2)
2093 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
2094 		else
2095 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
2096 	}
2097 	/* Limited buffer rate matching (LBRM) case - includes a division by N */
2098 	if (rv_index == 1)
2099 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
2100 				/ n) * z_c;
2101 	else if (rv_index == 2)
2102 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
2103 				/ n) * z_c;
2104 	else
2105 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
2106 				/ n) * z_c;
2107 }
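
/*
 * Worked example (illustrative values, not tied to a test vector): for BG 1,
 * Zc = 384 and a full circular buffer (n_cb == N == 66 * 384 = 25344),
 * rv_index 2 gives k0 = 33 * 384 = 12672. With a limited buffer of
 * n_cb = 16416, the LBRM branch yields
 * k0 = ((33 * 16416) / 25344) * 384 = 21 * 384 = 8064 (integer division,
 * matching the floor() in Table 5.4.2.1-2).
 */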
2108 
2109 /* HARQ output length including the filler bits */
2110 static inline uint16_t
2111 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
2112 {
2113 	uint16_t k0 = 0;
2114 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
2115 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
2116 	/* Compute RM out size and number of rows */
2117 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2118 			* ops_ld->z_c - ops_ld->n_filler;
2119 	uint16_t deRmOutSize = RTE_MIN(
2120 			k0 + ops_ld->cb_params.e +
2121 			((k0 > parity_offset) ?
2122 					0 : ops_ld->n_filler),
2123 					ops_ld->n_cb);
2124 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
2125 			/ ops_ld->z_c);
2126 	uint16_t harq_output_len = numRows * ops_ld->z_c;
2127 	return harq_output_len;
2128 }
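
/*
 * Worked example (illustrative values): BG 1, Zc = 384, n_cb = 25344,
 * n_filler = 0, rv_index = 0 (so k0 = 0) and e = 10000 give
 * deRmOutSize = min(0 + 10000 + 0, 25344) = 10000 and
 * numRows = ceil(10000 / 384) = 27, hence a HARQ output length of
 * 27 * 384 = 10368 LLRs.
 */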
2129 
2130 static inline int
2131 validate_op_harq_chain(struct rte_bbdev_op_data *op,
2132 		struct op_data_entries *orig_op,
2133 		struct rte_bbdev_op_ldpc_dec *ops_ld)
2134 {
2135 	uint8_t i;
2136 	uint32_t j, jj, k;
2137 	struct rte_mbuf *m = op->data;
2138 	uint8_t nb_dst_segments = orig_op->nb_segments;
2139 	uint32_t total_data_size = 0;
2140 	int8_t *harq_orig, *harq_out, abs_harq_origin;
2141 	uint32_t byte_error = 0, cum_error = 0, error;
2142 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
2143 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
2144 	uint16_t parity_offset;
2145 
2146 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2147 			"Number of segments differ in original (%u) and filled (%u) op",
2148 			nb_dst_segments, m->nb_segs);
2149 
2150 	/* Validate each mbuf segment length */
2151 	for (i = 0; i < nb_dst_segments; ++i) {
2152 		/* Apply offset to the first mbuf segment */
2153 		uint16_t offset = (i == 0) ? op->offset : 0;
2154 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2155 		total_data_size += orig_op->segments[i].length;
2156 
2157 		TEST_ASSERT(orig_op->segments[i].length < (uint32_t)(data_len + HARQ_MEM_TOLERANCE),
2158 				"Length of segment differ in original (%u) and filled (%u) op",
2159 				orig_op->segments[i].length, data_len);
2160 		harq_orig = (int8_t *) orig_op->segments[i].addr;
2161 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2162 
2163 		/* Cannot compare HARQ output data for such cases */
2164 		if ((ldpc_llr_decimals > 1) && ((ops_ld->op_flags & RTE_BBDEV_LDPC_LLR_COMPRESSION)
2165 				|| (ops_ld->op_flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)))
2166 			break;
2167 
2168 		if (!(ldpc_cap_flags &
2169 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
2170 				) || (ops_ld->op_flags &
2171 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2172 			data_len -= ops_ld->z_c;
2173 			parity_offset = data_len;
2174 		} else {
2175 			/* Compute RM out size and number of rows */
2176 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2177 					* ops_ld->z_c - ops_ld->n_filler;
2178 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
2179 					ops_ld->n_filler;
2180 			if (data_len > deRmOutSize)
2181 				data_len = deRmOutSize;
2182 		}
2183 		if (data_len > orig_op->segments[i].length)
2184 			data_len = orig_op->segments[i].length;
2185 		/*
2186 		 * HARQ output can have minor differences
2187 		 * due to integer representation and related scaling
2188 		 */
2189 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2190 			if (j == parity_offset) {
2191 				/* Special Handling of the filler bits */
2192 				for (k = 0; k < ops_ld->n_filler; k++) {
2193 					if (harq_out[jj] !=
2194 							llr_max_pre_scaling) {
2195 						printf("HARQ Filler issue %d: %d %d\n",
2196 							jj, harq_out[jj],
2197 							llr_max_pre_scaling);
2198 						byte_error++;
2199 					}
2200 					jj++;
2201 				}
2202 			}
2203 			if (!(ops_ld->op_flags &
2204 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2205 				if (ldpc_llr_decimals > 1)
2206 					harq_out[jj] = (harq_out[jj] + 1)
2207 						>> (ldpc_llr_decimals - 1);
2208 				/* Saturated to S7 */
2209 				if (harq_orig[j] > llr_max)
2210 					harq_orig[j] = llr_max;
2211 				if (harq_orig[j] < -llr_max)
2212 					harq_orig[j] = -llr_max;
2213 			}
2214 			if (harq_orig[j] != harq_out[jj]) {
2215 				error = (harq_orig[j] > harq_out[jj]) ?
2216 						harq_orig[j] - harq_out[jj] :
2217 						harq_out[jj] - harq_orig[j];
2218 				abs_harq_origin = harq_orig[j] > 0 ?
2219 							harq_orig[j] :
2220 							-harq_orig[j];
2221 				/* Residual quantization error */
2222 				if ((error > 8 && (abs_harq_origin <
2223 						(llr_max - 16))) ||
2224 						(error > 16)) {
2225 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2226 							j, harq_orig[j],
2227 							harq_out[jj], error);
2228 					byte_error++;
2229 					cum_error += error;
2230 				}
2231 			}
2232 		}
2233 		m = m->next;
2234 	}
2235 
2236 	if (byte_error)
2237 		TEST_ASSERT(byte_error <= 1,
2238 				"HARQ output mismatch (%d) %d",
2239 				byte_error, cum_error);
2240 
2241 	/* Validate total mbuf pkt length */
2242 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2243 	TEST_ASSERT(total_data_size < pkt_len + HARQ_MEM_TOLERANCE,
2244 			"Length of data differ in original (%u) and filled (%u) op",
2245 			total_data_size, pkt_len);
2246 
2247 	return TEST_SUCCESS;
2248 }
2249 
2250 
2251 static inline int
2252 validate_op_so_chain(struct rte_bbdev_op_data *op,
2253 		struct op_data_entries *orig_op)
2254 {
2255 	struct rte_mbuf *m = op->data;
2256 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2257 	uint32_t j, jj;
2258 	int8_t *so_orig, *so_out;
2259 	uint32_t byte_error = 0, error, margin_error = 0;
2260 
2261 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2262 			"Number of segments differ in original (%u) and filled (%u) op",
2263 			nb_dst_segments, m->nb_segs);
2264 
2265 	/* Validate each mbuf segment length. */
2266 	for (i = 0; i < nb_dst_segments; ++i) {
2267 		/* Apply offset to the first mbuf segment. */
2268 		uint16_t offset = (i == 0) ? op->offset : 0;
2269 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2270 
2271 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2272 				"Length of segment differ in original (%u) and filled (%u) op",
2273 				orig_op->segments[i].length, data_len);
2274 		so_orig = (int8_t *) orig_op->segments[i].addr;
2275 		so_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2276 		margin_error += data_len / 8; /* Allow for a few percent of errors. */
2277 
2278 		/* SO output can have minor differences due to algorithm variations. */
2279 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2280 			if (so_orig[j] != so_out[jj]) {
2281 				error = (so_orig[j] > so_out[jj]) ? so_orig[j] - so_out[jj] :
2282 						so_out[jj] - so_orig[j];
2283 				/* Residual quantization error. */
2284 				if (error > 32) {
2285 					printf("Warning: Soft mismatch %d: exp %d act %d => %d\n",
2286 							j, so_orig[j], so_out[jj], error);
2287 					byte_error++;
2288 				}
2289 			}
2290 		}
2291 		m = m->next;
2292 	}
2293 
2294 	if (byte_error > margin_error)
2295 		TEST_ASSERT(byte_error <= 1, "Soft output mismatch (%d) %d",
2296 				byte_error, margin_error);
2297 
2298 	return TEST_SUCCESS;
2299 }
2300 
2301 static int
2302 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2303 		struct rte_bbdev_dec_op *ref_op)
2304 {
2305 	unsigned int i;
2306 	int ret;
2307 	struct op_data_entries *hard_data_orig =
2308 			&test_vector.entries[DATA_HARD_OUTPUT];
2309 	struct op_data_entries *soft_data_orig =
2310 			&test_vector.entries[DATA_SOFT_OUTPUT];
2311 	struct rte_bbdev_op_turbo_dec *ops_td;
2312 	struct rte_bbdev_op_data *hard_output;
2313 	struct rte_bbdev_op_data *soft_output;
2314 
2315 	for (i = 0; i < n; ++i) {
2316 		ops_td = &ops[i]->turbo_dec;
2317 		hard_output = &ops_td->hard_output;
2318 		soft_output = &ops_td->soft_output;
2319 
2320 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2321 		TEST_ASSERT_SUCCESS(ret,
2322 				"Checking status and ordering for decoder failed");
2323 
2324 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2325 				hard_data_orig),
2326 				"Hard output buffers (CB=%u) are not equal",
2327 				i);
2328 
2329 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2330 			TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output,
2331 					soft_data_orig),
2332 					"Soft output buffers (CB=%u) are not equal",
2333 					i);
2334 	}
2335 
2336 	return TEST_SUCCESS;
2337 }
2338 
2339 /* Check the number of code block errors */
2340 static int
2341 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2342 {
2343 	unsigned int i;
2344 	struct op_data_entries *hard_data_orig =
2345 			&test_vector.entries[DATA_HARD_OUTPUT];
2346 	struct rte_bbdev_op_ldpc_dec *ops_td;
2347 	struct rte_bbdev_op_data *hard_output;
2348 	int errors = 0;
2349 	struct rte_mbuf *m;
2350 
2351 	for (i = 0; i < n; ++i) {
2352 		ops_td = &ops[i]->ldpc_dec;
2353 		hard_output = &ops_td->hard_output;
2354 		m = hard_output->data;
2355 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2356 				hard_data_orig->segments[0].addr,
2357 				hard_data_orig->segments[0].length))
2358 			errors++;
2359 	}
2360 	return errors;
2361 }
2362 
2363 /* Check the number of code block errors */
2364 static int
2365 validate_turbo_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2366 {
2367 	unsigned int i;
2368 	struct op_data_entries *hard_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
2369 	struct rte_bbdev_op_turbo_dec *ops_td;
2370 	struct rte_bbdev_op_data *hard_output;
2371 	int errors = 0;
2372 	struct rte_mbuf *m;
2373 
2374 	for (i = 0; i < n; ++i) {
2375 		ops_td = &ops[i]->turbo_dec;
2376 		hard_output = &ops_td->hard_output;
2377 		m = hard_output->data;
2378 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2379 				hard_data_orig->segments[0].addr,
2380 				hard_data_orig->segments[0].length))
2381 			errors++;
2382 	}
2383 	return errors;
2384 }
2385 
2386 
2387 static int
2388 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2389 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2390 {
2391 	unsigned int i;
2392 	int ret;
2393 	struct op_data_entries *hard_data_orig =
2394 			&test_vector.entries[DATA_HARD_OUTPUT];
2395 	struct op_data_entries *soft_data_orig =
2396 			&test_vector.entries[DATA_SOFT_OUTPUT];
2397 	struct op_data_entries *harq_data_orig =
2398 				&test_vector.entries[DATA_HARQ_OUTPUT];
2399 	struct rte_bbdev_op_ldpc_dec *ops_td;
2400 	struct rte_bbdev_op_data *hard_output;
2401 	struct rte_bbdev_op_data *harq_output;
2402 	struct rte_bbdev_op_data *soft_output;
2403 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2404 
2405 	for (i = 0; i < n; ++i) {
2406 		ops_td = &ops[i]->ldpc_dec;
2407 		hard_output = &ops_td->hard_output;
2408 		harq_output = &ops_td->harq_combined_output;
2409 		soft_output = &ops_td->soft_output;
2410 
2411 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2412 		TEST_ASSERT_SUCCESS(ret,
2413 				"Checking status and ordering for decoder failed");
2414 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2415 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2416 					"Returned iter_count (%d) > expected iter_count (%d)",
2417 					ops_td->iter_count, ref_td->iter_count);
2418 		/*
2419 		 * We can ignore output data when the decoding failed to
2420 		 * converge or for loop-back cases
2421 		 */
2422 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2423 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2424 				) && (
2425 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2426 						)) == 0)
2427 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2428 					hard_data_orig),
2429 					"Hard output buffers (CB=%u) are not equal",
2430 					i);
2431 
2432 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2433 			TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output,
2434 					soft_data_orig),
2435 					"Soft output buffers (CB=%u) are not equal",
2436 					i);
2437 		if (ref_op->ldpc_dec.op_flags &
2438 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2439 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2440 					harq_data_orig, ops_td),
2441 					"HARQ output buffers (CB=%u) are not equal",
2442 					i);
2443 		}
2444 		if (ref_op->ldpc_dec.op_flags &
2445 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2446 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2447 					harq_data_orig, ops_td),
2448 					"HARQ output buffers (CB=%u) are not equal",
2449 					i);
2450 
2451 	}
2452 
2453 	return TEST_SUCCESS;
2454 }
2455 
2456 
2457 static int
2458 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2459 		struct rte_bbdev_enc_op *ref_op)
2460 {
2461 	unsigned int i;
2462 	int ret;
2463 	struct op_data_entries *hard_data_orig =
2464 			&test_vector.entries[DATA_HARD_OUTPUT];
2465 
2466 	for (i = 0; i < n; ++i) {
2467 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2468 		TEST_ASSERT_SUCCESS(ret,
2469 				"Checking status and ordering for encoder failed");
2470 		TEST_ASSERT_SUCCESS(validate_op_chain(
2471 				&ops[i]->turbo_enc.output,
2472 				hard_data_orig),
2473 				"Output buffers (CB=%u) are not equal",
2474 				i);
2475 	}
2476 
2477 	return TEST_SUCCESS;
2478 }
2479 
2480 static int
2481 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2482 		struct rte_bbdev_enc_op *ref_op)
2483 {
2484 	unsigned int i;
2485 	int ret;
2486 	struct op_data_entries *hard_data_orig =
2487 			&test_vector.entries[DATA_HARD_OUTPUT];
2488 
2489 	for (i = 0; i < n; ++i) {
2490 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2491 		TEST_ASSERT_SUCCESS(ret,
2492 				"Checking status and ordering for encoder failed");
2493 		TEST_ASSERT_SUCCESS(validate_op_chain(
2494 				&ops[i]->ldpc_enc.output,
2495 				hard_data_orig),
2496 				"Output buffers (CB=%u) are not equal",
2497 				i);
2498 	}
2499 
2500 	return TEST_SUCCESS;
2501 }
2502 
2503 static inline int
2504 validate_op_fft_chain(struct rte_bbdev_op_data *op, struct op_data_entries *orig_op)
2505 {
2506 	struct rte_mbuf *m = op->data;
2507 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2508 	int16_t delt, abs_delt, thres_hold = 3;
2509 	uint32_t j, data_len_iq, error_num;
2510 	int16_t *ref_out, *op_out;
2511 
2512 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2513 			"Number of segments differ in original (%u) and filled (%u) op fft",
2514 			nb_dst_segments, m->nb_segs);
2515 
2516 	/* Due to mbuf size limitations, FFT does not use a real mbuf. */
2517 	for (i = 0; i < nb_dst_segments; ++i) {
2518 		uint16_t offset = (i == 0) ? op->offset : 0;
2519 		uint32_t data_len = op->length;
2520 
2521 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2522 				"Length of segment differ in original (%u) and filled (%u) op fft",
2523 				orig_op->segments[i].length, data_len);
2524 		/* Divide by 2 to get the number of 16-bit samples. */
2525 		data_len_iq = data_len >> 1;
2526 		ref_out = (int16_t *)(orig_op->segments[i].addr);
2527 		op_out = rte_pktmbuf_mtod_offset(m, int16_t *, offset);
2528 		error_num = 0;
2529 		for (j = 0; j < data_len_iq; j++) {
2530 			delt = ref_out[j] - op_out[j];
2531 			abs_delt = delt > 0 ? delt : -delt;
2532 			error_num += (abs_delt > thres_hold ? 1 : 0);
2533 		}
2534 		if (error_num > 0) {
2535 			rte_memdump(stdout, "Buffer A", ref_out, data_len);
2536 			rte_memdump(stdout, "Buffer B", op_out, data_len);
2537 			TEST_ASSERT(error_num == 0,
2538 				"FFT Output are not matched total (%u) errors (%u)",
2539 				data_len_iq, error_num);
2540 		}
2541 
2542 		m = m->next;
2543 	}
2544 
2545 	return TEST_SUCCESS;
2546 }
2547 
2548 static int
2549 validate_fft_op(struct rte_bbdev_fft_op **ops, const uint16_t n,
2550 		struct rte_bbdev_fft_op *ref_op)
2551 {
2552 	unsigned int i;
2553 	int ret;
2554 	struct op_data_entries *fft_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
2555 	struct op_data_entries *fft_pwr_orig = &test_vector.entries[DATA_SOFT_OUTPUT];
2556 
2557 	for (i = 0; i < n; ++i) {
2558 		ret = check_fft_status_and_ordering(ops[i], i, ref_op->status);
2559 		TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for FFT failed");
2560 		TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2561 				&ops[i]->fft.base_output, fft_data_orig),
2562 				"FFT Output buffers (op=%u) are not matched", i);
2563 		if (check_bit(ops[i]->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS))
2564 			TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2565 				&ops[i]->fft.power_meas_output, fft_pwr_orig),
2566 				"FFT Power Output buffers (op=%u) are not matched", i);
2567 	}
2568 
2569 	return TEST_SUCCESS;
2570 }
2571 
2572 static void
2573 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2574 {
2575 	unsigned int i;
2576 	struct op_data_entries *entry;
2577 
2578 	op->turbo_dec = test_vector.turbo_dec;
2579 	entry = &test_vector.entries[DATA_INPUT];
2580 	for (i = 0; i < entry->nb_segments; ++i)
2581 		op->turbo_dec.input.length +=
2582 				entry->segments[i].length;
2583 }
2584 
2585 static void
2586 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2587 {
2588 	unsigned int i;
2589 	struct op_data_entries *entry;
2590 
2591 	op->ldpc_dec = test_vector.ldpc_dec;
2592 	entry = &test_vector.entries[DATA_INPUT];
2593 	for (i = 0; i < entry->nb_segments; ++i)
2594 		op->ldpc_dec.input.length +=
2595 				entry->segments[i].length;
2596 	if (test_vector.ldpc_dec.op_flags &
2597 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2598 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2599 		for (i = 0; i < entry->nb_segments; ++i)
2600 			op->ldpc_dec.harq_combined_input.length +=
2601 				entry->segments[i].length;
2602 	}
2603 }
2604 
2605 static void
2606 create_reference_fft_op(struct rte_bbdev_fft_op *op)
2607 {
2608 	unsigned int i;
2609 	struct op_data_entries *entry;
2610 	op->fft = test_vector.fft;
2611 	entry = &test_vector.entries[DATA_INPUT];
2612 	for (i = 0; i < entry->nb_segments; ++i)
2613 		op->fft.base_input.length += entry->segments[i].length;
2614 }
2615 
2616 static void
2617 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2618 {
2619 	unsigned int i;
2620 	struct op_data_entries *entry;
2621 
2622 	op->turbo_enc = test_vector.turbo_enc;
2623 	entry = &test_vector.entries[DATA_INPUT];
2624 	for (i = 0; i < entry->nb_segments; ++i)
2625 		op->turbo_enc.input.length +=
2626 				entry->segments[i].length;
2627 }
2628 
2629 static void
2630 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2631 {
2632 	unsigned int i;
2633 	struct op_data_entries *entry;
2634 
2635 	op->ldpc_enc = test_vector.ldpc_enc;
2636 	entry = &test_vector.entries[DATA_INPUT];
2637 	for (i = 0; i < entry->nb_segments; ++i)
2638 		op->ldpc_enc.input.length +=
2639 				entry->segments[i].length;
2640 }
2641 
2642 static uint32_t
2643 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2644 {
2645 	uint8_t i;
2646 	uint32_t c, r, tb_size = 0;
2647 
2648 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2649 		tb_size = op->turbo_dec.tb_params.k_neg;
2650 	} else {
2651 		c = op->turbo_dec.tb_params.c;
2652 		r = op->turbo_dec.tb_params.r;
2653 		for (i = 0; i < c-r; i++)
2654 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2655 				op->turbo_dec.tb_params.k_neg :
2656 				op->turbo_dec.tb_params.k_pos;
2657 	}
2658 	return tb_size;
2659 }
2660 
2661 static uint32_t
2662 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2663 {
2664 	uint8_t num_cbs = 0;
2665 	uint32_t tb_size = 0;
2666 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2667 
2668 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
2669 		num_cbs = 1;
2670 	else
2671 		num_cbs = op->ldpc_dec.tb_params.c - op->ldpc_dec.tb_params.r;
2672 
2673 	tb_size = (sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler) * num_cbs;
2674 	return tb_size;
2675 }
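
/*
 * Example (illustrative values): for BG 1 (22 systematic columns), Zc = 384,
 * n_filler = 0 and a single code block, the TB payload counted towards the
 * throughput figures is 22 * 384 = 8448 bits. calc_ldpc_enc_TB_size() below
 * applies the same formula to the encoder parameters.
 */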
2676 
2677 static uint32_t
2678 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2679 {
2680 	uint8_t i;
2681 	uint32_t c, r, tb_size = 0;
2682 
2683 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2684 		tb_size = op->turbo_enc.tb_params.k_neg;
2685 	} else {
2686 		c = op->turbo_enc.tb_params.c;
2687 		r = op->turbo_enc.tb_params.r;
2688 		for (i = 0; i < c-r; i++)
2689 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2690 				op->turbo_enc.tb_params.k_neg :
2691 				op->turbo_enc.tb_params.k_pos;
2692 	}
2693 	return tb_size;
2694 }
2695 
2696 static uint32_t
2697 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2698 {
2699 	uint8_t num_cbs = 0;
2700 	uint32_t tb_size = 0;
2701 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2702 
2703 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK)
2704 		num_cbs = 1;
2705 	else
2706 		num_cbs = op->ldpc_enc.tb_params.c - op->ldpc_enc.tb_params.r;
2707 
2708 	tb_size = (sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler) * num_cbs;
2709 	return tb_size;
2710 }
2711 
2712 static uint32_t
2713 calc_fft_size(struct rte_bbdev_fft_op *op)
2714 {
2715 	uint32_t output_size;
2716 	int num_cs = 0, i;
2717 	for (i = 0; i < 12; i++)
2718 		if (check_bit(op->fft.cs_bitmap, 1 << i))
2719 			num_cs++;
2720 	output_size = (num_cs * op->fft.output_sequence_size * 4) << op->fft.num_antennas_log2;
2721 	return output_size;
2722 }
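
/*
 * The size returned above counts 4 bytes per complex 16-bit IQ sample, per
 * enabled cyclic shift in cs_bitmap and per antenna (2^num_antennas_log2).
 * The 4-bytes-per-sample factor is an assumption consistent with the 16-bit
 * IQ comparison done in validate_op_fft_chain().
 */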
2723 
2724 static int
2725 init_test_op_params(struct test_op_params *op_params,
2726 		enum rte_bbdev_op_type op_type, const int expected_status,
2727 		const int vector_mask, struct rte_mempool *ops_mp,
2728 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2729 {
2730 	int ret = 0;
2731 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2732 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2733 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2734 				&op_params->ref_dec_op, 1);
2735 	else if (op_type == RTE_BBDEV_OP_FFT)
2736 		ret = rte_bbdev_fft_op_alloc_bulk(ops_mp,
2737 				&op_params->ref_fft_op, 1);
2738 	else
2739 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2740 				&op_params->ref_enc_op, 1);
2741 
2742 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2743 
2744 	op_params->mp = ops_mp;
2745 	op_params->burst_sz = burst_sz;
2746 	op_params->num_to_process = num_to_process;
2747 	op_params->num_lcores = num_lcores;
2748 	op_params->vector_mask = vector_mask;
2749 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2750 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2751 		op_params->ref_dec_op->status = expected_status;
2752 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2753 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2754 		op_params->ref_enc_op->status = expected_status;
2755 	else if (op_type == RTE_BBDEV_OP_FFT)
2756 		op_params->ref_fft_op->status = expected_status;
2757 	return 0;
2758 }
2759 
2760 static int
2761 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2762 		struct test_op_params *op_params)
2763 {
2764 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2765 	unsigned int i;
2766 	struct active_device *ad;
2767 	unsigned int burst_sz = get_burst_sz();
2768 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2769 	const struct rte_bbdev_op_cap *capabilities = NULL;
2770 
2771 	ad = &active_devs[dev_id];
2772 
2773 	/* Check if device supports op_type */
2774 	if (!is_avail_op(ad, test_vector.op_type))
2775 		return TEST_SUCCESS;
2776 
2777 	struct rte_bbdev_info info;
2778 	rte_bbdev_info_get(ad->dev_id, &info);
2779 	socket_id = GET_SOCKET(info.socket_id);
2780 
2781 	f_ret = create_mempools(ad, socket_id, op_type,
2782 			get_num_ops());
2783 	if (f_ret != TEST_SUCCESS) {
2784 		printf("Couldn't create mempools");
2785 		goto fail;
2786 	}
2787 	if (op_type == RTE_BBDEV_OP_NONE)
2788 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2789 
2790 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2791 			test_vector.expected_status,
2792 			test_vector.mask,
2793 			ad->ops_mempool,
2794 			burst_sz,
2795 			get_num_ops(),
2796 			get_num_lcores());
2797 	if (f_ret != TEST_SUCCESS) {
2798 		printf("Couldn't init test op params");
2799 		goto fail;
2800 	}
2801 
2802 
2803 	/* Find capabilities */
2804 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2805 	do {
2806 		if (cap->type == test_vector.op_type) {
2807 			capabilities = cap;
2808 			break;
2809 		}
2810 		cap++;
2811 	} while (cap->type != RTE_BBDEV_OP_NONE);
2812 	TEST_ASSERT_NOT_NULL(capabilities,
2813 			"Couldn't find capabilities");
2814 
2815 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2816 		create_reference_dec_op(op_params->ref_dec_op);
2817 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2818 		create_reference_enc_op(op_params->ref_enc_op);
2819 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2820 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2821 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2822 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2823 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
2824 		create_reference_fft_op(op_params->ref_fft_op);
2825 
2826 	for (i = 0; i < ad->nb_queues; ++i) {
2827 		f_ret = fill_queue_buffers(op_params,
2828 				ad->in_mbuf_pool,
2829 				ad->hard_out_mbuf_pool,
2830 				ad->soft_out_mbuf_pool,
2831 				ad->harq_in_mbuf_pool,
2832 				ad->harq_out_mbuf_pool,
2833 				ad->queue_ids[i],
2834 				capabilities,
2835 				info.drv.min_alignment,
2836 				socket_id);
2837 		if (f_ret != TEST_SUCCESS) {
2838 			printf("Couldn't init queue buffers");
2839 			goto fail;
2840 		}
2841 	}
2842 
2843 	/* Run test case function */
2844 	t_ret = test_case_func(ad, op_params);
2845 
2846 	/* Free active device resources and return */
2847 	free_buffers(ad, op_params);
2848 	return t_ret;
2849 
2850 fail:
2851 	free_buffers(ad, op_params);
2852 	return TEST_FAILED;
2853 }
2854 
2855 /* Run the given test function on each active device, for each supported
2856  * op type and burst size.
2857  */
2858 static int
2859 run_test_case(test_case_function *test_case_func)
2860 {
2861 	int ret = 0;
2862 	uint8_t dev;
2863 
2864 	/* Alloc op_params */
2865 	struct test_op_params *op_params = rte_zmalloc(NULL,
2866 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2867 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2868 			RTE_ALIGN(sizeof(struct test_op_params),
2869 				RTE_CACHE_LINE_SIZE));
2870 
2871 	/* For each device run test case function */
2872 	for (dev = 0; dev < nb_active_devs; ++dev)
2873 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2874 
2875 	rte_free(op_params);
2876 
2877 	return ret;
2878 }
2879 
2880 
2881 /* Push back the HARQ output from DDR to host */
2882 static void
2883 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2884 		struct rte_bbdev_dec_op **ops,
2885 		const uint16_t n)
2886 {
2887 	uint16_t j;
2888 	int save_status, ret;
2889 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2890 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2891 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2892 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2893 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2894 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2895 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2896 	for (j = 0; j < n; ++j) {
2897 		if ((loopback && mem_out) || hc_out) {
2898 			save_status = ops[j]->status;
2899 			ops[j]->ldpc_dec.op_flags =
2900 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2901 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2902 			if (h_comp)
2903 				ops[j]->ldpc_dec.op_flags +=
2904 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2905 			ops[j]->ldpc_dec.harq_combined_input.offset =
2906 					harq_offset;
2907 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2908 			harq_offset += HARQ_INCR;
2909 			if (!loopback)
2910 				ops[j]->ldpc_dec.harq_combined_input.length =
2911 				ops[j]->ldpc_dec.harq_combined_output.length;
2912 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2913 					&ops[j], 1);
2914 			ret = 0;
2915 			while (ret == 0)
2916 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2917 						dev_id, queue_id,
2918 						&ops_deq[j], 1);
2919 			ops[j]->ldpc_dec.op_flags = flags;
2920 			ops[j]->status = save_status;
2921 		}
2922 	}
2923 }
2924 
2925 /*
2926  * Preload the HARQ memory input into HW DDR (via a loopback enqueue)
2927  * and adjust the HARQ input/output offsets accordingly
2928  */
2929 static void
2930 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2931 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2932 		bool preload)
2933 {
2934 	uint16_t j;
2935 	int deq;
2936 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2937 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2938 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2939 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2940 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2941 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2942 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2943 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2944 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2945 	if ((mem_in || hc_in) && preload) {
2946 		for (j = 0; j < n; ++j) {
2947 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2948 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2949 			ops[j]->ldpc_dec.op_flags =
2950 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2951 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2952 			if (h_comp)
2953 				ops[j]->ldpc_dec.op_flags +=
2954 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2955 			ops[j]->ldpc_dec.harq_combined_output.offset =
2956 					harq_offset;
2957 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2958 			harq_offset += HARQ_INCR;
2959 		}
2960 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2961 		deq = 0;
2962 		while (deq != n)
2963 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2964 					dev_id, queue_id, &ops_deq[deq],
2965 					n - deq);
2966 		/* Restore the operations */
2967 		for (j = 0; j < n; ++j) {
2968 			ops[j]->ldpc_dec.op_flags = flags;
2969 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2970 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2971 		}
2972 	}
2973 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2974 	for (j = 0; j < n; ++j) {
2975 		/* Adjust HARQ offset when we reach external DDR */
2976 		if (mem_in || hc_in)
2977 			ops[j]->ldpc_dec.harq_combined_input.offset
2978 				= harq_offset;
2979 		if (mem_out || hc_out)
2980 			ops[j]->ldpc_dec.harq_combined_output.offset
2981 				= harq_offset;
2982 		harq_offset += HARQ_INCR;
2983 	}
2984 }
2985 
2986 static void
2987 dequeue_event_callback(uint16_t dev_id,
2988 		enum rte_bbdev_event_type event, void *cb_arg,
2989 		void *ret_param)
2990 {
2991 	int ret;
2992 	uint16_t i;
2993 	uint64_t total_time;
2994 	uint16_t deq, burst_sz, num_ops;
2995 	uint16_t queue_id = *(uint16_t *) ret_param;
2996 	struct rte_bbdev_info info;
2997 	double tb_len_bits;
2998 	struct thread_params *tp = cb_arg;
2999 
3000 	/* Find matching thread params using queue_id */
3001 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
3002 		if (tp->queue_id == queue_id)
3003 			break;
3004 
3005 	if (i == MAX_QUEUES) {
3006 		printf("%s: Queue_id from interrupt details was not found!\n",
3007 				__func__);
3008 		return;
3009 	}
3010 
3011 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
3012 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3013 		printf(
3014 			"Dequeue interrupt handler called for incorrect event!\n");
3015 		return;
3016 	}
3017 
3018 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
3019 	num_ops = tp->op_params->num_to_process;
3020 
3021 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3022 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3023 				&tp->dec_ops[
3024 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3025 				burst_sz);
3026 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3027 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3028 				&tp->dec_ops[
3029 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3030 				burst_sz);
3031 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3032 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3033 				&tp->enc_ops[
3034 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3035 				burst_sz);
3036 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
3037 		deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
3038 				&tp->fft_ops[
3039 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3040 				burst_sz);
3041 	else /*RTE_BBDEV_OP_TURBO_ENC*/
3042 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3043 				&tp->enc_ops[
3044 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
3045 				burst_sz);
3046 
3047 	if (deq < burst_sz) {
3048 		printf(
3049 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
3050 			burst_sz, deq);
3051 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3052 		return;
3053 	}
3054 
3055 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
3056 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
3057 		return;
3058 	}
3059 
3060 	total_time = rte_rdtsc_precise() - tp->start_time;
3061 
3062 	rte_bbdev_info_get(dev_id, &info);
3063 
3064 	ret = TEST_SUCCESS;
3065 
3066 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
3067 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3068 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op);
3069 		/* Get the maximum iter_count across all dequeued ops */
3070 		for (i = 0; i < num_ops; ++i)
3071 			tp->iter_count = RTE_MAX(
3072 					tp->dec_ops[i]->turbo_dec.iter_count,
3073 					tp->iter_count);
3074 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
3075 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
3076 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3077 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
3078 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3079 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
3080 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3081 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
3082 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3083 	} else if (test_vector.op_type == RTE_BBDEV_OP_FFT) {
3084 		struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
3085 		ret = validate_fft_op(tp->fft_ops, num_ops, ref_op);
3086 		rte_bbdev_fft_op_free_bulk(tp->fft_ops, deq);
3087 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
3088 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3089 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
3090 				tp->op_params->vector_mask);
3091 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
3092 	}
3093 
3094 	if (ret) {
3095 		printf("Buffers validation failed\n");
3096 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3097 	}
3098 
3099 	switch (test_vector.op_type) {
3100 	case RTE_BBDEV_OP_TURBO_DEC:
3101 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
3102 		break;
3103 	case RTE_BBDEV_OP_TURBO_ENC:
3104 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
3105 		break;
3106 	case RTE_BBDEV_OP_LDPC_DEC:
3107 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
3108 		break;
3109 	case RTE_BBDEV_OP_FFT:
3110 		tb_len_bits = calc_fft_size(tp->op_params->ref_fft_op);
3111 		break;
3112 	case RTE_BBDEV_OP_LDPC_ENC:
3113 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
3114 		break;
3115 	case RTE_BBDEV_OP_NONE:
3116 		tb_len_bits = 0.0;
3117 		break;
3118 	default:
3119 		printf("Unknown op type: %d\n", test_vector.op_type);
3120 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3121 		return;
3122 	}
3123 
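	/*
	 * Accumulate per-queue throughput: operations per second and Mbit/s,
	 * based on the TB payload size selected above and the TSC cycles
	 * elapsed since tp->start_time.
	 */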
3124 	tp->ops_per_sec += ((double)num_ops) /
3125 			((double)total_time / (double)rte_get_tsc_hz());
3126 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
3127 			((double)total_time / (double)rte_get_tsc_hz());
3128 
3129 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
3130 }
3131 
3132 static int
3133 throughput_intr_lcore_ldpc_dec(void *arg)
3134 {
3135 	struct thread_params *tp = arg;
3136 	unsigned int enqueued;
3137 	const uint16_t queue_id = tp->queue_id;
3138 	const uint16_t burst_sz = tp->op_params->burst_sz;
3139 	const uint16_t num_to_process = tp->op_params->num_to_process;
3140 	struct rte_bbdev_dec_op *ops[num_to_process];
3141 	struct test_buffers *bufs = NULL;
3142 	struct rte_bbdev_info info;
3143 	int ret, i, j;
3144 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3145 	uint16_t num_to_enq, enq;
3146 
3147 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3148 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3149 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3150 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3151 
3152 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3153 			"BURST_SIZE should be <= %u", MAX_BURST);
3154 
3155 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3156 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3157 			tp->dev_id, queue_id);
3158 
3159 	rte_bbdev_info_get(tp->dev_id, &info);
3160 
3161 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3162 			"NUM_OPS cannot exceed %u for this device",
3163 			info.drv.queue_size_lim);
3164 
3165 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3166 
3167 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3168 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3169 
3170 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3171 
3172 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3173 				num_to_process);
3174 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3175 			num_to_process);
3176 	ref_op->ldpc_dec.iter_max = get_iter_max();
3177 
3178 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3179 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
3180 				bufs->hard_outputs, bufs->soft_outputs,
3181 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3182 
3183 	/* Set counter to validate the ordering */
3184 	for (j = 0; j < num_to_process; ++j)
3185 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3186 
3187 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3188 		for (i = 0; i < num_to_process; ++i) {
3189 			if (!loopback)
3190 				mbuf_reset(ops[i]->ldpc_dec.hard_output.data);
3191 			if (hc_out || loopback)
3192 				mbuf_reset(ops[i]->ldpc_dec.harq_combined_output.data);
3193 			if (ops[i]->ldpc_dec.soft_output.data != NULL)
3194 				mbuf_reset(ops[i]->ldpc_dec.soft_output.data);
3195 		}
3196 
3197 		tp->start_time = rte_rdtsc_precise();
3198 		for (enqueued = 0; enqueued < num_to_process;) {
3199 			num_to_enq = burst_sz;
3200 
3201 			if (unlikely(num_to_process - enqueued < num_to_enq))
3202 				num_to_enq = num_to_process - enqueued;
3203 
3204 			enq = 0;
3205 			do {
3206 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
3207 						tp->dev_id,
3208 						queue_id, &ops[enqueued],
3209 						num_to_enq);
3210 			} while (unlikely(num_to_enq != enq));
3211 			enqueued += enq;
3212 
3213 			/* Write the current number of enqueued descriptors to
3214 			 * the thread's burst_sz. This ensures that the proper
3215 			 * number of descriptors is dequeued in the callback
3216 			 * function - needed for the last batch when the
3217 			 * number of operations is not a multiple of the
3218 			 * burst size.
3219 			 */
3220 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3221 
3222 			/* Wait until processing of the previous batch is
3223 			 * completed
3224 			 */
3225 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3226 		}
3227 		if (j != TEST_REPETITIONS - 1)
3228 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3229 	}
3230 
3231 	return TEST_SUCCESS;
3232 }
3233 
3234 static int
3235 throughput_intr_lcore_dec(void *arg)
3236 {
3237 	struct thread_params *tp = arg;
3238 	unsigned int enqueued;
3239 	const uint16_t queue_id = tp->queue_id;
3240 	const uint16_t burst_sz = tp->op_params->burst_sz;
3241 	const uint16_t num_to_process = tp->op_params->num_to_process;
3242 	struct rte_bbdev_dec_op *ops[num_to_process];
3243 	struct test_buffers *bufs = NULL;
3244 	struct rte_bbdev_info info;
3245 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3246 	int ret, i, j;
3247 	uint16_t num_to_enq, enq;
3248 
3249 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3250 			"BURST_SIZE should be <= %u", MAX_BURST);
3251 
3252 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3253 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3254 			tp->dev_id, queue_id);
3255 
3256 	rte_bbdev_info_get(tp->dev_id, &info);
3257 
3258 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3259 			"NUM_OPS cannot exceed %u for this device",
3260 			info.drv.queue_size_lim);
3261 
3262 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3263 
3264 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3265 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3266 
3267 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3268 
3269 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3270 				num_to_process);
3271 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
3272 	ref_op->turbo_dec.iter_max = get_iter_max();
3273 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3274 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
3275 				bufs->hard_outputs, bufs->soft_outputs,
3276 				tp->op_params->ref_dec_op);
3277 
3278 	/* Set counter to validate the ordering. */
3279 	for (j = 0; j < num_to_process; ++j)
3280 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3281 
3282 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3283 		for (i = 0; i < num_to_process; ++i) {
3284 			mbuf_reset(ops[i]->turbo_dec.hard_output.data);
3285 			if (ops[i]->turbo_dec.soft_output.data != NULL)
3286 				mbuf_reset(ops[i]->turbo_dec.soft_output.data);
3287 		}
3288 
3289 		tp->start_time = rte_rdtsc_precise();
3290 		for (enqueued = 0; enqueued < num_to_process;) {
3291 			num_to_enq = burst_sz;
3292 
3293 			if (unlikely(num_to_process - enqueued < num_to_enq))
3294 				num_to_enq = num_to_process - enqueued;
3295 
3296 			enq = 0;
3297 			do {
3298 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3299 						queue_id, &ops[enqueued],
3300 						num_to_enq);
3301 			} while (unlikely(num_to_enq != enq));
3302 			enqueued += enq;
3303 
3304 			/* Store the number of descriptors just enqueued in the
3305 			 * thread's burst_sz. This ensures that the proper number
3306 			 * of descriptors is dequeued in the callback function,
3307 			 * which is needed for the last batch when the number of
3308 			 * operations is not a multiple of the burst
3309 			 * size.
3310 			 */
3311 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3312 
3313 			/* Wait until processing of previous batch is
3314 			 * completed
3315 			 */
3316 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3317 		}
3318 		if (j != TEST_REPETITIONS - 1)
3319 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3320 	}
3321 
3322 	return TEST_SUCCESS;
3323 }
3324 
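/* Per-lcore throughput routine for Turbo encode in interrupt mode. */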
3325 static int
3326 throughput_intr_lcore_enc(void *arg)
3327 {
3328 	struct thread_params *tp = arg;
3329 	unsigned int enqueued;
3330 	const uint16_t queue_id = tp->queue_id;
3331 	const uint16_t burst_sz = tp->op_params->burst_sz;
3332 	const uint16_t num_to_process = tp->op_params->num_to_process;
3333 	struct rte_bbdev_enc_op *ops[num_to_process];
3334 	struct test_buffers *bufs = NULL;
3335 	struct rte_bbdev_info info;
3336 	int ret, i, j;
3337 	uint16_t num_to_enq, enq;
3338 
3339 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3340 			"BURST_SIZE should be <= %u", MAX_BURST);
3341 
3342 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3343 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3344 			tp->dev_id, queue_id);
3345 
3346 	rte_bbdev_info_get(tp->dev_id, &info);
3347 
3348 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3349 			"NUM_OPS cannot exceed %u for this device",
3350 			info.drv.queue_size_lim);
3351 
3352 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3353 
3354 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3355 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3356 
3357 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3358 
3359 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3360 			num_to_process);
3361 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3362 			num_to_process);
3363 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3364 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
3365 				bufs->hard_outputs, tp->op_params->ref_enc_op);
3366 
3367 	/* Set counter to validate the ordering */
3368 	for (j = 0; j < num_to_process; ++j)
3369 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3370 
3371 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3372 		for (i = 0; i < num_to_process; ++i)
3373 			mbuf_reset(ops[i]->turbo_enc.output.data);
3374 
3375 		tp->start_time = rte_rdtsc_precise();
3376 		for (enqueued = 0; enqueued < num_to_process;) {
3377 			num_to_enq = burst_sz;
3378 
3379 			if (unlikely(num_to_process - enqueued < num_to_enq))
3380 				num_to_enq = num_to_process - enqueued;
3381 
3382 			enq = 0;
3383 			do {
3384 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3385 						queue_id, &ops[enqueued],
3386 						num_to_enq);
3387 			} while (unlikely(enq != num_to_enq));
3388 			enqueued += enq;
3389 
3390 			/* Store the number of descriptors just enqueued in the
3391 			 * thread's burst_sz. This ensures that the proper number
3392 			 * of descriptors is dequeued in the callback function,
3393 			 * which is needed for the last batch when the number of
3394 			 * operations is not a multiple of the burst
3395 			 * size.
3396 			 */
3397 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3398 
3399 			/* Wait until processing of previous batch is
3400 			 * completed
3401 			 */
3402 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3403 		}
3404 		if (j != TEST_REPETITIONS - 1)
3405 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3406 	}
3407 
3408 	return TEST_SUCCESS;
3409 }
3410 
3411 
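/* Per-lcore throughput routine for LDPC encode in interrupt mode. */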
3412 static int
3413 throughput_intr_lcore_ldpc_enc(void *arg)
3414 {
3415 	struct thread_params *tp = arg;
3416 	unsigned int enqueued;
3417 	const uint16_t queue_id = tp->queue_id;
3418 	const uint16_t burst_sz = tp->op_params->burst_sz;
3419 	const uint16_t num_to_process = tp->op_params->num_to_process;
3420 	struct rte_bbdev_enc_op *ops[num_to_process];
3421 	struct test_buffers *bufs = NULL;
3422 	struct rte_bbdev_info info;
3423 	int ret, i, j;
3424 	uint16_t num_to_enq, enq;
3425 
3426 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3427 			"BURST_SIZE should be <= %u", MAX_BURST);
3428 
3429 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3430 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3431 			tp->dev_id, queue_id);
3432 
3433 	rte_bbdev_info_get(tp->dev_id, &info);
3434 
3435 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3436 			"NUM_OPS cannot exceed %u for this device",
3437 			info.drv.queue_size_lim);
3438 
3439 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3440 
3441 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3442 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3443 
3444 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3445 
3446 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3447 			num_to_process);
3448 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3449 			num_to_process);
3450 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3451 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3452 				bufs->inputs, bufs->hard_outputs,
3453 				tp->op_params->ref_enc_op);
3454 
3455 	/* Set counter to validate the ordering */
3456 	for (j = 0; j < num_to_process; ++j)
3457 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3458 
3459 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3460 		for (i = 0; i < num_to_process; ++i)
3461 			mbuf_reset(ops[i]->turbo_enc.output.data);
3462 
3463 		tp->start_time = rte_rdtsc_precise();
3464 		for (enqueued = 0; enqueued < num_to_process;) {
3465 			num_to_enq = burst_sz;
3466 
3467 			if (unlikely(num_to_process - enqueued < num_to_enq))
3468 				num_to_enq = num_to_process - enqueued;
3469 
3470 			enq = 0;
3471 			do {
3472 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3473 						tp->dev_id,
3474 						queue_id, &ops[enqueued],
3475 						num_to_enq);
3476 			} while (unlikely(enq != num_to_enq));
3477 			enqueued += enq;
3478 
3479 			/* Store the number of descriptors just enqueued in the
3480 			 * thread's burst_sz. This ensures that the proper number
3481 			 * of descriptors is dequeued in the callback function,
3482 			 * which is needed for the last batch when the number of
3483 			 * operations is not a multiple of the burst
3484 			 * size.
3485 			 */
3486 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3487 
3488 			/* Wait until processing of previous batch is
3489 			 * completed
3490 			 */
3491 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3492 		}
3493 		if (j != TEST_REPETITIONS - 1)
3494 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3495 	}
3496 
3497 	return TEST_SUCCESS;
3498 }
3499 
3500 
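/* Per-lcore throughput routine for FFT in interrupt mode. */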
3501 static int
3502 throughput_intr_lcore_fft(void *arg)
3503 {
3504 	struct thread_params *tp = arg;
3505 	unsigned int enqueued;
3506 	const uint16_t queue_id = tp->queue_id;
3507 	const uint16_t burst_sz = tp->op_params->burst_sz;
3508 	const uint16_t num_to_process = tp->op_params->num_to_process;
3509 	struct rte_bbdev_fft_op *ops[num_to_process];
3510 	struct test_buffers *bufs = NULL;
3511 	struct rte_bbdev_info info;
3512 	int ret, i, j;
3513 	uint16_t num_to_enq, enq;
3514 
3515 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3516 			"BURST_SIZE should be <= %u", MAX_BURST);
3517 
3518 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3519 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3520 			tp->dev_id, queue_id);
3521 
3522 	rte_bbdev_info_get(tp->dev_id, &info);
3523 
3524 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3525 			"NUM_OPS cannot exceed %u for this device",
3526 			info.drv.queue_size_lim);
3527 
3528 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3529 
3530 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3531 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3532 
3533 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3534 
3535 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops,
3536 			num_to_process);
3537 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3538 			num_to_process);
3539 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3540 		copy_reference_fft_op(ops, num_to_process, 0, bufs->inputs,
3541 				bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_fft_op);
3542 
3543 	/* Set counter to validate the ordering */
3544 	for (j = 0; j < num_to_process; ++j)
3545 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3546 
3547 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3548 		for (i = 0; i < num_to_process; ++i)
3549 			mbuf_reset(ops[i]->fft.base_output.data);
3550 
3551 		tp->start_time = rte_rdtsc_precise();
3552 		for (enqueued = 0; enqueued < num_to_process;) {
3553 			num_to_enq = burst_sz;
3554 
3555 			if (unlikely(num_to_process - enqueued < num_to_enq))
3556 				num_to_enq = num_to_process - enqueued;
3557 
3558 			enq = 0;
3559 			do {
3560 				enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
3561 						queue_id, &ops[enqueued],
3562 						num_to_enq);
3563 			} while (unlikely(enq != num_to_enq));
3564 			enqueued += enq;
3565 
3566 			/* Store the number of descriptors just enqueued in the
3567 			 * thread's burst_sz. This ensures that the proper number
3568 			 * of descriptors is dequeued in the callback function,
3569 			 * which is needed for the last batch when the number of
3570 			 * operations is not a multiple of the burst
3571 			 * size.
3572 			 */
3573 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3574 
3575 			/* Wait until processing of previous batch is
3576 			 * completed
3577 			 */
3578 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3579 		}
3580 		if (j != TEST_REPETITIONS - 1)
3581 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3582 	}
3583 
3584 	return TEST_SUCCESS;
3585 }
3586 
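/* Per-lcore throughput routine for Turbo decode in PMD (polling) mode: each
 * repetition enqueues and dequeues num_ops operations while the elapsed TSC
 * cycles are accumulated into the per-core results.
 */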
3587 static int
3588 throughput_pmd_lcore_dec(void *arg)
3589 {
3590 	struct thread_params *tp = arg;
3591 	uint16_t enq, deq;
3592 	uint64_t total_time = 0, start_time;
3593 	const uint16_t queue_id = tp->queue_id;
3594 	const uint16_t burst_sz = tp->op_params->burst_sz;
3595 	const uint16_t num_ops = tp->op_params->num_to_process;
3596 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3597 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3598 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3599 	struct test_buffers *bufs = NULL;
3600 	int i, j, ret;
3601 	struct rte_bbdev_info info;
3602 	uint16_t num_to_enq;
3603 	bool so_enable;
3604 
3605 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3606 			"BURST_SIZE should be <= %u", MAX_BURST);
3607 
3608 	rte_bbdev_info_get(tp->dev_id, &info);
3609 
3610 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3611 			"NUM_OPS cannot exceed %u for this device",
3612 			info.drv.queue_size_lim);
3613 
3614 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3615 
3616 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3617 
3618 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3619 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3620 	ref_op->turbo_dec.iter_max = get_iter_max();
3621 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3622 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3623 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3624 
3625 	so_enable = check_bit(ops_enq[0]->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT);
3626 
3627 	/* Set counter to validate the ordering */
3628 	for (j = 0; j < num_ops; ++j)
3629 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3630 
3631 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3632 		uint32_t time_out = 0;
3633 		for (j = 0; j < num_ops; ++j)
3634 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3635 		if (so_enable)
3636 			for (j = 0; j < num_ops; ++j)
3637 				mbuf_reset(ops_enq[j]->turbo_dec.soft_output.data);
3638 
3639 		start_time = rte_rdtsc_precise();
3640 
3641 		for (enq = 0, deq = 0; enq < num_ops;) {
3642 			num_to_enq = burst_sz;
3643 
3644 			if (unlikely(num_ops - enq < num_to_enq))
3645 				num_to_enq = num_ops - enq;
3646 
3647 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3648 					queue_id, &ops_enq[enq], num_to_enq);
3649 
3650 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3651 					queue_id, &ops_deq[deq], enq - deq);
3652 			time_out++;
3653 			if (time_out >= TIME_OUT_POLL) {
3654 				timeout_exit(tp->dev_id);
3655 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3656 			}
3657 		}
3658 
3659 		/* dequeue the remaining */
3660 		time_out = 0;
3661 		while (deq < enq) {
3662 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3663 					queue_id, &ops_deq[deq], enq - deq);
3664 			time_out++;
3665 			if (time_out >= TIME_OUT_POLL) {
3666 				timeout_exit(tp->dev_id);
3667 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3668 			}
3669 		}
3670 
3671 		total_time += rte_rdtsc_precise() - start_time;
3672 	}
3673 
3674 	tp->iter_count = 0;
3675 	/* get the max of iter_count for all dequeued ops */
3676 	for (i = 0; i < num_ops; ++i) {
3677 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3678 				tp->iter_count);
3679 	}
3680 
3681 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3682 		ret = validate_dec_op(ops_deq, num_ops, ref_op);
3683 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3684 	}
3685 
3686 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3687 
3688 	double tb_len_bits = calc_dec_TB_size(ref_op);
3689 
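	/* Ops/s is the number of processed operations over the elapsed time;
	 * Mbps is the total number of decoded TB bits divided by the elapsed
	 * time in microseconds.
	 */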
3690 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3691 			((double)total_time / (double)rte_get_tsc_hz());
3692 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3693 			1000000.0) / ((double)total_time /
3694 			(double)rte_get_tsc_hz());
3695 
3696 	return TEST_SUCCESS;
3697 }
3698 
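/* Per-lcore BLER routine for LDPC decode: early termination is forced on,
 * LLR inputs are regenerated, and BLER plus the average iteration count are
 * derived from the dequeued operations.
 */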
3699 static int
3700 bler_pmd_lcore_ldpc_dec(void *arg)
3701 {
3702 	struct thread_params *tp = arg;
3703 	uint16_t enq, deq;
3704 	uint64_t total_time = 0, start_time;
3705 	const uint16_t queue_id = tp->queue_id;
3706 	const uint16_t burst_sz = tp->op_params->burst_sz;
3707 	const uint16_t num_ops = tp->op_params->num_to_process;
3708 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3709 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3710 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3711 	struct test_buffers *bufs = NULL;
3712 	int i, j, ret;
3713 	float parity_bler = 0;
3714 	struct rte_bbdev_info info;
3715 	uint16_t num_to_enq;
3716 	bool extDdr = check_bit(ldpc_cap_flags,
3717 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3718 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3719 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3720 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3721 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3722 
3723 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3724 			"BURST_SIZE should be <= %u", MAX_BURST);
3725 
3726 	rte_bbdev_info_get(tp->dev_id, &info);
3727 
3728 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3729 			"NUM_OPS cannot exceed %u for this device",
3730 			info.drv.queue_size_lim);
3731 
3732 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3733 
3734 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3735 
3736 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3737 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3738 
3739 	/* For BLER tests we need to enable early termination */
3740 	if (!check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3741 		ref_op->ldpc_dec.op_flags |= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3742 
3743 	ref_op->ldpc_dec.iter_max = get_iter_max();
3744 
3745 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3746 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3747 				bufs->hard_outputs, bufs->soft_outputs,
3748 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3749 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3750 
3751 	/* Set counter to validate the ordering */
3752 	for (j = 0; j < num_ops; ++j)
3753 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3754 
3755 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3756 		uint32_t time_out = 0;
3757 		for (j = 0; j < num_ops; ++j) {
3758 			if (!loopback)
3759 				mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
3760 			if (hc_out || loopback)
3761 				mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
3762 			if (ops_enq[j]->ldpc_dec.soft_output.data != NULL)
3763 				mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data);
3764 		}
3765 		if (extDdr)
3766 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3767 					num_ops, true);
3768 		start_time = rte_rdtsc_precise();
3769 
3770 		for (enq = 0, deq = 0; enq < num_ops;) {
3771 			num_to_enq = burst_sz;
3772 
3773 			if (unlikely(num_ops - enq < num_to_enq))
3774 				num_to_enq = num_ops - enq;
3775 
3776 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3777 					queue_id, &ops_enq[enq], num_to_enq);
3778 
3779 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3780 					queue_id, &ops_deq[deq], enq - deq);
3781 			time_out++;
3782 			if (time_out >= TIME_OUT_POLL) {
3783 				timeout_exit(tp->dev_id);
3784 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3785 			}
3786 		}
3787 
3788 		/* dequeue the remaining */
3789 		time_out = 0;
3790 		while (deq < enq) {
3791 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3792 					queue_id, &ops_deq[deq], enq - deq);
3793 			time_out++;
3794 			if (time_out >= TIME_OUT_POLL) {
3795 				timeout_exit(tp->dev_id);
3796 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3797 			}
3798 		}
3799 
3800 		total_time += rte_rdtsc_precise() - start_time;
3801 	}
3802 
3803 	tp->iter_count = 0;
3804 	tp->iter_average = 0;
3805 	/* get the max of iter_count for all dequeued ops */
3806 	for (i = 0; i < num_ops; ++i) {
3807 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3808 				tp->iter_count);
3809 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3810 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3811 			parity_bler += 1.0;
3812 	}
3813 
3814 	parity_bler /= num_ops; /* BLER based on the SYNDROME_ERROR status */
3815 	tp->iter_average /= num_ops;
3816 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3817 
3818 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3819 			&& tp->bler == 0
3820 			&& parity_bler == 0
3821 			&& !hc_out) {
3822 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3823 				tp->op_params->vector_mask);
3824 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3825 	}
3826 
3827 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3828 
3829 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3830 	tp->ops_per_sec = ((double)num_ops * 1) /
3831 			((double)total_time / (double)rte_get_tsc_hz());
3832 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3833 			1000000.0) / ((double)total_time /
3834 			(double)rte_get_tsc_hz());
3835 
3836 	return TEST_SUCCESS;
3837 }
3838 
3839 
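/* Per-lcore BLER routine for Turbo decode, with early termination forced on
 * and BLER derived from the dequeued operations.
 */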
3840 static int
3841 bler_pmd_lcore_turbo_dec(void *arg)
3842 {
3843 	struct thread_params *tp = arg;
3844 	uint16_t enq, deq;
3845 	uint64_t total_time = 0, start_time;
3846 	const uint16_t queue_id = tp->queue_id;
3847 	const uint16_t burst_sz = tp->op_params->burst_sz;
3848 	const uint16_t num_ops = tp->op_params->num_to_process;
3849 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3850 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3851 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3852 	struct test_buffers *bufs = NULL;
3853 	int i, j, ret;
3854 	struct rte_bbdev_info info;
3855 	uint16_t num_to_enq;
3856 
3857 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3858 			"BURST_SIZE should be <= %u", MAX_BURST);
3859 
3860 	rte_bbdev_info_get(tp->dev_id, &info);
3861 
3862 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3863 			"NUM_OPS cannot exceed %u for this device",
3864 			info.drv.queue_size_lim);
3865 
3866 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3867 
3868 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3869 
3870 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3871 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3872 
3873 	/* For BLER tests we need to enable early termination */
3874 	if (!check_bit(ref_op->turbo_dec.op_flags, RTE_BBDEV_TURBO_EARLY_TERMINATION))
3875 		ref_op->turbo_dec.op_flags |= RTE_BBDEV_TURBO_EARLY_TERMINATION;
3876 
3877 	ref_op->turbo_dec.iter_max = get_iter_max();
3878 
3879 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3880 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3881 				bufs->hard_outputs, bufs->soft_outputs,
3882 				ref_op);
3883 	generate_turbo_llr_input(num_ops, bufs->inputs, ref_op);
3884 
3885 	/* Set counter to validate the ordering */
3886 	for (j = 0; j < num_ops; ++j)
3887 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3888 
3889 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3890 		uint32_t time_out = 0;
3891 		for (j = 0; j < num_ops; ++j)
3892 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3895 
3896 		start_time = rte_rdtsc_precise();
3897 
3898 		for (enq = 0, deq = 0; enq < num_ops;) {
3899 			num_to_enq = burst_sz;
3900 
3901 			if (unlikely(num_ops - enq < num_to_enq))
3902 				num_to_enq = num_ops - enq;
3903 
3904 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3905 					queue_id, &ops_enq[enq], num_to_enq);
3906 
3907 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3908 					queue_id, &ops_deq[deq], enq - deq);
3909 			time_out++;
3910 			if (time_out >= TIME_OUT_POLL) {
3911 				timeout_exit(tp->dev_id);
3912 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
3913 			}
3914 		}
3915 
3916 		/* dequeue the remaining */
3917 		time_out = 0;
3918 		while (deq < enq) {
3919 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3920 					queue_id, &ops_deq[deq], enq - deq);
3921 			time_out++;
3922 			if (time_out >= TIME_OUT_POLL) {
3923 				timeout_exit(tp->dev_id);
3924 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
3925 			}
3926 		}
3927 
3928 		total_time += rte_rdtsc_precise() - start_time;
3929 	}
3930 
3931 	tp->iter_count = 0;
3932 	tp->iter_average = 0;
3933 	/* get the max of iter_count for all dequeued ops */
3934 	for (i = 0; i < num_ops; ++i) {
3935 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3936 				tp->iter_count);
3937 		tp->iter_average += (double) ops_enq[i]->turbo_dec.iter_count;
3938 	}
3939 
3940 	tp->iter_average /= num_ops;
3941 	tp->bler = (double) validate_turbo_bler(ops_deq, num_ops) / num_ops;
3942 
3943 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3944 
3945 	double tb_len_bits = calc_dec_TB_size(ref_op);
3946 	tp->ops_per_sec = ((double)num_ops * 1) /
3947 			((double)total_time / (double)rte_get_tsc_hz());
3948 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3949 			1000000.0) / ((double)total_time /
3950 			(double)rte_get_tsc_hz());
3951 	printf("TBS %.0f bits, Time %.0f us\n", tb_len_bits, 1000000.0 *
3952 			((double)total_time / (double)rte_get_tsc_hz()));
3953 
3954 	return TEST_SUCCESS;
3955 }
3956 
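/* Per-lcore throughput routine for LDPC decode in PMD mode: early termination
 * is disabled so each op runs iter_max iterations, and HARQ data is preloaded
 * to / retrieved from external DDR when that capability is used.
 */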
3957 static int
3958 throughput_pmd_lcore_ldpc_dec(void *arg)
3959 {
3960 	struct thread_params *tp = arg;
3961 	uint16_t enq, deq;
3962 	uint64_t total_time = 0, start_time;
3963 	const uint16_t queue_id = tp->queue_id;
3964 	const uint16_t burst_sz = tp->op_params->burst_sz;
3965 	const uint16_t num_ops = tp->op_params->num_to_process;
3966 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3967 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3968 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3969 	struct test_buffers *bufs = NULL;
3970 	int i, j, ret;
3971 	struct rte_bbdev_info info;
3972 	uint16_t num_to_enq;
3973 	bool extDdr = check_bit(ldpc_cap_flags,
3974 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3975 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3976 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3977 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3978 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3979 
3980 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3981 			"BURST_SIZE should be <= %u", MAX_BURST);
3982 
3983 	rte_bbdev_info_get(tp->dev_id, &info);
3984 
3985 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3986 			"NUM_OPS cannot exceed %u for this device",
3987 			info.drv.queue_size_lim);
3988 
3989 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3990 
3991 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3992 
3993 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3994 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3995 
3996 	/* For throughput tests we need to disable early termination */
3997 	if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3998 		ref_op->ldpc_dec.op_flags &= ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3999 
4000 	ref_op->ldpc_dec.iter_max = get_iter_max();
4001 	/* Since ET is disabled, the expected iter_count is iter_max */
4002 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4003 
4004 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4005 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
4006 				bufs->hard_outputs, bufs->soft_outputs,
4007 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
4008 
4009 	/* Set counter to validate the ordering */
4010 	for (j = 0; j < num_ops; ++j)
4011 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4012 
4013 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4014 		uint32_t time_out = 0;
4015 		for (j = 0; j < num_ops; ++j) {
4016 			if (!loopback)
4017 				mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
4018 			if (hc_out || loopback)
4019 				mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
4020 			if (ops_enq[j]->ldpc_dec.soft_output.data != NULL)
4021 				mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data);
4022 		}
4023 		if (extDdr)
4024 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
4025 					num_ops, true);
4026 		start_time = rte_rdtsc_precise();
4027 
4028 		for (enq = 0, deq = 0; enq < num_ops;) {
4029 			num_to_enq = burst_sz;
4030 
4031 			if (unlikely(num_ops - enq < num_to_enq))
4032 				num_to_enq = num_ops - enq;
4033 
4034 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
4035 					queue_id, &ops_enq[enq], num_to_enq);
4036 
4037 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
4038 					queue_id, &ops_deq[deq], enq - deq);
4039 			time_out++;
4040 			if (time_out >= TIME_OUT_POLL) {
4041 				timeout_exit(tp->dev_id);
4042 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4043 			}
4044 		}
4045 
4046 		/* dequeue the remaining */
4047 		time_out = 0;
4048 		while (deq < enq) {
4049 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
4050 					queue_id, &ops_deq[deq], enq - deq);
4051 			time_out++;
4052 			if (time_out >= TIME_OUT_POLL) {
4053 				timeout_exit(tp->dev_id);
4054 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4055 			}
4056 		}
4057 
4058 		total_time += rte_rdtsc_precise() - start_time;
4059 	}
4060 
4061 	tp->iter_count = 0;
4062 	/* get the max of iter_count for all dequeued ops */
4063 	for (i = 0; i < num_ops; ++i) {
4064 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
4065 				tp->iter_count);
4066 	}
4067 	if (extDdr) {
4068 		/* Read loopback is not thread safe */
4069 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
4070 	}
4071 
4072 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4073 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
4074 				tp->op_params->vector_mask);
4075 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4076 	}
4077 
4078 	ret = rte_bbdev_queue_stop(tp->dev_id, queue_id);
4079 	if (ret != 0)
4080 		printf("Failed to stop queue on dev %u q_id: %u\n", tp->dev_id, queue_id);
4081 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
4082 
4083 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
4084 
4085 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4086 			((double)total_time / (double)rte_get_tsc_hz());
4087 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
4088 			1000000.0) / ((double)total_time /
4089 			(double)rte_get_tsc_hz());
4090 
4091 	return TEST_SUCCESS;
4092 }
4093 
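/* Per-lcore throughput routine for Turbo encode in PMD mode. */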
4094 static int
4095 throughput_pmd_lcore_enc(void *arg)
4096 {
4097 	struct thread_params *tp = arg;
4098 	uint16_t enq, deq;
4099 	uint64_t total_time = 0, start_time;
4100 	const uint16_t queue_id = tp->queue_id;
4101 	const uint16_t burst_sz = tp->op_params->burst_sz;
4102 	const uint16_t num_ops = tp->op_params->num_to_process;
4103 	struct rte_bbdev_enc_op *ops_enq[num_ops];
4104 	struct rte_bbdev_enc_op *ops_deq[num_ops];
4105 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
4106 	struct test_buffers *bufs = NULL;
4107 	int i, j, ret;
4108 	struct rte_bbdev_info info;
4109 	uint16_t num_to_enq;
4110 
4111 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4112 			"BURST_SIZE should be <= %u", MAX_BURST);
4113 
4114 	rte_bbdev_info_get(tp->dev_id, &info);
4115 
4116 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4117 			"NUM_OPS cannot exceed %u for this device",
4118 			info.drv.queue_size_lim);
4119 
4120 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4121 
4122 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4123 
4124 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
4125 			num_ops);
4126 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
4127 			num_ops);
4128 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4129 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
4130 				bufs->hard_outputs, ref_op);
4131 
4132 	/* Set counter to validate the ordering */
4133 	for (j = 0; j < num_ops; ++j)
4134 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4135 
4136 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4137 		uint32_t time_out = 0;
4138 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4139 			for (j = 0; j < num_ops; ++j)
4140 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
4141 
4142 		start_time = rte_rdtsc_precise();
4143 
4144 		for (enq = 0, deq = 0; enq < num_ops;) {
4145 			num_to_enq = burst_sz;
4146 
4147 			if (unlikely(num_ops - enq < num_to_enq))
4148 				num_to_enq = num_ops - enq;
4149 
4150 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
4151 					queue_id, &ops_enq[enq], num_to_enq);
4152 
4153 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
4154 					queue_id, &ops_deq[deq], enq - deq);
4155 			time_out++;
4156 			if (time_out >= TIME_OUT_POLL) {
4157 				timeout_exit(tp->dev_id);
4158 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4159 			}
4160 		}
4161 
4162 		/* dequeue the remaining */
4163 		time_out = 0;
4164 		while (deq < enq) {
4165 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
4166 					queue_id, &ops_deq[deq], enq - deq);
4167 			time_out++;
4168 			if (time_out >= TIME_OUT_POLL) {
4169 				timeout_exit(tp->dev_id);
4170 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4171 			}
4172 		}
4173 
4174 		total_time += rte_rdtsc_precise() - start_time;
4175 	}
4176 
4177 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4178 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
4179 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4180 	}
4181 
4182 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
4183 
4184 	double tb_len_bits = calc_enc_TB_size(ref_op);
4185 
4186 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4187 			((double)total_time / (double)rte_get_tsc_hz());
4188 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
4189 			/ 1000000.0) / ((double)total_time /
4190 			(double)rte_get_tsc_hz());
4191 
4192 	return TEST_SUCCESS;
4193 }
4194 
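/* Per-lcore throughput routine for LDPC encode in PMD mode. */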
4195 static int
4196 throughput_pmd_lcore_ldpc_enc(void *arg)
4197 {
4198 	struct thread_params *tp = arg;
4199 	uint16_t enq, deq;
4200 	uint64_t total_time = 0, start_time;
4201 	const uint16_t queue_id = tp->queue_id;
4202 	const uint16_t burst_sz = tp->op_params->burst_sz;
4203 	const uint16_t num_ops = tp->op_params->num_to_process;
4204 	struct rte_bbdev_enc_op *ops_enq[num_ops];
4205 	struct rte_bbdev_enc_op *ops_deq[num_ops];
4206 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
4207 	struct test_buffers *bufs = NULL;
4208 	int i, j, ret;
4209 	struct rte_bbdev_info info;
4210 	uint16_t num_to_enq;
4211 
4212 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4213 			"BURST_SIZE should be <= %u", MAX_BURST);
4214 
4215 	rte_bbdev_info_get(tp->dev_id, &info);
4216 
4217 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4218 			"NUM_OPS cannot exceed %u for this device",
4219 			info.drv.queue_size_lim);
4220 
4221 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4222 
4223 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4224 
4225 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
4226 			num_ops);
4227 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
4228 			num_ops);
4229 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4230 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
4231 				bufs->hard_outputs, ref_op);
4232 
4233 	/* Set counter to validate the ordering */
4234 	for (j = 0; j < num_ops; ++j)
4235 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4236 
4237 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4238 		uint32_t time_out = 0;
4239 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4240 			for (j = 0; j < num_ops; ++j)
4241 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
4242 
4243 		start_time = rte_rdtsc_precise();
4244 
4245 		for (enq = 0, deq = 0; enq < num_ops;) {
4246 			num_to_enq = burst_sz;
4247 
4248 			if (unlikely(num_ops - enq < num_to_enq))
4249 				num_to_enq = num_ops - enq;
4250 
4251 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
4252 					queue_id, &ops_enq[enq], num_to_enq);
4253 
4254 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4255 					queue_id, &ops_deq[deq], enq - deq);
4256 			time_out++;
4257 			if (time_out >= TIME_OUT_POLL) {
4258 				timeout_exit(tp->dev_id);
4259 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4260 			}
4261 		}
4262 
4263 		/* dequeue the remaining */
4264 		time_out = 0;
4265 		while (deq < enq) {
4266 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4267 					queue_id, &ops_deq[deq], enq - deq);
4268 			time_out++;
4269 			if (time_out >= TIME_OUT_POLL) {
4270 				timeout_exit(tp->dev_id);
4271 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4272 			}
4273 		}
4274 
4275 		total_time += rte_rdtsc_precise() - start_time;
4276 	}
4277 
4278 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4279 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
4280 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4281 	}
4282 
4283 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
4284 
4285 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
4286 
4287 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4288 			((double)total_time / (double)rte_get_tsc_hz());
4289 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
4290 			/ 1000000.0) / ((double)total_time /
4291 			(double)rte_get_tsc_hz());
4292 
4293 	return TEST_SUCCESS;
4294 }
4295 
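/* Per-lcore throughput routine for FFT in PMD mode. */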
4296 static int
4297 throughput_pmd_lcore_fft(void *arg)
4298 {
4299 	struct thread_params *tp = arg;
4300 	uint16_t enq, deq;
4301 	uint64_t total_time = 0, start_time;
4302 	const uint16_t queue_id = tp->queue_id;
4303 	const uint16_t burst_sz = tp->op_params->burst_sz;
4304 	const uint16_t num_ops = tp->op_params->num_to_process;
4305 	struct rte_bbdev_fft_op *ops_enq[num_ops];
4306 	struct rte_bbdev_fft_op *ops_deq[num_ops];
4307 	struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
4308 	struct test_buffers *bufs = NULL;
4309 	int i, j, ret;
4310 	struct rte_bbdev_info info;
4311 	uint16_t num_to_enq;
4312 
4313 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4314 			"BURST_SIZE should be <= %u", MAX_BURST);
4315 
4316 	rte_bbdev_info_get(tp->dev_id, &info);
4317 
4318 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4319 			"NUM_OPS cannot exceed %u for this device",
4320 			info.drv.queue_size_lim);
4321 
4322 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4323 
4324 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4325 
4326 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
4327 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
4328 
4329 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4330 		copy_reference_fft_op(ops_enq, num_ops, 0, bufs->inputs,
4331 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
4332 
4333 	/* Set counter to validate the ordering */
4334 	for (j = 0; j < num_ops; ++j)
4335 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4336 
4337 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4338 		uint32_t time_out = 0;
4339 		for (j = 0; j < num_ops; ++j)
4340 			mbuf_reset(ops_enq[j]->fft.base_output.data);
4341 
4342 		start_time = rte_rdtsc_precise();
4343 
4344 		for (enq = 0, deq = 0; enq < num_ops;) {
4345 			num_to_enq = burst_sz;
4346 
4347 			if (unlikely(num_ops - enq < num_to_enq))
4348 				num_to_enq = num_ops - enq;
4349 
4350 			enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
4351 					queue_id, &ops_enq[enq], num_to_enq);
4352 
4353 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4354 					queue_id, &ops_deq[deq], enq - deq);
4355 			time_out++;
4356 			if (time_out >= TIME_OUT_POLL) {
4357 				timeout_exit(tp->dev_id);
4358 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!");
4359 			}
4360 		}
4361 
4362 		/* dequeue the remaining */
4363 		time_out = 0;
4364 		while (deq < enq) {
4365 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4366 					queue_id, &ops_deq[deq], enq - deq);
4367 			time_out++;
4368 			if (time_out >= TIME_OUT_POLL) {
4369 				timeout_exit(tp->dev_id);
4370 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4371 			}
4372 		}
4373 
4374 		total_time += rte_rdtsc_precise() - start_time;
4375 	}
4376 
4377 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4378 		ret = validate_fft_op(ops_deq, num_ops, ref_op);
4379 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4380 	}
4381 
4382 	rte_bbdev_fft_op_free_bulk(ops_enq, num_ops);
4383 
4384 	double tb_len_bits = calc_fft_size(ref_op);
4385 
4386 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4387 			((double)total_time / (double)rte_get_tsc_hz());
4388 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
4389 			1000000.0) / ((double)total_time /
4390 			(double)rte_get_tsc_hz());
4391 
4392 	return TEST_SUCCESS;
4393 }
4394 
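/* Aggregate the encoder performance results over the number of cores used */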
4395 static void
4396 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
4397 {
4398 	unsigned int iter = 0;
4399 	double total_mops = 0, total_mbps = 0;
4400 
4401 	for (iter = 0; iter < used_cores; iter++) {
4402 		printf(
4403 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
4404 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
4405 			t_params[iter].mbps);
4406 		total_mops += t_params[iter].ops_per_sec;
4407 		total_mbps += t_params[iter].mbps;
4408 	}
4409 	printf(
4410 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
4411 		used_cores, total_mops, total_mbps);
4412 }
4413 
4414 /* Aggregate the performance results over the number of cores used */
4415 static void
4416 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
4417 {
4418 	unsigned int core_idx = 0;
4419 	double total_mops = 0, total_mbps = 0;
4420 	uint8_t iter_count = 0;
4421 
4422 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4423 		printf(
4424 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
4425 			t_params[core_idx].lcore_id,
4426 			t_params[core_idx].ops_per_sec,
4427 			t_params[core_idx].mbps,
4428 			t_params[core_idx].iter_count);
4429 		total_mops += t_params[core_idx].ops_per_sec;
4430 		total_mbps += t_params[core_idx].mbps;
4431 		iter_count = RTE_MAX(iter_count,
4432 				t_params[core_idx].iter_count);
4433 	}
4434 	printf(
4435 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
4436 		used_cores, total_mops, total_mbps, iter_count);
4437 }
4438 
4439 /* Aggregate the performance results over the number of cores used */
4440 static void
4441 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
4442 {
4443 	unsigned int core_idx = 0;
4444 	double total_mbps = 0, total_bler = 0, total_iter = 0;
4445 	double snr = get_snr();
4446 
4447 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4448 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
4449 				t_params[core_idx].lcore_id,
4450 				t_params[core_idx].bler * 100,
4451 				t_params[core_idx].iter_average,
4452 				t_params[core_idx].mbps,
4453 				get_vector_filename());
4454 		total_mbps += t_params[core_idx].mbps;
4455 		total_bler += t_params[core_idx].bler;
4456 		total_iter += t_params[core_idx].iter_average;
4457 	}
4458 	total_bler /= used_cores;
4459 	total_iter /= used_cores;
4460 
4461 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.3f Mbps %s\n",
4462 			snr, total_bler * 100, total_iter, get_iter_max(),
4463 			total_mbps, get_vector_filename());
4464 }
4465 
4466 /*
4467  * Test function that determines the BLER (Block Error Rate) wireless performance
4468  */
4469 static int
4470 bler_test(struct active_device *ad,
4471 		struct test_op_params *op_params)
4472 {
4473 	int ret;
4474 	unsigned int lcore_id, used_cores = 0;
4475 	struct thread_params *t_params;
4476 	struct rte_bbdev_info info;
4477 	lcore_function_t *bler_function;
4478 	uint16_t num_lcores;
4479 	const char *op_type_str;
4480 
4481 	rte_bbdev_info_get(ad->dev_id, &info);
4482 
4483 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4484 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4485 			test_vector.op_type);
4486 
4487 	printf("+ ------------------------------------------------------- +\n");
4488 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4489 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4490 			op_params->num_to_process, op_params->num_lcores,
4491 			op_type_str,
4492 			intr_enabled ? "Interrupt mode" : "PMD mode",
4493 			(double)rte_get_tsc_hz() / 1000000000.0);
4494 
4495 	/* Set number of lcores */
4496 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4497 			? ad->nb_queues
4498 			: op_params->num_lcores;
4499 
4500 	/* Allocate memory for thread parameters structure */
4501 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4502 			RTE_CACHE_LINE_SIZE);
4503 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4504 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4505 				RTE_CACHE_LINE_SIZE));
4506 
4507 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
4508 			!check_bit(test_vector.ldpc_dec.op_flags,
4509 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
4510 			&& !check_bit(test_vector.ldpc_dec.op_flags,
4511 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
4512 		bler_function = bler_pmd_lcore_ldpc_dec;
4513 	else if ((test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) &&
4514 			!check_bit(test_vector.turbo_dec.op_flags,
4515 			RTE_BBDEV_TURBO_SOFT_OUTPUT))
4516 		bler_function = bler_pmd_lcore_turbo_dec;
4517 	else
4518 		return TEST_SKIPPED;
4519 
4520 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4521 
4522 	/* Main core is set at first entry */
4523 	t_params[0].dev_id = ad->dev_id;
4524 	t_params[0].lcore_id = rte_lcore_id();
4525 	t_params[0].op_params = op_params;
4526 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4527 	t_params[0].iter_count = 0;
4528 
4529 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4530 		if (used_cores >= num_lcores)
4531 			break;
4532 
4533 		t_params[used_cores].dev_id = ad->dev_id;
4534 		t_params[used_cores].lcore_id = lcore_id;
4535 		t_params[used_cores].op_params = op_params;
4536 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4537 		t_params[used_cores].iter_count = 0;
4538 
4539 		rte_eal_remote_launch(bler_function,
4540 				&t_params[used_cores++], lcore_id);
4541 	}
4542 
4543 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4544 	ret = bler_function(&t_params[0]);
4545 
4546 	/* Main core is always used */
4547 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4548 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4549 
4550 	print_dec_bler(t_params, num_lcores);
4551 
4552 	/* Return if test failed */
4553 	if (ret) {
4554 		rte_free(t_params);
4555 		return ret;
4556 	}
4557 
4558 	/* Placeholder: additional per-test reporting could be printed here. */
4559 	rte_free(t_params);
4560 	return ret;
4561 }
4562 
4563 /*
4564  * Test function that determines how long an enqueue + dequeue of a burst
4565  * takes on available lcores.
4566  */
4567 static int
4568 throughput_test(struct active_device *ad,
4569 		struct test_op_params *op_params)
4570 {
4571 	int ret;
4572 	unsigned int lcore_id, used_cores = 0;
4573 	struct thread_params *t_params, *tp;
4574 	struct rte_bbdev_info info;
4575 	lcore_function_t *throughput_function;
4576 	uint16_t num_lcores;
4577 	const char *op_type_str;
4578 
4579 	rte_bbdev_info_get(ad->dev_id, &info);
4580 
4581 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4582 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4583 			test_vector.op_type);
4584 
4585 	printf("+ ------------------------------------------------------- +\n");
4586 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4587 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4588 			op_params->num_to_process, op_params->num_lcores,
4589 			op_type_str,
4590 			intr_enabled ? "Interrupt mode" : "PMD mode",
4591 			(double)rte_get_tsc_hz() / 1000000000.0);
4592 
4593 	/* Set number of lcores */
4594 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4595 			? ad->nb_queues
4596 			: op_params->num_lcores;
4597 
4598 	/* Allocate memory for thread parameters structure */
4599 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4600 			RTE_CACHE_LINE_SIZE);
4601 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4602 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4603 				RTE_CACHE_LINE_SIZE));
4604 
4605 	if (intr_enabled) {
4606 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4607 			throughput_function = throughput_intr_lcore_dec;
4608 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4609 			throughput_function = throughput_intr_lcore_ldpc_dec;
4610 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4611 			throughput_function = throughput_intr_lcore_enc;
4612 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4613 			throughput_function = throughput_intr_lcore_ldpc_enc;
4614 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4615 			throughput_function = throughput_intr_lcore_fft;
4616 		else
4617 			throughput_function = throughput_intr_lcore_enc;
4618 
4619 		/* Dequeue interrupt callback registration */
4620 		ret = rte_bbdev_callback_register(ad->dev_id,
4621 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
4622 				t_params);
4623 		if (ret < 0) {
4624 			rte_free(t_params);
4625 			return ret;
4626 		}
4627 	} else {
4628 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4629 			throughput_function = throughput_pmd_lcore_dec;
4630 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4631 			throughput_function = throughput_pmd_lcore_ldpc_dec;
4632 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4633 			throughput_function = throughput_pmd_lcore_enc;
4634 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4635 			throughput_function = throughput_pmd_lcore_ldpc_enc;
4636 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4637 			throughput_function = throughput_pmd_lcore_fft;
4638 		else
4639 			throughput_function = throughput_pmd_lcore_enc;
4640 	}
4641 
4642 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4643 
4644 	/* Main core is set at first entry */
4645 	t_params[0].dev_id = ad->dev_id;
4646 	t_params[0].lcore_id = rte_lcore_id();
4647 	t_params[0].op_params = op_params;
4648 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4649 	t_params[0].iter_count = 0;
4650 
4651 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4652 		if (used_cores >= num_lcores)
4653 			break;
4654 
4655 		t_params[used_cores].dev_id = ad->dev_id;
4656 		t_params[used_cores].lcore_id = lcore_id;
4657 		t_params[used_cores].op_params = op_params;
4658 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4659 		t_params[used_cores].iter_count = 0;
4660 
4661 		rte_eal_remote_launch(throughput_function,
4662 				&t_params[used_cores++], lcore_id);
4663 	}
4664 
4665 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4666 	ret = throughput_function(&t_params[0]);
4667 
4668 	/* Main core is always used */
4669 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4670 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4671 
4672 	/* Return if test failed */
4673 	if (ret) {
4674 		rte_free(t_params);
4675 		return ret;
4676 	}
4677 
4678 	/* Print throughput if interrupts are disabled and test passed */
4679 	if (!intr_enabled) {
4680 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4681 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4682 			print_dec_throughput(t_params, num_lcores);
4683 		else
4684 			print_enc_throughput(t_params, num_lcores);
4685 		rte_free(t_params);
4686 		return ret;
4687 	}
4688 
4689 	/* In the interrupt TC we need to wait for the interrupt callback to
4690 	 * dequeue all pending operations. Skip waiting for queues which
4691 	 * reported an error via the processing_status variable.
4692 	 * Wait for the main lcore operations first.
4693 	 */
4694 	tp = &t_params[0];
4695 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4696 		op_params->num_to_process) &&
4697 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4698 		TEST_FAILED))
4699 		rte_pause();
4700 
4701 	tp->ops_per_sec /= TEST_REPETITIONS;
4702 	tp->mbps /= TEST_REPETITIONS;
4703 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4704 
4705 	/* Wait for the worker lcores' operations */
4706 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
4707 		tp = &t_params[used_cores];
4708 
4709 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4710 			op_params->num_to_process) &&
4711 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4712 			TEST_FAILED))
4713 			rte_pause();
4714 
4715 		tp->ops_per_sec /= TEST_REPETITIONS;
4716 		tp->mbps /= TEST_REPETITIONS;
4717 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4718 	}
4719 
4720 	/* Print throughput if test passed */
4721 	if (!ret) {
4722 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4723 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4724 			print_dec_throughput(t_params, num_lcores);
4725 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
4726 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4727 			print_enc_throughput(t_params, num_lcores);
4728 	}
4729 
4730 	rte_free(t_params);
4731 	return ret;
4732 }
4733 
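/* Test case for latency/validation for Turbo Decoder */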
4734 static int
4735 latency_test_dec(struct rte_mempool *mempool,
4736 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4737 		uint16_t dev_id, uint16_t queue_id,
4738 		const uint16_t num_to_process, uint16_t burst_sz,
4739 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, bool disable_et)
4740 {
4741 	int ret = TEST_SUCCESS;
4742 	uint16_t i, j, dequeued;
4743 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4744 	uint64_t start_time = 0, last_time = 0;
4745 
4746 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4747 		uint16_t enq = 0, deq = 0;
4748 		uint32_t time_out = 0;
4749 		bool first_time = true;
4750 		last_time = 0;
4751 
4752 		if (unlikely(num_to_process - dequeued < burst_sz))
4753 			burst_sz = num_to_process - dequeued;
4754 
4755 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4756 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed");
4757 
4758 		ref_op->turbo_dec.iter_max = get_iter_max();
4759 		/* For validation tests we want to enable early termination */
4760 		if (!disable_et && !check_bit(ref_op->turbo_dec.op_flags,
4761 				RTE_BBDEV_TURBO_EARLY_TERMINATION))
4762 			ref_op->turbo_dec.op_flags |= RTE_BBDEV_TURBO_EARLY_TERMINATION;
4763 
4764 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4765 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4766 					bufs->inputs,
4767 					bufs->hard_outputs,
4768 					bufs->soft_outputs,
4769 					ref_op);
4770 
4771 		/* Set counter to validate the ordering */
4772 		for (j = 0; j < burst_sz; ++j)
4773 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4774 
4775 		start_time = rte_rdtsc_precise();
4776 
4777 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4778 				burst_sz);
4779 		TEST_ASSERT(enq == burst_sz,
4780 				"Error enqueueing burst, expected %u, got %u",
4781 				burst_sz, enq);
4782 
4783 		/* Dequeue */
4784 		do {
4785 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4786 					&ops_deq[deq], burst_sz - deq);
4787 			if (likely(first_time && (deq > 0))) {
4788 				last_time = rte_rdtsc_precise() - start_time;
4789 				first_time = false;
4790 			}
4791 			time_out++;
4792 			if (time_out >= TIME_OUT_POLL) {
4793 				timeout_exit(dev_id);
4794 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4795 			}
4796 		} while (unlikely(burst_sz != deq));
4797 
4798 		*max_time = RTE_MAX(*max_time, last_time);
4799 		*min_time = RTE_MIN(*min_time, last_time);
4800 		*total_time += last_time;
4801 
4802 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4803 			ret = validate_dec_op(ops_deq, burst_sz, ref_op);
4804 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4805 		}
4806 
4807 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4808 		dequeued += deq;
4809 	}
4810 
4811 	return i;
4812 }
4813 
4814 /* Test case for latency/validation for LDPC Decoder */
4815 static int
4816 latency_test_ldpc_dec(struct rte_mempool *mempool,
4817 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4818 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4819 		const uint16_t num_to_process, uint16_t burst_sz,
4820 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4821 		bool disable_et)
4822 {
4823 	int ret = TEST_SUCCESS;
4824 	uint16_t i, j, dequeued;
4825 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4826 	uint64_t start_time = 0, last_time = 0;
4827 	bool extDdr = ldpc_cap_flags &
4828 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4829 
4830 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4831 		uint16_t enq = 0, deq = 0;
4832 		uint32_t time_out = 0;
4833 		bool first_time = true;
4834 		last_time = 0;
4835 
4836 		if (unlikely(num_to_process - dequeued < burst_sz))
4837 			burst_sz = num_to_process - dequeued;
4838 
4839 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4840 		TEST_ASSERT_SUCCESS(ret,
4841 				"rte_bbdev_dec_op_alloc_bulk() failed");
4842 
4843 		/* For latency tests we need to disable early termination */
4844 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4845 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4846 			ref_op->ldpc_dec.op_flags &= ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4847 
4848 		ref_op->ldpc_dec.iter_max = get_iter_max();
4849 		/* When ET is disabled, the expected iter_count is iter_max */
4850 		if (disable_et)
4851 			ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4852 
4853 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4854 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4855 					bufs->inputs,
4856 					bufs->hard_outputs,
4857 					bufs->soft_outputs,
4858 					bufs->harq_inputs,
4859 					bufs->harq_outputs,
4860 					ref_op);
4861 
4862 		if (extDdr)
4863 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4864 					burst_sz, true);
4865 
4866 		/* Set counter to validate the ordering */
4867 		for (j = 0; j < burst_sz; ++j)
4868 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4869 
4870 		start_time = rte_rdtsc_precise();
4871 
4872 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4873 				&ops_enq[enq], burst_sz);
4874 		TEST_ASSERT(enq == burst_sz,
4875 				"Error enqueueing burst, expected %u, got %u",
4876 				burst_sz, enq);
4877 
4878 		/* Dequeue */
4879 		do {
4880 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4881 					&ops_deq[deq], burst_sz - deq);
4882 			if (likely(first_time && (deq > 0))) {
4883 				last_time = rte_rdtsc_precise() - start_time;
4884 				first_time = false;
4885 			}
4886 			time_out++;
4887 			if (time_out >= TIME_OUT_POLL) {
4888 				timeout_exit(dev_id);
4889 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4890 			}
4891 		} while (unlikely(burst_sz != deq));
4892 
4893 		*max_time = RTE_MAX(*max_time, last_time);
4894 		*min_time = RTE_MIN(*min_time, last_time);
4895 		*total_time += last_time;
4896 
4897 		if (extDdr)
4898 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4899 
4900 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4901 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, vector_mask);
4902 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4903 		}
4904 
4905 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4906 		dequeued += deq;
4907 	}
4908 	return i;
4909 }
4910 
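/* Test case for latency/validation for Turbo Encoder */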
4911 static int
4912 latency_test_enc(struct rte_mempool *mempool,
4913 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4914 		uint16_t dev_id, uint16_t queue_id,
4915 		const uint16_t num_to_process, uint16_t burst_sz,
4916 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4917 {
4918 	int ret = TEST_SUCCESS;
4919 	uint16_t i, j, dequeued;
4920 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4921 	uint64_t start_time = 0, last_time = 0;
4922 
4923 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4924 		uint16_t enq = 0, deq = 0;
4925 		uint32_t time_out = 0;
4926 		bool first_time = true;
4927 		last_time = 0;
4928 
4929 		if (unlikely(num_to_process - dequeued < burst_sz))
4930 			burst_sz = num_to_process - dequeued;
4931 
4932 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4933 		TEST_ASSERT_SUCCESS(ret,
4934 				"rte_bbdev_enc_op_alloc_bulk() failed");
4935 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4936 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4937 					bufs->inputs,
4938 					bufs->hard_outputs,
4939 					ref_op);
4940 
4941 		/* Set counter to validate the ordering */
4942 		for (j = 0; j < burst_sz; ++j)
4943 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4944 
4945 		start_time = rte_rdtsc_precise();
4946 
4947 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4948 				burst_sz);
4949 		TEST_ASSERT(enq == burst_sz,
4950 				"Error enqueueing burst, expected %u, got %u",
4951 				burst_sz, enq);
4952 
4953 		/* Dequeue */
4954 		do {
4955 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4956 					&ops_deq[deq], burst_sz - deq);
4957 			if (likely(first_time && (deq > 0))) {
4958 				last_time = rte_rdtsc_precise() - start_time;
4959 				first_time = false;
4960 			}
4961 			time_out++;
4962 			if (time_out >= TIME_OUT_POLL) {
4963 				timeout_exit(dev_id);
4964 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
4965 			}
4966 		} while (unlikely(burst_sz != deq));
4967 
4968 		*max_time = RTE_MAX(*max_time, last_time);
4969 		*min_time = RTE_MIN(*min_time, last_time);
4970 		*total_time += last_time;
4971 
4972 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4973 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4974 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4975 		}
4976 
4977 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4978 		dequeued += deq;
4979 	}
4980 
4981 	return i;
4982 }
4983 
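/* Test case for latency/validation for LDPC Encoder */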
4984 static int
4985 latency_test_ldpc_enc(struct rte_mempool *mempool,
4986 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4987 		uint16_t dev_id, uint16_t queue_id,
4988 		const uint16_t num_to_process, uint16_t burst_sz,
4989 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4990 {
4991 	int ret = TEST_SUCCESS;
4992 	uint16_t i, j, dequeued;
4993 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4994 	uint64_t start_time = 0, last_time = 0;
4995 
4996 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4997 		uint16_t enq = 0, deq = 0;
4998 		uint32_t time_out = 0;
4999 		bool first_time = true;
5000 		last_time = 0;
5001 
5002 		if (unlikely(num_to_process - dequeued < burst_sz))
5003 			burst_sz = num_to_process - dequeued;
5004 
5005 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5006 		TEST_ASSERT_SUCCESS(ret,
5007 				"rte_bbdev_enc_op_alloc_bulk() failed");
5008 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5009 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
5010 					bufs->inputs,
5011 					bufs->hard_outputs,
5012 					ref_op);
5013 
5014 		/* Set counter to validate the ordering */
5015 		for (j = 0; j < burst_sz; ++j)
5016 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
5017 
5018 		start_time = rte_rdtsc_precise();
5019 
5020 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
5021 				&ops_enq[enq], burst_sz);
5022 		TEST_ASSERT(enq == burst_sz,
5023 				"Error enqueueing burst, expected %u, got %u",
5024 				burst_sz, enq);
5025 
5026 		/* Dequeue */
5027 		do {
5028 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5029 					&ops_deq[deq], burst_sz - deq);
5030 			if (likely(first_time && (deq > 0))) {
5031 				last_time = rte_rdtsc_precise() - start_time;
5032 				first_time = false;
5033 			}
5034 			time_out++;
5035 			if (time_out >= TIME_OUT_POLL) {
5036 				timeout_exit(dev_id);
5037 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
5038 			}
5039 		} while (unlikely(burst_sz != deq));
5040 
5041 		*max_time = RTE_MAX(*max_time, last_time);
5042 		*min_time = RTE_MIN(*min_time, last_time);
5043 		*total_time += last_time;
5044 
5045 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
5046 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
5047 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
5048 		}
5049 
5050 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5051 		dequeued += deq;
5052 	}
5053 
5054 	return i;
5055 }
5056 
5057 
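/* Test case for latency/validation for FFT */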
5058 static int
5059 latency_test_fft(struct rte_mempool *mempool,
5060 		struct test_buffers *bufs, struct rte_bbdev_fft_op *ref_op,
5061 		uint16_t dev_id, uint16_t queue_id,
5062 		const uint16_t num_to_process, uint16_t burst_sz,
5063 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
5064 {
5065 	int ret = TEST_SUCCESS;
5066 	uint16_t i, j, dequeued;
5067 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5068 	uint64_t start_time = 0, last_time = 0;
5069 
5070 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5071 		uint16_t enq = 0, deq = 0;
5072 		uint32_t time_out = 0;
5073 		bool first_time = true;
5074 		last_time = 0;
5075 
5076 		if (unlikely(num_to_process - dequeued < burst_sz))
5077 			burst_sz = num_to_process - dequeued;
5078 
5079 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
5080 		TEST_ASSERT_SUCCESS(ret,
5081 				"rte_bbdev_fft_op_alloc_bulk() failed");
5082 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5083 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
5084 					bufs->inputs,
5085 					bufs->hard_outputs, bufs->soft_outputs,
5086 					ref_op);
5087 
5088 		/* Set counter to validate the ordering */
5089 		for (j = 0; j < burst_sz; ++j)
5090 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
5091 
5092 		start_time = rte_rdtsc_precise();
5093 
5094 		enq = rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
5095 				&ops_enq[enq], burst_sz);
5096 		TEST_ASSERT(enq == burst_sz,
5097 				"Error enqueueing burst, expected %u, got %u",
5098 				burst_sz, enq);
5099 
5100 		/* Dequeue */
5101 		do {
5102 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5103 					&ops_deq[deq], burst_sz - deq);
5104 			if (likely(first_time && (deq > 0))) {
5105 				last_time = rte_rdtsc_precise() - start_time;
5106 				first_time = false;
5107 			}
5108 			time_out++;
5109 			if (time_out >= TIME_OUT_POLL) {
5110 				timeout_exit(dev_id);
5111 				TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!");
5112 			}
5113 		} while (unlikely(burst_sz != deq));
5114 
5115 		*max_time = RTE_MAX(*max_time, last_time);
5116 		*min_time = RTE_MIN(*min_time, last_time);
5117 		*total_time += last_time;
5118 
5119 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
5120 			ret = validate_fft_op(ops_deq, burst_sz, ref_op);
5121 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
5122 		}
5123 
5124 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
5125 		dequeued += deq;
5126 	}
5127 
5128 	return i;
5129 }
5130 
5131 /* Common function for running validation and latency test cases */
5132 static int
5133 validation_latency_test(struct active_device *ad,
5134 		struct test_op_params *op_params, bool latency_flag)
5135 {
5136 	int iter;
5137 	uint16_t burst_sz = op_params->burst_sz;
5138 	const uint16_t num_to_process = op_params->num_to_process;
5139 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5140 	const uint16_t queue_id = ad->queue_ids[0];
5141 	struct test_buffers *bufs = NULL;
5142 	struct rte_bbdev_info info;
5143 	uint64_t total_time, min_time, max_time;
5144 	const char *op_type_str;
5145 
5146 	total_time = max_time = 0;
5147 	min_time = UINT64_MAX;
5148 
5149 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5150 			"BURST_SIZE should be <= %u", MAX_BURST);
5151 
5152 	rte_bbdev_info_get(ad->dev_id, &info);
5153 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
5154 
5155 	op_type_str = rte_bbdev_op_type_str(op_type);
5156 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5157 
5158 	printf("+ ------------------------------------------------------- +\n");
5159 	if (latency_flag)
5160 		printf("== test: latency\ndev: ");
5161 	else
5162 		printf("== test: validation\ndev: ");
5163 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
5164 			info.dev_name, burst_sz, num_to_process, op_type_str);
5165 
5166 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
5167 		iter = latency_test_dec(op_params->mp, bufs,
5168 				op_params->ref_dec_op, ad->dev_id, queue_id,
5169 				num_to_process, burst_sz, &total_time,
5170 				&min_time, &max_time, latency_flag);
5171 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5172 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
5173 				op_params->ref_enc_op, ad->dev_id, queue_id,
5174 				num_to_process, burst_sz, &total_time,
5175 				&min_time, &max_time);
5176 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5177 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
5178 				op_params->ref_dec_op, op_params->vector_mask,
5179 				ad->dev_id, queue_id, num_to_process,
5180 				burst_sz, &total_time, &min_time, &max_time,
5181 				latency_flag);
5182 	else if (op_type == RTE_BBDEV_OP_FFT)
5183 		iter = latency_test_fft(op_params->mp, bufs,
5184 				op_params->ref_fft_op,
5185 				ad->dev_id, queue_id,
5186 				num_to_process, burst_sz, &total_time,
5187 				&min_time, &max_time);
5188 	else /* RTE_BBDEV_OP_TURBO_ENC */
5189 		iter = latency_test_enc(op_params->mp, bufs,
5190 				op_params->ref_enc_op,
5191 				ad->dev_id, queue_id,
5192 				num_to_process, burst_sz, &total_time,
5193 				&min_time, &max_time);
5194 
5195 	if (iter <= 0)
5196 		return TEST_FAILED;
5197 
5198 	printf("Operation latency:\n"
5199 			"\tavg: %lg cycles, %lg us\n"
5200 			"\tmin: %lg cycles, %lg us\n"
5201 			"\tmax: %lg cycles, %lg us\n",
5202 			(double)total_time / (double)iter,
5203 			(double)(total_time * 1000000) / (double)iter /
5204 			(double)rte_get_tsc_hz(), (double)min_time,
5205 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
5206 			(double)max_time, (double)(max_time * 1000000) /
5207 			(double)rte_get_tsc_hz());
5208 
5209 	return TEST_SUCCESS;
5210 }
5211 
5212 static int
5213 latency_test(struct active_device *ad, struct test_op_params *op_params)
5214 {
5215 	return validation_latency_test(ad, op_params, true);
5216 }
5217 
5218 static int
5219 validation_test(struct active_device *ad, struct test_op_params *op_params)
5220 {
5221 	return validation_latency_test(ad, op_params, false);
5222 }
5223 
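/* Read a snapshot of the statistics for a given device queue */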
5224 static int
5225 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
5226 		struct rte_bbdev_stats *stats)
5227 {
5228 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
5229 	struct rte_bbdev_stats *q_stats;
5230 
5231 	if (queue_id >= dev->data->num_queues)
5232 		return -1;
5233 
5234 	q_stats = &dev->data->queues[queue_id].queue_stats;
5235 
5236 	stats->enqueued_count = q_stats->enqueued_count;
5237 	stats->dequeued_count = q_stats->dequeued_count;
5238 	stats->enqueue_err_count = q_stats->enqueue_err_count;
5239 	stats->dequeue_err_count = q_stats->dequeue_err_count;
5240 	stats->enqueue_warn_count = q_stats->enqueue_warn_count;
5241 	stats->dequeue_warn_count = q_stats->dequeue_warn_count;
5242 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
5243 
5244 	return 0;
5245 }
5246 
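/*
 * Measure enqueue and dequeue offload cost for FFT operations.
 * The SW enqueue time is the enqueue call duration minus the accelerator
 * cycles reported in the queue stats (acc_offload_cycles).
 */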
5247 static int
5248 offload_latency_test_fft(struct rte_mempool *mempool, struct test_buffers *bufs,
5249 		struct rte_bbdev_fft_op *ref_op, uint16_t dev_id,
5250 		uint16_t queue_id, const uint16_t num_to_process,
5251 		uint16_t burst_sz, struct test_time_stats *time_st)
5252 {
5253 	int i, dequeued, ret;
5254 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5255 	uint64_t enq_start_time, deq_start_time;
5256 	uint64_t enq_sw_last_time, deq_last_time;
5257 	struct rte_bbdev_stats stats;
5258 
5259 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5260 		uint16_t enq = 0, deq = 0;
5261 
5262 		if (unlikely(num_to_process - dequeued < burst_sz))
5263 			burst_sz = num_to_process - dequeued;
5264 
5265 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
5266 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5267 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5268 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
5269 					bufs->inputs,
5270 					bufs->hard_outputs, bufs->soft_outputs,
5271 					ref_op);
5272 
5273 		/* Start time meas for enqueue function offload latency */
5274 		enq_start_time = rte_rdtsc_precise();
5275 		do {
5276 			enq += rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
5277 					&ops_enq[enq], burst_sz - enq);
5278 		} while (unlikely(burst_sz != enq));
5279 
5280 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5281 		TEST_ASSERT_SUCCESS(ret,
5282 				"Failed to get stats for queue (%u) of device (%u)",
5283 				queue_id, dev_id);
5284 
5285 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
5286 				stats.acc_offload_cycles;
5287 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5288 				enq_sw_last_time);
5289 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5290 				enq_sw_last_time);
5291 		time_st->enq_sw_total_time += enq_sw_last_time;
5292 
5293 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5294 				stats.acc_offload_cycles);
5295 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5296 				stats.acc_offload_cycles);
5297 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5298 
5299 		/* give time for device to process ops */
5300 		rte_delay_us(WAIT_OFFLOAD_US);
5301 
5302 		/* Start time meas for dequeue function offload latency */
5303 		deq_start_time = rte_rdtsc_precise();
5304 		/* Dequeue one operation */
5305 		do {
5306 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5307 					&ops_deq[deq], enq);
5308 		} while (unlikely(deq == 0));
5309 
5310 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5311 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5312 				deq_last_time);
5313 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5314 				deq_last_time);
5315 		time_st->deq_total_time += deq_last_time;
5316 
5317 		/* Dequeue remaining operations if needed */
5318 		while (burst_sz != deq)
5319 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5320 					&ops_deq[deq], burst_sz - deq);
5321 
5322 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
5323 		dequeued += deq;
5324 	}
5325 
5326 	return i;
5327 }
5328 
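/* Measure enqueue and dequeue offload cost for Turbo Decoder operations */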
5329 static int
5330 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
5331 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5332 		uint16_t queue_id, const uint16_t num_to_process,
5333 		uint16_t burst_sz, struct test_time_stats *time_st)
5334 {
5335 	int i, dequeued, ret;
5336 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5337 	uint64_t enq_start_time, deq_start_time;
5338 	uint64_t enq_sw_last_time, deq_last_time;
5339 	struct rte_bbdev_stats stats;
5340 
5341 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5342 		uint16_t enq = 0, deq = 0;
5343 
5344 		if (unlikely(num_to_process - dequeued < burst_sz))
5345 			burst_sz = num_to_process - dequeued;
5346 
5347 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5348 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5349 		ref_op->turbo_dec.iter_max = get_iter_max();
5350 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5351 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
5352 					bufs->inputs,
5353 					bufs->hard_outputs,
5354 					bufs->soft_outputs,
5355 					ref_op);
5356 
5357 		/* Start time meas for enqueue function offload latency */
5358 		enq_start_time = rte_rdtsc_precise();
5359 		do {
5360 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
5361 					&ops_enq[enq], burst_sz - enq);
5362 		} while (unlikely(burst_sz != enq));
5363 
5364 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5365 		TEST_ASSERT_SUCCESS(ret,
5366 				"Failed to get stats for queue (%u) of device (%u)",
5367 				queue_id, dev_id);
5368 
5369 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
5370 				stats.acc_offload_cycles;
5371 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5372 				enq_sw_last_time);
5373 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5374 				enq_sw_last_time);
5375 		time_st->enq_sw_total_time += enq_sw_last_time;
5376 
5377 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5378 				stats.acc_offload_cycles);
5379 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5380 				stats.acc_offload_cycles);
5381 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5382 
5383 		/* give time for device to process ops */
5384 		rte_delay_us(WAIT_OFFLOAD_US);
5385 
5386 		/* Start time meas for dequeue function offload latency */
5387 		deq_start_time = rte_rdtsc_precise();
5388 		/* Dequeue one operation */
5389 		do {
5390 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5391 					&ops_deq[deq], enq);
5392 		} while (unlikely(deq == 0));
5393 
5394 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5395 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5396 				deq_last_time);
5397 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5398 				deq_last_time);
5399 		time_st->deq_total_time += deq_last_time;
5400 
5401 		/* Dequeue remaining operations if needed */
5402 		while (burst_sz != deq)
5403 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5404 					&ops_deq[deq], burst_sz - deq);
5405 
5406 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5407 		dequeued += deq;
5408 	}
5409 
5410 	return i;
5411 }
5412 
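/* Measure enqueue and dequeue offload cost for LDPC Decoder operations */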
5413 static int
5414 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
5415 		struct test_buffers *bufs,
5416 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5417 		uint16_t queue_id, const uint16_t num_to_process,
5418 		uint16_t burst_sz, struct test_time_stats *time_st)
5419 {
5420 	int i, dequeued, ret;
5421 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5422 	uint64_t enq_start_time, deq_start_time;
5423 	uint64_t enq_sw_last_time, deq_last_time;
5424 	struct rte_bbdev_stats stats;
5425 	bool extDdr = ldpc_cap_flags &
5426 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
5427 
5428 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5429 		uint16_t enq = 0, deq = 0;
5430 
5431 		if (unlikely(num_to_process - dequeued < burst_sz))
5432 			burst_sz = num_to_process - dequeued;
5433 
5434 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5435 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5436 		ref_op->ldpc_dec.iter_max = get_iter_max();
5437 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5438 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
5439 					bufs->inputs,
5440 					bufs->hard_outputs,
5441 					bufs->soft_outputs,
5442 					bufs->harq_inputs,
5443 					bufs->harq_outputs,
5444 					ref_op);
5445 
5446 		if (extDdr)
5447 			preload_harq_ddr(dev_id, queue_id, ops_enq,
5448 					burst_sz, true);
5449 
5450 		/* Start time meas for enqueue function offload latency */
5451 		enq_start_time = rte_rdtsc_precise();
5452 		do {
5453 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
5454 					&ops_enq[enq], burst_sz - enq);
5455 		} while (unlikely(burst_sz != enq));
5456 
5457 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5458 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5459 		TEST_ASSERT_SUCCESS(ret,
5460 				"Failed to get stats for queue (%u) of device (%u)",
5461 				queue_id, dev_id);
5462 
5463 		enq_sw_last_time -= stats.acc_offload_cycles;
5464 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5465 				enq_sw_last_time);
5466 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5467 				enq_sw_last_time);
5468 		time_st->enq_sw_total_time += enq_sw_last_time;
5469 
5470 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5471 				stats.acc_offload_cycles);
5472 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5473 				stats.acc_offload_cycles);
5474 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5475 
5476 		/* give time for device to process ops */
5477 		rte_delay_us(WAIT_OFFLOAD_US);
5478 
5479 		/* Start time meas for dequeue function offload latency */
5480 		deq_start_time = rte_rdtsc_precise();
5481 		/* Dequeue one operation */
5482 		do {
5483 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5484 					&ops_deq[deq], enq);
5485 		} while (unlikely(deq == 0));
5486 
5487 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5488 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5489 				deq_last_time);
5490 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5491 				deq_last_time);
5492 		time_st->deq_total_time += deq_last_time;
5493 
5494 		/* Dequeue remaining operations if needed */
5495 		while (burst_sz != deq)
5496 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5497 					&ops_deq[deq], burst_sz - deq);
5498 
5499 		if (extDdr) {
5500 			/* Read loopback is not thread safe */
5501 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
5502 		}
5503 
5504 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5505 		dequeued += deq;
5506 	}
5507 
5508 	return i;
5509 }
5510 
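/* Measure enqueue and dequeue offload cost for Turbo Encoder operations */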
5511 static int
5512 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
5513 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5514 		uint16_t queue_id, const uint16_t num_to_process,
5515 		uint16_t burst_sz, struct test_time_stats *time_st)
5516 {
5517 	int i, dequeued, ret;
5518 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5519 	uint64_t enq_start_time, deq_start_time;
5520 	uint64_t enq_sw_last_time, deq_last_time;
5521 	struct rte_bbdev_stats stats;
5522 
5523 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5524 		uint16_t enq = 0, deq = 0;
5525 
5526 		if (unlikely(num_to_process - dequeued < burst_sz))
5527 			burst_sz = num_to_process - dequeued;
5528 
5529 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5530 		TEST_ASSERT_SUCCESS(ret,
5531 				"rte_bbdev_enc_op_alloc_bulk() failed");
5532 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5533 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
5534 					bufs->inputs,
5535 					bufs->hard_outputs,
5536 					ref_op);
5537 
5538 		/* Start time meas for enqueue function offload latency */
5539 		enq_start_time = rte_rdtsc_precise();
5540 		do {
5541 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
5542 					&ops_enq[enq], burst_sz - enq);
5543 		} while (unlikely(burst_sz != enq));
5544 
5545 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5546 
5547 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5548 		TEST_ASSERT_SUCCESS(ret,
5549 				"Failed to get stats for queue (%u) of device (%u)",
5550 				queue_id, dev_id);
5551 		enq_sw_last_time -= stats.acc_offload_cycles;
5552 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5553 				enq_sw_last_time);
5554 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5555 				enq_sw_last_time);
5556 		time_st->enq_sw_total_time += enq_sw_last_time;
5557 
5558 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5559 				stats.acc_offload_cycles);
5560 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5561 				stats.acc_offload_cycles);
5562 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5563 
5564 		/* give time for device to process ops */
5565 		rte_delay_us(WAIT_OFFLOAD_US);
5566 
5567 		/* Start time meas for dequeue function offload latency */
5568 		deq_start_time = rte_rdtsc_precise();
5569 		/* Dequeue one operation */
5570 		do {
5571 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5572 					&ops_deq[deq], enq);
5573 		} while (unlikely(deq == 0));
5574 
5575 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5576 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5577 				deq_last_time);
5578 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5579 				deq_last_time);
5580 		time_st->deq_total_time += deq_last_time;
5581 
5582 		while (burst_sz != deq)
5583 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5584 					&ops_deq[deq], burst_sz - deq);
5585 
5586 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5587 		dequeued += deq;
5588 	}
5589 
5590 	return i;
5591 }
5592 
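/* Measure enqueue and dequeue offload cost for LDPC Encoder operations */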
5593 static int
5594 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
5595 		struct test_buffers *bufs,
5596 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5597 		uint16_t queue_id, const uint16_t num_to_process,
5598 		uint16_t burst_sz, struct test_time_stats *time_st)
5599 {
5600 	int i, dequeued, ret;
5601 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5602 	uint64_t enq_start_time, deq_start_time;
5603 	uint64_t enq_sw_last_time, deq_last_time;
5604 	struct rte_bbdev_stats stats;
5605 
5606 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5607 		uint16_t enq = 0, deq = 0;
5608 
5609 		if (unlikely(num_to_process - dequeued < burst_sz))
5610 			burst_sz = num_to_process - dequeued;
5611 
5612 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5613 		TEST_ASSERT_SUCCESS(ret,
5614 				"rte_bbdev_enc_op_alloc_bulk() failed");
5615 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5616 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
5617 					bufs->inputs,
5618 					bufs->hard_outputs,
5619 					ref_op);
5620 
5621 		/* Start time meas for enqueue function offload latency */
5622 		enq_start_time = rte_rdtsc_precise();
5623 		do {
5624 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
5625 					&ops_enq[enq], burst_sz - enq);
5626 		} while (unlikely(burst_sz != enq));
5627 
5628 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5629 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5630 		TEST_ASSERT_SUCCESS(ret,
5631 				"Failed to get stats for queue (%u) of device (%u)",
5632 				queue_id, dev_id);
5633 
5634 		enq_sw_last_time -= stats.acc_offload_cycles;
5635 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5636 				enq_sw_last_time);
5637 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5638 				enq_sw_last_time);
5639 		time_st->enq_sw_total_time += enq_sw_last_time;
5640 
5641 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5642 				stats.acc_offload_cycles);
5643 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5644 				stats.acc_offload_cycles);
5645 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5646 
5647 		/* give time for device to process ops */
5648 		rte_delay_us(WAIT_OFFLOAD_US);
5649 
5650 		/* Start time meas for dequeue function offload latency */
5651 		deq_start_time = rte_rdtsc_precise();
5652 		/* Dequeue one operation */
5653 		do {
5654 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5655 					&ops_deq[deq], enq);
5656 		} while (unlikely(deq == 0));
5657 
5658 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5659 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5660 				deq_last_time);
5661 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5662 				deq_last_time);
5663 		time_st->deq_total_time += deq_last_time;
5664 
5665 		while (burst_sz != deq)
5666 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5667 					&ops_deq[deq], burst_sz - deq);
5668 
5669 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5670 		dequeued += deq;
5671 	}
5672 
5673 	return i;
5674 }
5675 
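/* Common function for running the offload cost test cases */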
5676 static int
5677 offload_cost_test(struct active_device *ad,
5678 		struct test_op_params *op_params)
5679 {
5680 	int iter;
5681 	uint16_t burst_sz = op_params->burst_sz;
5682 	const uint16_t num_to_process = op_params->num_to_process;
5683 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5684 	const uint16_t queue_id = ad->queue_ids[0];
5685 	struct test_buffers *bufs = NULL;
5686 	struct rte_bbdev_info info;
5687 	const char *op_type_str;
5688 	struct test_time_stats time_st;
5689 
5690 	memset(&time_st, 0, sizeof(struct test_time_stats));
5691 	time_st.enq_sw_min_time = UINT64_MAX;
5692 	time_st.enq_acc_min_time = UINT64_MAX;
5693 	time_st.deq_min_time = UINT64_MAX;
5694 
5695 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5696 			"BURST_SIZE should be <= %u", MAX_BURST);
5697 
5698 	rte_bbdev_info_get(ad->dev_id, &info);
5699 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
5700 
5701 	op_type_str = rte_bbdev_op_type_str(op_type);
5702 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5703 
5704 	printf("+ ------------------------------------------------------- +\n");
5705 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5706 			info.dev_name, burst_sz, num_to_process, op_type_str);
5707 
5708 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
5709 		iter = offload_latency_test_dec(op_params->mp, bufs,
5710 				op_params->ref_dec_op, ad->dev_id, queue_id,
5711 				num_to_process, burst_sz, &time_st);
5712 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
5713 		iter = offload_latency_test_enc(op_params->mp, bufs,
5714 				op_params->ref_enc_op, ad->dev_id, queue_id,
5715 				num_to_process, burst_sz, &time_st);
5716 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5717 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
5718 				op_params->ref_enc_op, ad->dev_id, queue_id,
5719 				num_to_process, burst_sz, &time_st);
5720 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5721 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
5722 			op_params->ref_dec_op, ad->dev_id, queue_id,
5723 			num_to_process, burst_sz, &time_st);
5724 	else if (op_type == RTE_BBDEV_OP_FFT)
5725 		iter = offload_latency_test_fft(op_params->mp, bufs,
5726 			op_params->ref_fft_op, ad->dev_id, queue_id,
5727 			num_to_process, burst_sz, &time_st);
5728 	else
5729 		iter = offload_latency_test_enc(op_params->mp, bufs,
5730 				op_params->ref_enc_op, ad->dev_id, queue_id,
5731 				num_to_process, burst_sz, &time_st);
5732 
5733 	if (iter <= 0)
5734 		return TEST_FAILED;
5735 
5736 	printf("Enqueue driver offload cost latency:\n"
5737 			"\tavg: %lg cycles, %lg us\n"
5738 			"\tmin: %lg cycles, %lg us\n"
5739 			"\tmax: %lg cycles, %lg us\n"
5740 			"Enqueue accelerator offload cost latency:\n"
5741 			"\tavg: %lg cycles, %lg us\n"
5742 			"\tmin: %lg cycles, %lg us\n"
5743 			"\tmax: %lg cycles, %lg us\n",
5744 			(double)time_st.enq_sw_total_time / (double)iter,
5745 			(double)(time_st.enq_sw_total_time * 1000000) /
5746 			(double)iter / (double)rte_get_tsc_hz(),
5747 			(double)time_st.enq_sw_min_time,
5748 			(double)(time_st.enq_sw_min_time * 1000000) /
5749 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
5750 			(double)(time_st.enq_sw_max_time * 1000000) /
5751 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
5752 			(double)iter,
5753 			(double)(time_st.enq_acc_total_time * 1000000) /
5754 			(double)iter / (double)rte_get_tsc_hz(),
5755 			(double)time_st.enq_acc_min_time,
5756 			(double)(time_st.enq_acc_min_time * 1000000) /
5757 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
5758 			(double)(time_st.enq_acc_max_time * 1000000) /
5759 			rte_get_tsc_hz());
5760 
5761 	printf("Dequeue offload cost latency - one op:\n"
5762 			"\tavg: %lg cycles, %lg us\n"
5763 			"\tmin: %lg cycles, %lg us\n"
5764 			"\tmax: %lg cycles, %lg us\n",
5765 			(double)time_st.deq_total_time / (double)iter,
5766 			(double)(time_st.deq_total_time * 1000000) /
5767 			(double)iter / (double)rte_get_tsc_hz(),
5768 			(double)time_st.deq_min_time,
5769 			(double)(time_st.deq_min_time * 1000000) /
5770 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
5771 			(double)(time_st.deq_max_time * 1000000) /
5772 			rte_get_tsc_hz());
5773 
5774 	struct rte_bbdev_stats stats = {0};
5775 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
5776 	if (stats.enqueue_warn_count > 0)
5777 		printf("Warning reported on the queue : %10"PRIu64"\n",
5778 			stats.enqueue_warn_count);
5779 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
5780 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
5781 				"Mismatch in enqueue count %10"PRIu64" %d",
5782 				stats.enqueued_count, num_to_process);
5783 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
5784 				"Mismatch in dequeue count %10"PRIu64" %d",
5785 				stats.dequeued_count, num_to_process);
5786 	}
5787 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
5788 			"Enqueue count Error %10"PRIu64"",
5789 			stats.enqueue_err_count);
5790 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
5791 			"Dequeue count Error (%10"PRIu64"",
5792 			stats.dequeue_err_count);
5793 
5794 	return TEST_SUCCESS;
5795 }
5796 
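/* Measure dequeue offload cost from an empty queue for decoder operations */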
5797 static int
5798 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
5799 		const uint16_t num_to_process, uint16_t burst_sz,
5800 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5801 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5802 {
5803 	int i, deq_total;
5804 	struct rte_bbdev_dec_op *ops[MAX_BURST];
5805 	uint64_t deq_start_time, deq_last_time;
5806 
5807 	/* Test deq offload latency from an empty queue */
5808 
5809 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5810 			++i, deq_total += burst_sz) {
5811 		deq_start_time = rte_rdtsc_precise();
5812 
5813 		if (unlikely(num_to_process - deq_total < burst_sz))
5814 			burst_sz = num_to_process - deq_total;
5815 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5816 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
5817 					burst_sz);
5818 		else
5819 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
5820 					burst_sz);
5821 
5822 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5823 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5824 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5825 		*deq_total_time += deq_last_time;
5826 	}
5827 
5828 	return i;
5829 }
5830 
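/* Measure dequeue offload cost from an empty queue for encoder operations */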
5831 static int
5832 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
5833 		const uint16_t num_to_process, uint16_t burst_sz,
5834 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5835 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5836 {
5837 	int i, deq_total;
5838 	struct rte_bbdev_enc_op *ops[MAX_BURST];
5839 	uint64_t deq_start_time, deq_last_time;
5840 
5841 	/* Test deq offload latency from an empty queue */
5842 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5843 			++i, deq_total += burst_sz) {
5844 		deq_start_time = rte_rdtsc_precise();
5845 
5846 		if (unlikely(num_to_process - deq_total < burst_sz))
5847 			burst_sz = num_to_process - deq_total;
5848 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5849 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
5850 					burst_sz);
5851 		else
5852 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
5853 					burst_sz);
5854 
5855 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5856 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5857 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5858 		*deq_total_time += deq_last_time;
5859 	}
5860 
5861 	return i;
5862 }
5863 
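/* Common function for running the empty-queue dequeue offload test */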
5864 static int
5865 offload_latency_empty_q_test(struct active_device *ad,
5866 		struct test_op_params *op_params)
5867 {
5868 	int iter;
5869 	uint64_t deq_total_time, deq_min_time, deq_max_time;
5870 	uint16_t burst_sz = op_params->burst_sz;
5871 	const uint16_t num_to_process = op_params->num_to_process;
5872 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5873 	const uint16_t queue_id = ad->queue_ids[0];
5874 	struct rte_bbdev_info info;
5875 	const char *op_type_str;
5876 
5877 	deq_total_time = deq_max_time = 0;
5878 	deq_min_time = UINT64_MAX;
5879 
5880 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5881 			"BURST_SIZE should be <= %u", MAX_BURST);
5882 
5883 	rte_bbdev_info_get(ad->dev_id, &info);
5884 
5885 	op_type_str = rte_bbdev_op_type_str(op_type);
5886 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5887 
5888 	printf("+ ------------------------------------------------------- +\n");
5889 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5890 			info.dev_name, burst_sz, num_to_process, op_type_str);
5891 
5892 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
5893 			op_type == RTE_BBDEV_OP_LDPC_DEC)
5894 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
5895 				num_to_process, burst_sz, &deq_total_time,
5896 				&deq_min_time, &deq_max_time, op_type);
5897 	else
5898 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
5899 				num_to_process, burst_sz, &deq_total_time,
5900 				&deq_min_time, &deq_max_time, op_type);
5901 
5902 	if (iter <= 0)
5903 		return TEST_FAILED;
5904 
5905 	printf("Empty dequeue offload:\n"
5906 			"\tavg: %lg cycles, %lg us\n"
5907 			"\tmin: %lg cycles, %lg us\n"
5908 			"\tmax: %lg cycles, %lg us\n",
5909 			(double)deq_total_time / (double)iter,
5910 			(double)(deq_total_time * 1000000) / (double)iter /
5911 			(double)rte_get_tsc_hz(), (double)deq_min_time,
5912 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
5913 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
5914 			rte_get_tsc_hz());
5915 
5916 	return TEST_SUCCESS;
5917 }
5918 
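/* Test case wrappers invoked through run_test_case() by the test suites below */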
5919 static int
5920 bler_tc(void)
5921 {
5922 	return run_test_case(bler_test);
5923 }
5924 
5925 static int
5926 throughput_tc(void)
5927 {
5928 	return run_test_case(throughput_test);
5929 }
5930 
5931 static int
5932 offload_cost_tc(void)
5933 {
5934 	return run_test_case(offload_cost_test);
5935 }
5936 
5937 static int
5938 offload_latency_empty_q_tc(void)
5939 {
5940 	return run_test_case(offload_latency_empty_q_test);
5941 }
5942 
5943 static int
5944 latency_tc(void)
5945 {
5946 	return run_test_case(latency_test);
5947 }
5948 
5949 static int
5950 validation_tc(void)
5951 {
5952 	return run_test_case(validation_test);
5953 }
5954 
5955 static int
5956 interrupt_tc(void)
5957 {
5958 	return run_test_case(throughput_test);
5959 }
5960 
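/* Test suites for each test command registered at the end of this file */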
5961 static struct unit_test_suite bbdev_bler_testsuite = {
5962 	.suite_name = "BBdev BLER Tests",
5963 	.setup = testsuite_setup,
5964 	.teardown = testsuite_teardown,
5965 	.unit_test_cases = {
5966 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5967 		TEST_CASES_END() /**< NULL terminate unit test array */
5968 	}
5969 };
5970 
5971 static struct unit_test_suite bbdev_throughput_testsuite = {
5972 	.suite_name = "BBdev Throughput Tests",
5973 	.setup = testsuite_setup,
5974 	.teardown = testsuite_teardown,
5975 	.unit_test_cases = {
5976 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5977 		TEST_CASES_END() /**< NULL terminate unit test array */
5978 	}
5979 };
5980 
5981 static struct unit_test_suite bbdev_validation_testsuite = {
5982 	.suite_name = "BBdev Validation Tests",
5983 	.setup = testsuite_setup,
5984 	.teardown = testsuite_teardown,
5985 	.unit_test_cases = {
5986 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5987 		TEST_CASES_END() /**< NULL terminate unit test array */
5988 	}
5989 };
5990 
5991 static struct unit_test_suite bbdev_latency_testsuite = {
5992 	.suite_name = "BBdev Latency Tests",
5993 	.setup = testsuite_setup,
5994 	.teardown = testsuite_teardown,
5995 	.unit_test_cases = {
5996 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5997 		TEST_CASES_END() /**< NULL terminate unit test array */
5998 	}
5999 };
6000 
6001 static struct unit_test_suite bbdev_offload_cost_testsuite = {
6002 	.suite_name = "BBdev Offload Cost Tests",
6003 	.setup = testsuite_setup,
6004 	.teardown = testsuite_teardown,
6005 	.unit_test_cases = {
6006 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
6007 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
6008 		TEST_CASES_END() /**< NULL terminate unit test array */
6009 	}
6010 };
6011 
6012 static struct unit_test_suite bbdev_interrupt_testsuite = {
6013 	.suite_name = "BBdev Interrupt Tests",
6014 	.setup = interrupt_testsuite_setup,
6015 	.teardown = testsuite_teardown,
6016 	.unit_test_cases = {
6017 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
6018 		TEST_CASES_END() /**< NULL terminate unit test array */
6019 	}
6020 };
6021 
6022 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
6023 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
6024 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
6025 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
6026 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
6027 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
6028