xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision f8dbaebbf1c9efcbb2e2354b341ed62175466a57)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 100
28 #define WAIT_OFFLOAD_US 1000
29 
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42 
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #define FLR_5G_TIMEOUT 610
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67 
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70 
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74 
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for filler LLRs insertion in HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants for K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
89 
90 static struct test_bbdev_vector test_vector;
91 
92 /* Switch between PMD and Interrupt for throughput TC */
93 static bool intr_enabled;
94 
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100 
101 /* Represents tested active devices */
102 static struct active_device {
103 	const char *driver_name;
104 	uint8_t dev_id;
105 	uint16_t supported_ops;
106 	uint16_t queue_ids[MAX_QUEUES];
107 	uint16_t nb_queues;
108 	struct rte_mempool *ops_mempool;
109 	struct rte_mempool *in_mbuf_pool;
110 	struct rte_mempool *hard_out_mbuf_pool;
111 	struct rte_mempool *soft_out_mbuf_pool;
112 	struct rte_mempool *harq_in_mbuf_pool;
113 	struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115 
116 static uint8_t nb_active_devs;
117 
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120 	struct rte_bbdev_op_data *inputs;
121 	struct rte_bbdev_op_data *hard_outputs;
122 	struct rte_bbdev_op_data *soft_outputs;
123 	struct rte_bbdev_op_data *harq_inputs;
124 	struct rte_bbdev_op_data *harq_outputs;
125 };
126 
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129 	struct rte_mempool *mp;
130 	struct rte_bbdev_dec_op *ref_dec_op;
131 	struct rte_bbdev_enc_op *ref_enc_op;
132 	uint16_t burst_sz;
133 	uint16_t num_to_process;
134 	uint16_t num_lcores;
135 	int vector_mask;
136 	uint16_t sync;
137 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139 
140 /* Contains per lcore params */
141 struct thread_params {
142 	uint8_t dev_id;
143 	uint16_t queue_id;
144 	uint32_t lcore_id;
145 	uint64_t start_time;
146 	double ops_per_sec;
147 	double mbps;
148 	uint8_t iter_count;
149 	double iter_average;
150 	double bler;
151 	uint16_t nb_dequeued;
152 	int16_t processing_status;
153 	uint16_t burst_sz;
154 	struct test_op_params *op_params;
155 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158 
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162 	/* Stores software enqueue total working time */
163 	uint64_t enq_sw_total_time;
164 	/* Stores minimum value of software enqueue working time */
165 	uint64_t enq_sw_min_time;
166 	/* Stores maximum value of software enqueue working time */
167 	uint64_t enq_sw_max_time;
168 	/* Stores turbo enqueue total working time */
169 	uint64_t enq_acc_total_time;
170 	/* Stores minimum value of accelerator enqueue working time */
171 	uint64_t enq_acc_min_time;
172 	/* Stores maximum value of accelerator enqueue working time */
173 	uint64_t enq_acc_max_time;
174 	/* Stores dequeue total working time */
175 	uint64_t deq_total_time;
176 	/* Stores minimum value of dequeue working time */
177 	uint64_t deq_min_time;
178 	/* Stores maximum value of dequeue working time */
179 	uint64_t deq_max_time;
180 };
181 #endif
182 
183 typedef int (test_case_function)(struct active_device *ad,
184 		struct test_op_params *op_params);
185 
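/* Reset the lengths of all segments in a (possibly chained) mbuf. */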
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189 	m->pkt_len = 0;
190 
191 	do {
192 		m->data_len = 0;
193 		m = m->next;
194 	} while (m != NULL);
195 }
196 
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201 	return bitmap & bitmask;
202 }
203 
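/* Mark an operation type as supported by the tested device. */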
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207 	ad->supported_ops |= (1 << op_type);
208 }
209 
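/* Check whether an operation type was marked as supported. */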
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213 	return ad->supported_ops & (1 << op_type);
214 }
215 
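/* Check that all requested flags are present in the capability flags. */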
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219 	return (flags_req & flags_present) == flags_req;
220 }
221 
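/* Clear the soft-output related flags from the turbo decoder op flags. */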
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229 
230 /* Convert all the test vector op data entries to big-endian
231  * format. Used when the device expects its input data in
232  * big-endian byte order.
233  */
234 static inline void
235 convert_op_data_to_be(void)
236 {
237 	struct op_data_entries *op;
238 	enum op_data_type type;
239 	uint8_t nb_segs, *rem_data, temp;
240 	uint32_t *data, len;
241 	int complete, rem, i, j;
242 
243 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
244 		nb_segs = test_vector.entries[type].nb_segments;
245 		op = &test_vector.entries[type];
246 
247 		/* Invert byte endianness for all the segments */
248 		for (i = 0; i < nb_segs; ++i) {
249 			len = op->segments[i].length;
250 			data = op->segments[i].addr;
251 
252 			/* Swap complete u32 bytes */
253 			complete = len / 4;
254 			for (j = 0; j < complete; j++)
255 				data[j] = rte_bswap32(data[j]);
256 
257 			/* Swap any remaining bytes */
258 			rem = len % 4;
259 			rem_data = (uint8_t *)&data[j];
260 			for (j = 0; j < rem/2; j++) {
261 				temp = rem_data[j];
262 				rem_data[j] = rem_data[rem - j - 1];
263 				rem_data[rem - j - 1] = temp;
264 			}
265 		}
266 	}
267 }
268 
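/* Check that the device capabilities cover the test vector requirements. */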
269 static int
270 check_dev_cap(const struct rte_bbdev_info *dev_info)
271 {
272 	unsigned int i;
273 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
274 		nb_harq_inputs, nb_harq_outputs;
275 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
276 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
277 
278 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
279 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
280 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
281 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
282 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
283 
284 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
285 		if (op_cap->type != test_vector.op_type)
286 			continue;
287 
288 		if (dev_data_endianness == RTE_BIG_ENDIAN)
289 			convert_op_data_to_be();
290 
291 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
292 			const struct rte_bbdev_op_cap_turbo_dec *cap =
293 					&op_cap->cap.turbo_dec;
294 			/* Ignore lack of soft output capability, just skip
295 			 * checking if soft output is valid.
296 			 */
297 			if ((test_vector.turbo_dec.op_flags &
298 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
299 					!(cap->capability_flags &
300 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
301 				printf(
302 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
303 					dev_info->dev_name);
304 				clear_soft_out_cap(
305 					&test_vector.turbo_dec.op_flags);
306 			}
307 
308 			if (!flags_match(test_vector.turbo_dec.op_flags,
309 					cap->capability_flags))
310 				return TEST_FAILED;
311 			if (nb_inputs > cap->num_buffers_src) {
312 				printf("Too many inputs defined: %u, max: %u\n",
313 					nb_inputs, cap->num_buffers_src);
314 				return TEST_FAILED;
315 			}
316 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
317 					(test_vector.turbo_dec.op_flags &
318 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319 				printf(
320 					"Too many soft outputs defined: %u, max: %u\n",
321 						nb_soft_outputs,
322 						cap->num_buffers_soft_out);
323 				return TEST_FAILED;
324 			}
325 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
326 				printf(
327 					"Too many hard outputs defined: %u, max: %u\n",
328 						nb_hard_outputs,
329 						cap->num_buffers_hard_out);
330 				return TEST_FAILED;
331 			}
332 			if (intr_enabled && !(cap->capability_flags &
333 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
334 				printf(
335 					"Dequeue interrupts are not supported!\n");
336 				return TEST_FAILED;
337 			}
338 
339 			return TEST_SUCCESS;
340 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
341 			const struct rte_bbdev_op_cap_turbo_enc *cap =
342 					&op_cap->cap.turbo_enc;
343 
344 			if (!flags_match(test_vector.turbo_enc.op_flags,
345 					cap->capability_flags))
346 				return TEST_FAILED;
347 			if (nb_inputs > cap->num_buffers_src) {
348 				printf("Too many inputs defined: %u, max: %u\n",
349 					nb_inputs, cap->num_buffers_src);
350 				return TEST_FAILED;
351 			}
352 			if (nb_hard_outputs > cap->num_buffers_dst) {
353 				printf(
354 					"Too many hard outputs defined: %u, max: %u\n",
355 					nb_hard_outputs, cap->num_buffers_dst);
356 				return TEST_FAILED;
357 			}
358 			if (intr_enabled && !(cap->capability_flags &
359 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
360 				printf(
361 					"Dequeue interrupts are not supported!\n");
362 				return TEST_FAILED;
363 			}
364 
365 			return TEST_SUCCESS;
366 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
367 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
368 					&op_cap->cap.ldpc_enc;
369 
370 			if (!flags_match(test_vector.ldpc_enc.op_flags,
371 					cap->capability_flags)){
372 				printf("Flag Mismatch\n");
373 				return TEST_FAILED;
374 			}
375 			if (nb_inputs > cap->num_buffers_src) {
376 				printf("Too many inputs defined: %u, max: %u\n",
377 					nb_inputs, cap->num_buffers_src);
378 				return TEST_FAILED;
379 			}
380 			if (nb_hard_outputs > cap->num_buffers_dst) {
381 				printf(
382 					"Too many hard outputs defined: %u, max: %u\n",
383 					nb_hard_outputs, cap->num_buffers_dst);
384 				return TEST_FAILED;
385 			}
386 			if (intr_enabled && !(cap->capability_flags &
387 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
388 				printf(
389 					"Dequeue interrupts are not supported!\n");
390 				return TEST_FAILED;
391 			}
392 
393 			return TEST_SUCCESS;
394 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
395 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
396 					&op_cap->cap.ldpc_dec;
397 
398 			if (!flags_match(test_vector.ldpc_dec.op_flags,
399 					cap->capability_flags)){
400 				printf("Flag Mismatch\n");
401 				return TEST_FAILED;
402 			}
403 			if (nb_inputs > cap->num_buffers_src) {
404 				printf("Too many inputs defined: %u, max: %u\n",
405 					nb_inputs, cap->num_buffers_src);
406 				return TEST_FAILED;
407 			}
408 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
409 				printf(
410 					"Too many hard outputs defined: %u, max: %u\n",
411 					nb_hard_outputs,
412 					cap->num_buffers_hard_out);
413 				return TEST_FAILED;
414 			}
415 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
416 				printf(
417 					"Too many HARQ inputs defined: %u, max: %u\n",
418 					nb_harq_inputs,
419 					cap->num_buffers_hard_out);
420 				return TEST_FAILED;
421 			}
422 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
423 				printf(
424 					"Too many HARQ outputs defined: %u, max: %u\n",
425 					nb_harq_outputs,
426 					cap->num_buffers_hard_out);
427 				return TEST_FAILED;
428 			}
429 			if (intr_enabled && !(cap->capability_flags &
430 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
431 				printf(
432 					"Dequeue interrupts are not supported!\n");
433 				return TEST_FAILED;
434 			}
435 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
436 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
437 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
438 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
439 					))) {
440 				printf("Skip loop-back with interrupt\n");
441 				return TEST_FAILED;
442 			}
443 			return TEST_SUCCESS;
444 		}
445 	}
446 
447 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
448 		return TEST_SUCCESS; /* Special case for NULL device */
449 
450 	return TEST_FAILED;
451 }
452 
453 /* calculates the optimal mempool size, not smaller than val */
454 static unsigned int
455 optimal_mempool_size(unsigned int val)
456 {
457 	return rte_align32pow2(val + 1) - 1;
458 }
459 
460 /* allocates mbuf mempool for inputs and outputs */
461 static struct rte_mempool *
462 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
463 		int socket_id, unsigned int mbuf_pool_size,
464 		const char *op_type_str)
465 {
466 	unsigned int i;
467 	uint32_t max_seg_sz = 0;
468 	char pool_name[RTE_MEMPOOL_NAMESIZE];
469 
470 	/* find max input segment size */
471 	for (i = 0; i < entries->nb_segments; ++i)
472 		if (entries->segments[i].length > max_seg_sz)
473 			max_seg_sz = entries->segments[i].length;
474 
475 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476 			dev_id);
477 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
478 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
479 					+ FILLER_HEADROOM,
480 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
481 }
482 
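/* Create the ops mempool and the mbuf pools required by the test vector. */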
483 static int
484 create_mempools(struct active_device *ad, int socket_id,
485 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
486 {
487 	struct rte_mempool *mp;
488 	unsigned int ops_pool_size, mbuf_pool_size = 0;
489 	char pool_name[RTE_MEMPOOL_NAMESIZE];
490 	const char *op_type_str;
491 	enum rte_bbdev_op_type op_type = org_op_type;
492 
493 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
494 	struct op_data_entries *hard_out =
495 			&test_vector.entries[DATA_HARD_OUTPUT];
496 	struct op_data_entries *soft_out =
497 			&test_vector.entries[DATA_SOFT_OUTPUT];
498 	struct op_data_entries *harq_in =
499 			&test_vector.entries[DATA_HARQ_INPUT];
500 	struct op_data_entries *harq_out =
501 			&test_vector.entries[DATA_HARQ_OUTPUT];
502 
503 	/* allocate ops mempool */
504 	ops_pool_size = optimal_mempool_size(RTE_MAX(
505 			/* Ops used plus 1 reference op */
506 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
507 			/* Minimal cache size plus 1 reference op */
508 			(unsigned int)(1.5 * rte_lcore_count() *
509 					OPS_CACHE_SIZE + 1)),
510 			OPS_POOL_SIZE_MIN));
511 
512 	if (org_op_type == RTE_BBDEV_OP_NONE)
513 		op_type = RTE_BBDEV_OP_TURBO_ENC;
514 
515 	op_type_str = rte_bbdev_op_type_str(op_type);
516 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
517 
518 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
519 			ad->dev_id);
520 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
521 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
522 	TEST_ASSERT_NOT_NULL(mp,
523 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
524 			ops_pool_size,
525 			ad->dev_id,
526 			socket_id);
527 	ad->ops_mempool = mp;
528 
529 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
530 	if (org_op_type == RTE_BBDEV_OP_NONE)
531 		return TEST_SUCCESS;
532 
533 	/* Inputs */
534 	if (in->nb_segments > 0) {
535 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
536 				in->nb_segments);
537 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
538 				mbuf_pool_size, "in");
539 		TEST_ASSERT_NOT_NULL(mp,
540 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
541 				mbuf_pool_size,
542 				ad->dev_id,
543 				socket_id);
544 		ad->in_mbuf_pool = mp;
545 	}
546 
547 	/* Hard outputs */
548 	if (hard_out->nb_segments > 0) {
549 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
550 				hard_out->nb_segments);
551 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
552 				mbuf_pool_size,
553 				"hard_out");
554 		TEST_ASSERT_NOT_NULL(mp,
555 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
556 				mbuf_pool_size,
557 				ad->dev_id,
558 				socket_id);
559 		ad->hard_out_mbuf_pool = mp;
560 	}
561 
562 	/* Soft outputs */
563 	if (soft_out->nb_segments > 0) {
564 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
565 				soft_out->nb_segments);
566 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
567 				mbuf_pool_size,
568 				"soft_out");
569 		TEST_ASSERT_NOT_NULL(mp,
570 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
571 				mbuf_pool_size,
572 				ad->dev_id,
573 				socket_id);
574 		ad->soft_out_mbuf_pool = mp;
575 	}
576 
577 	/* HARQ inputs */
578 	if (harq_in->nb_segments > 0) {
579 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
580 				harq_in->nb_segments);
581 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
582 				mbuf_pool_size,
583 				"harq_in");
584 		TEST_ASSERT_NOT_NULL(mp,
585 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
586 				mbuf_pool_size,
587 				ad->dev_id,
588 				socket_id);
589 		ad->harq_in_mbuf_pool = mp;
590 	}
591 
592 	/* HARQ outputs */
593 	if (harq_out->nb_segments > 0) {
594 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
595 				harq_out->nb_segments);
596 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
597 				mbuf_pool_size,
598 				"harq_out");
599 		TEST_ASSERT_NOT_NULL(mp,
600 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
601 				mbuf_pool_size,
602 				ad->dev_id,
603 				socket_id);
604 		ad->harq_out_mbuf_pool = mp;
605 	}
606 
607 	return TEST_SUCCESS;
608 }
609 
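/* Configure a bbdev device, optionally pre-configuring the PF,
 * and set up its queues for the test.
 */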
610 static int
611 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
612 		struct test_bbdev_vector *vector)
613 {
614 	int ret;
615 	unsigned int queue_id;
616 	struct rte_bbdev_queue_conf qconf;
617 	struct active_device *ad = &active_devs[nb_active_devs];
618 	unsigned int nb_queues;
619 	enum rte_bbdev_op_type op_type = vector->op_type;
620 
621 /* Configure FPGA LTE FEC with PF & VF values
622  * if the '-i' flag is set and an FPGA device is used
623  */
624 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
625 	if ((get_init_device() == true) &&
626 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
627 		struct rte_fpga_lte_fec_conf conf;
628 		unsigned int i;
629 
630 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
631 				info->drv.driver_name);
632 
633 		/* clear default configuration before initialization */
634 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
635 
636 		/* Set PF mode :
637 		 * true if PF is used for data plane
638 		 * false for VFs
639 		 */
640 		conf.pf_mode_en = true;
641 
642 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
643 			/* Number of UL queues per VF (fpga supports 8 VFs) */
644 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
645 			/* Number of DL queues per VF (fpga supports 8 VFs) */
646 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
647 		}
648 
649 		/* UL bandwidth. Needed for schedule algorithm */
650 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
651 		/* DL bandwidth */
652 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
653 
654 		/* UL & DL load balance factor set to 128 */
655 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
656 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
657 
658 		/* FLR timeout value */
659 		conf.flr_time_out = FLR_4G_TIMEOUT;
660 
661 		/* setup FPGA PF with configuration information */
662 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
663 		TEST_ASSERT_SUCCESS(ret,
664 				"Failed to configure 4G FPGA PF for bbdev %s",
665 				info->dev_name);
666 	}
667 #endif
668 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
669 	if ((get_init_device() == true) &&
670 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
671 		struct rte_fpga_5gnr_fec_conf conf;
672 		unsigned int i;
673 
674 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
675 				info->drv.driver_name);
676 
677 		/* clear default configuration before initialization */
678 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
679 
680 		/* Set PF mode :
681 		 * true if PF is used for data plane
682 		 * false for VFs
683 		 */
684 		conf.pf_mode_en = true;
685 
686 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
687 			/* Number of UL queues per VF (fpga supports 8 VFs) */
688 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
689 			/* Number of DL queues per VF (fpga supports 8 VFs) */
690 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
691 		}
692 
693 		/* UL bandwidth. Needed for schedule algorithm */
694 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
695 		/* DL bandwidth */
696 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
697 
698 		/* UL & DL load balance factor set to 128 */
699 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
700 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
701 
702 		/* FLR timeout value */
703 		conf.flr_time_out = FLR_5G_TIMEOUT;
704 
705 		/* setup FPGA PF with configuration information */
706 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
707 		TEST_ASSERT_SUCCESS(ret,
708 				"Failed to configure 5G FPGA PF for bbdev %s",
709 				info->dev_name);
710 	}
711 #endif
712 #ifdef RTE_BASEBAND_ACC100
713 	if ((get_init_device() == true) &&
714 		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
715 		struct rte_acc100_conf conf;
716 		unsigned int i;
717 
718 		printf("Configure ACC100 FEC Driver %s with default values\n",
719 				info->drv.driver_name);
720 
721 		/* clear default configuration before initialization */
722 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
723 
724 		/* Always set in PF mode for built-in configuration */
725 		conf.pf_mode_en = true;
726 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
727 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
728 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
729 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
730 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
731 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
732 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
733 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
734 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
735 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
736 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
737 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
738 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
739 		}
740 
741 		conf.input_pos_llr_1_bit = true;
742 		conf.output_pos_llr_1_bit = true;
743 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
744 
745 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
746 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
747 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
748 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
749 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
750 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
751 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
752 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
753 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
754 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
755 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
756 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
757 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
758 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
759 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
760 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
761 
762 		/* setup PF with configuration information */
763 		ret = rte_acc100_configure(info->dev_name, &conf);
764 		TEST_ASSERT_SUCCESS(ret,
765 				"Failed to configure ACC100 PF for bbdev %s",
766 				info->dev_name);
767 	}
768 #endif
769 	/* Refresh the device info now that the device has been configured */
770 	rte_bbdev_info_get(dev_id, info);
771 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
772 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
773 
774 	/* setup device */
775 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
776 	if (ret < 0) {
777 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
778 				dev_id, nb_queues, info->socket_id, ret);
779 		return TEST_FAILED;
780 	}
781 
782 	/* configure interrupts if needed */
783 	if (intr_enabled) {
784 		ret = rte_bbdev_intr_enable(dev_id);
785 		if (ret < 0) {
786 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
787 					ret);
788 			return TEST_FAILED;
789 		}
790 	}
791 
792 	/* setup device queues */
793 	qconf.socket = info->socket_id;
794 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
795 	qconf.priority = 0;
796 	qconf.deferred_start = 0;
797 	qconf.op_type = op_type;
798 
799 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
800 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
801 		if (ret != 0) {
802 			printf(
803 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
804 					queue_id, qconf.priority, dev_id);
805 			qconf.priority++;
806 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
807 					&qconf);
808 		}
809 		if (ret != 0) {
810 			printf("All queues on dev %u allocated: %u\n",
811 					dev_id, queue_id);
812 			break;
813 		}
814 		ad->queue_ids[queue_id] = queue_id;
815 	}
816 	TEST_ASSERT(queue_id != 0,
817 			"ERROR Failed to configure any queues on dev %u",
818 			dev_id);
819 	ad->nb_queues = queue_id;
820 
821 	set_avail_op(ad, op_type);
822 
823 	return TEST_SUCCESS;
824 }
825 
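/* Record a device in active_devs once it is successfully configured. */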
826 static int
827 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
828 		struct test_bbdev_vector *vector)
829 {
830 	int ret;
831 
832 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
833 	active_devs[nb_active_devs].dev_id = dev_id;
834 
835 	ret = add_bbdev_dev(dev_id, info, vector);
836 	if (ret == TEST_SUCCESS)
837 		++nb_active_devs;
838 	return ret;
839 }
840 
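/* Add all devices matching the test vector capabilities to active_devs. */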
841 static uint8_t
842 populate_active_devices(void)
843 {
844 	int ret;
845 	uint8_t dev_id;
846 	uint8_t nb_devs_added = 0;
847 	struct rte_bbdev_info info;
848 
849 	RTE_BBDEV_FOREACH(dev_id) {
850 		rte_bbdev_info_get(dev_id, &info);
851 
852 		if (check_dev_cap(&info)) {
853 			printf(
854 				"Device %d (%s) does not support specified capabilities\n",
855 					dev_id, info.dev_name);
856 			continue;
857 		}
858 
859 		ret = add_active_device(dev_id, &info, &test_vector);
860 		if (ret != 0) {
861 			printf("Adding active bbdev %s skipped\n",
862 					info.dev_name);
863 			continue;
864 		}
865 		nb_devs_added++;
866 	}
867 
868 	return nb_devs_added;
869 }
870 
871 static int
872 read_test_vector(void)
873 {
874 	int ret;
875 
876 	memset(&test_vector, 0, sizeof(test_vector));
877 	printf("Test vector file = %s\n", get_vector_filename());
878 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
879 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
880 			get_vector_filename());
881 
882 	return TEST_SUCCESS;
883 }
884 
885 static int
886 testsuite_setup(void)
887 {
888 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
889 
890 	if (populate_active_devices() == 0) {
891 		printf("No suitable devices found!\n");
892 		return TEST_SKIPPED;
893 	}
894 
895 	return TEST_SUCCESS;
896 }
897 
898 static int
899 interrupt_testsuite_setup(void)
900 {
901 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
902 
903 	/* Enable interrupts */
904 	intr_enabled = true;
905 
906 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
907 	if (populate_active_devices() == 0 ||
908 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
909 		intr_enabled = false;
910 		printf("No suitable devices found!\n");
911 		return TEST_SKIPPED;
912 	}
913 
914 	return TEST_SUCCESS;
915 }
916 
917 static void
918 testsuite_teardown(void)
919 {
920 	uint8_t dev_id;
921 
922 	/* Unconfigure devices */
923 	RTE_BBDEV_FOREACH(dev_id)
924 		rte_bbdev_close(dev_id);
925 
926 	/* Clear active devices structs. */
927 	memset(active_devs, 0, sizeof(active_devs));
928 	nb_active_devs = 0;
929 
930 	/* Disable interrupts */
931 	intr_enabled = false;
932 }
933 
934 static int
935 ut_setup(void)
936 {
937 	uint8_t i, dev_id;
938 
939 	for (i = 0; i < nb_active_devs; i++) {
940 		dev_id = active_devs[i].dev_id;
941 		/* reset bbdev stats */
942 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
943 				"Failed to reset stats of bbdev %u", dev_id);
944 		/* start the device */
945 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
946 				"Failed to start bbdev %u", dev_id);
947 	}
948 
949 	return TEST_SUCCESS;
950 }
951 
952 static void
953 ut_teardown(void)
954 {
955 	uint8_t i, dev_id;
956 	struct rte_bbdev_stats stats;
957 
958 	for (i = 0; i < nb_active_devs; i++) {
959 		dev_id = active_devs[i].dev_id;
960 		/* read stats and print */
961 		rte_bbdev_stats_get(dev_id, &stats);
962 		/* Stop the device */
963 		rte_bbdev_stop(dev_id);
964 	}
965 }
966 
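/* Fill op_data entries with mbufs carrying the test vector segments. */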
967 static int
968 init_op_data_objs(struct rte_bbdev_op_data *bufs,
969 		struct op_data_entries *ref_entries,
970 		struct rte_mempool *mbuf_pool, const uint16_t n,
971 		enum op_data_type op_type, uint16_t min_alignment)
972 {
973 	int ret;
974 	unsigned int i, j;
975 	bool large_input = false;
976 
977 	for (i = 0; i < n; ++i) {
978 		char *data;
979 		struct op_data_buf *seg = &ref_entries->segments[0];
980 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
981 		TEST_ASSERT_NOT_NULL(m_head,
982 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
983 				op_type, n * ref_entries->nb_segments,
984 				mbuf_pool->size);
985 
986 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
987 			/*
988 			 * Special case when DPDK mbuf cannot handle
989 			 * the required input size
990 			 */
991 			printf("Warning: Larger input size than DPDK mbuf %d\n",
992 					seg->length);
993 			large_input = true;
994 		}
995 		bufs[i].data = m_head;
996 		bufs[i].offset = 0;
997 		bufs[i].length = 0;
998 
999 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1000 			if ((op_type == DATA_INPUT) && large_input) {
1001 				/* Substitute an oversized external buffer as the mbuf data buffer */
1002 				data = rte_malloc(NULL, seg->length, 0);
1003 				TEST_ASSERT_NOT_NULL(data,
1004 					"rte malloc failed with %u bytes",
1005 					seg->length);
1006 				memcpy(data, seg->addr, seg->length);
1007 				m_head->buf_addr = data;
1008 				m_head->buf_iova = rte_malloc_virt2iova(data);
1009 				m_head->data_off = 0;
1010 				m_head->data_len = seg->length;
1011 			} else {
1012 				data = rte_pktmbuf_append(m_head, seg->length);
1013 				TEST_ASSERT_NOT_NULL(data,
1014 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1015 					seg->length, op_type);
1016 
1017 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1018 						data, min_alignment),
1019 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1020 					data, min_alignment);
1021 				rte_memcpy(data, seg->addr, seg->length);
1022 			}
1023 
1024 			bufs[i].length += seg->length;
1025 
1026 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1027 				struct rte_mbuf *m_tail =
1028 						rte_pktmbuf_alloc(mbuf_pool);
1029 				TEST_ASSERT_NOT_NULL(m_tail,
1030 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1031 						op_type,
1032 						n * ref_entries->nb_segments,
1033 						mbuf_pool->size);
1034 				seg += 1;
1035 
1036 				data = rte_pktmbuf_append(m_tail, seg->length);
1037 				TEST_ASSERT_NOT_NULL(data,
1038 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1039 						seg->length, op_type);
1040 
1041 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1042 						min_alignment),
1043 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1044 						data, min_alignment);
1045 				rte_memcpy(data, seg->addr, seg->length);
1046 				bufs[i].length += seg->length;
1047 
1048 				ret = rte_pktmbuf_chain(m_head, m_tail);
1049 				TEST_ASSERT_SUCCESS(ret,
1050 						"Couldn't chain mbufs from %d data type mbuf pool",
1051 						op_type);
1052 			}
1053 		} else {
1054 
1055 			/* allocate chained-mbuf for output buffer */
1056 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1057 				struct rte_mbuf *m_tail =
1058 						rte_pktmbuf_alloc(mbuf_pool);
1059 				TEST_ASSERT_NOT_NULL(m_tail,
1060 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1061 						op_type,
1062 						n * ref_entries->nb_segments,
1063 						mbuf_pool->size);
1064 
1065 				ret = rte_pktmbuf_chain(m_head, m_tail);
1066 				TEST_ASSERT_SUCCESS(ret,
1067 						"Couldn't chain mbufs from %d data type mbuf pool",
1068 						op_type);
1069 			}
1070 		}
1071 	}
1072 
1073 	return 0;
1074 }
1075 
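/* Allocate op_data on the given socket, falling back to other sockets. */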
1076 static int
1077 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1078 		const int socket)
1079 {
1080 	int i;
1081 
1082 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1083 	if (*buffers == NULL) {
1084 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1085 				socket);
1086 		/* try to allocate memory on other detected sockets */
1087 		for (i = 0; i < socket; i++) {
1088 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1089 			if (*buffers != NULL)
1090 				break;
1091 		}
1092 	}
1093 
1094 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1095 }
1096 
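/* Scale input LLRs down to the LLR modulus supported by the device. */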
1097 static void
1098 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1099 		const uint16_t n, const int8_t max_llr_modulus)
1100 {
1101 	uint16_t i, byte_idx;
1102 
1103 	for (i = 0; i < n; ++i) {
1104 		struct rte_mbuf *m = input_ops[i].data;
1105 		while (m != NULL) {
1106 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1107 					input_ops[i].offset);
1108 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1109 					++byte_idx)
1110 				llr[byte_idx] = round((double)max_llr_modulus *
1111 						llr[byte_idx] / INT8_MAX);
1112 
1113 			m = m->next;
1114 		}
1115 	}
1116 }
1117 
1118 /*
1119  * Filler bits may have to be inserted into the HARQ input
1120  * when the device HARQ memory layout requires them
1121  */
1122 static void
1123 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1124 		const uint16_t n, struct test_op_params *op_params)
1125 {
1126 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1127 
1128 	if (input_ops == NULL)
1129 		return;
1130 	/* No need to add filler if not required by device */
1131 	if (!(ldpc_cap_flags &
1132 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1133 		return;
1134 	/* No need to add filler for loopback operation */
1135 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1136 		return;
1137 
1138 	uint16_t i, j, parity_offset;
1139 	for (i = 0; i < n; ++i) {
1140 		struct rte_mbuf *m = input_ops[i].data;
1141 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1142 				input_ops[i].offset);
1143 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1144 				* dec.z_c - dec.n_filler;
1145 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1146 		m->data_len = new_hin_size;
1147 		input_ops[i].length = new_hin_size;
1148 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1149 				j--)
1150 			llr[j] = llr[j - dec.n_filler];
1151 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1152 		for (j = 0; j < dec.n_filler; j++)
1153 			llr[parity_offset + j] = llr_max_pre_scaling;
1154 	}
1155 }
1156 
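/* Scale and saturate input LLRs to the device fixed-point LLR format. */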
1157 static void
1158 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1159 		const uint16_t n, const int8_t llr_size,
1160 		const int8_t llr_decimals)
1161 {
1162 	if (input_ops == NULL)
1163 		return;
1164 
1165 	uint16_t i, byte_idx;
1166 
1167 	int16_t llr_max, llr_min, llr_tmp;
1168 	llr_max = (1 << (llr_size - 1)) - 1;
1169 	llr_min = -llr_max;
1170 	for (i = 0; i < n; ++i) {
1171 		struct rte_mbuf *m = input_ops[i].data;
1172 		while (m != NULL) {
1173 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1174 					input_ops[i].offset);
1175 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1176 					++byte_idx) {
1177 
1178 				llr_tmp = llr[byte_idx];
1179 				if (llr_decimals == 4)
1180 					llr_tmp *= 8;
1181 				else if (llr_decimals == 2)
1182 					llr_tmp *= 2;
1183 				else if (llr_decimals == 0)
1184 					llr_tmp /= 2;
1185 				llr_tmp = RTE_MIN(llr_max,
1186 						RTE_MAX(llr_min, llr_tmp));
1187 				llr[byte_idx] = (int8_t) llr_tmp;
1188 			}
1189 
1190 			m = m->next;
1191 		}
1192 	}
1193 }
1194 
1195 
1196 
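/* Allocate and fill the per-queue buffers from the test vector data. */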
1197 static int
1198 fill_queue_buffers(struct test_op_params *op_params,
1199 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1200 		struct rte_mempool *soft_out_mp,
1201 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1202 		uint16_t queue_id,
1203 		const struct rte_bbdev_op_cap *capabilities,
1204 		uint16_t min_alignment, const int socket_id)
1205 {
1206 	int ret;
1207 	enum op_data_type type;
1208 	const uint16_t n = op_params->num_to_process;
1209 
1210 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1211 		in_mp,
1212 		soft_out_mp,
1213 		hard_out_mp,
1214 		harq_in_mp,
1215 		harq_out_mp,
1216 	};
1217 
1218 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1219 		&op_params->q_bufs[socket_id][queue_id].inputs,
1220 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1221 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1222 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1223 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1224 	};
1225 
1226 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1227 		struct op_data_entries *ref_entries =
1228 				&test_vector.entries[type];
1229 		if (ref_entries->nb_segments == 0)
1230 			continue;
1231 
1232 		ret = allocate_buffers_on_socket(queue_ops[type],
1233 				n * sizeof(struct rte_bbdev_op_data),
1234 				socket_id);
1235 		TEST_ASSERT_SUCCESS(ret,
1236 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1237 
1238 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1239 				mbuf_pools[type], n, type, min_alignment);
1240 		TEST_ASSERT_SUCCESS(ret,
1241 				"Couldn't init rte_bbdev_op_data structs");
1242 	}
1243 
1244 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1245 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1246 			capabilities->cap.turbo_dec.max_llr_modulus);
1247 
1248 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1249 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1250 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1251 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1252 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1253 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1254 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1255 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1256 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1257 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1258 		if (!loopback && !llr_comp)
1259 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1260 					ldpc_llr_size, ldpc_llr_decimals);
1261 		if (!loopback && !harq_comp)
1262 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1263 					ldpc_llr_size, ldpc_llr_decimals);
1264 		if (!loopback)
1265 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1266 					op_params);
1267 	}
1268 
1269 	return 0;
1270 }
1271 
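/* Free the device mempools and the per-queue op_data arrays. */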
1272 static void
1273 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1274 {
1275 	unsigned int i, j;
1276 
1277 	rte_mempool_free(ad->ops_mempool);
1278 	rte_mempool_free(ad->in_mbuf_pool);
1279 	rte_mempool_free(ad->hard_out_mbuf_pool);
1280 	rte_mempool_free(ad->soft_out_mbuf_pool);
1281 	rte_mempool_free(ad->harq_in_mbuf_pool);
1282 	rte_mempool_free(ad->harq_out_mbuf_pool);
1283 
1284 	for (i = 0; i < rte_lcore_count(); ++i) {
1285 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1286 			rte_free(op_params->q_bufs[j][i].inputs);
1287 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1288 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1289 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1290 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1291 		}
1292 	}
1293 }
1294 
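/* Copy the reference turbo decode op parameters into each op of the burst. */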
1295 static void
1296 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1297 		unsigned int start_idx,
1298 		struct rte_bbdev_op_data *inputs,
1299 		struct rte_bbdev_op_data *hard_outputs,
1300 		struct rte_bbdev_op_data *soft_outputs,
1301 		struct rte_bbdev_dec_op *ref_op)
1302 {
1303 	unsigned int i;
1304 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1305 
1306 	for (i = 0; i < n; ++i) {
1307 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1308 			ops[i]->turbo_dec.tb_params.ea =
1309 					turbo_dec->tb_params.ea;
1310 			ops[i]->turbo_dec.tb_params.eb =
1311 					turbo_dec->tb_params.eb;
1312 			ops[i]->turbo_dec.tb_params.k_pos =
1313 					turbo_dec->tb_params.k_pos;
1314 			ops[i]->turbo_dec.tb_params.k_neg =
1315 					turbo_dec->tb_params.k_neg;
1316 			ops[i]->turbo_dec.tb_params.c =
1317 					turbo_dec->tb_params.c;
1318 			ops[i]->turbo_dec.tb_params.c_neg =
1319 					turbo_dec->tb_params.c_neg;
1320 			ops[i]->turbo_dec.tb_params.cab =
1321 					turbo_dec->tb_params.cab;
1322 			ops[i]->turbo_dec.tb_params.r =
1323 					turbo_dec->tb_params.r;
1324 		} else {
1325 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1326 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1327 		}
1328 
1329 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1330 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1331 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1332 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1333 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1334 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1335 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1336 
1337 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1338 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1339 		if (soft_outputs != NULL)
1340 			ops[i]->turbo_dec.soft_output =
1341 				soft_outputs[start_idx + i];
1342 	}
1343 }
1344 
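/* Copy the reference turbo encode op parameters into each op of the burst. */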
1345 static void
1346 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1347 		unsigned int start_idx,
1348 		struct rte_bbdev_op_data *inputs,
1349 		struct rte_bbdev_op_data *outputs,
1350 		struct rte_bbdev_enc_op *ref_op)
1351 {
1352 	unsigned int i;
1353 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1354 	for (i = 0; i < n; ++i) {
1355 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1356 			ops[i]->turbo_enc.tb_params.ea =
1357 					turbo_enc->tb_params.ea;
1358 			ops[i]->turbo_enc.tb_params.eb =
1359 					turbo_enc->tb_params.eb;
1360 			ops[i]->turbo_enc.tb_params.k_pos =
1361 					turbo_enc->tb_params.k_pos;
1362 			ops[i]->turbo_enc.tb_params.k_neg =
1363 					turbo_enc->tb_params.k_neg;
1364 			ops[i]->turbo_enc.tb_params.c =
1365 					turbo_enc->tb_params.c;
1366 			ops[i]->turbo_enc.tb_params.c_neg =
1367 					turbo_enc->tb_params.c_neg;
1368 			ops[i]->turbo_enc.tb_params.cab =
1369 					turbo_enc->tb_params.cab;
1370 			ops[i]->turbo_enc.tb_params.ncb_pos =
1371 					turbo_enc->tb_params.ncb_pos;
1372 			ops[i]->turbo_enc.tb_params.ncb_neg =
1373 					turbo_enc->tb_params.ncb_neg;
1374 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1375 		} else {
1376 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1377 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1378 			ops[i]->turbo_enc.cb_params.ncb =
1379 					turbo_enc->cb_params.ncb;
1380 		}
1381 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1382 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1383 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1384 
1385 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1386 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1387 	}
1388 }
1389 
1390 
1391 /* Returns a random number drawn from a normal distribution
1392  * with mean 0 and variance 1,
1393  * using the Marsaglia polar method
1394  */
1395 static double
1396 randn(int n)
1397 {
1398 	double S, Z, U1, U2, u, v, fac;
1399 
1400 	do {
1401 		U1 = (double)rand() / RAND_MAX;
1402 		U2 = (double)rand() / RAND_MAX;
1403 		u = 2. * U1 - 1.;
1404 		v = 2. * U2 - 1.;
1405 		S = u * u + v * v;
1406 	} while (S >= 1 || S == 0);
1407 	fac = sqrt(-2. * log(S) / S);
1408 	Z = (n % 2) ? u * fac : v * fac;
1409 	return Z;
1410 }
1411 
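/* max* approximation: max(A, B) + log(1 + exp(-|A - B|)). */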
1412 static inline double
1413 maxstar(double A, double B)
1414 {
1415 	if (fabs(A - B) > 5)
1416 		return RTE_MAX(A, B);
1417 	else
1418 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1419 }
1420 
1421 /*
1422  * Generate Qm LLRs for Qm==8 (256QAM)
1423  * Modulation, AWGN and LLR estimation based on a max-log approximation
1424  */
1425 static void
1426 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1427 {
1428 	int qm = 8;
1429 	int qam = 256;
1430 	int m, k;
1431 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1432 	/* 5.1.4 of TS38.211 */
1433 	const double symbols_I[256] = {
1434 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1435 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1436 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1437 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1438 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1439 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1440 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1441 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1442 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1443 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1444 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1445 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1446 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1447 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1448 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1449 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1450 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1451 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1452 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1453 			-13, -13, -15, -15, -13, -13, -15, -15};
1454 	const double symbols_Q[256] = {
1455 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1456 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1457 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1458 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1459 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1460 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1461 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1462 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1463 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1464 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1465 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1466 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1467 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1468 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1469 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1470 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1471 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1472 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1473 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1474 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1475 	/* Average constellation point energy */
1476 	N0 *= 170.0;
1477 	for (k = 0; k < qm; k++)
1478 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1479 	/* 5.1.4 of TS38.211 */
1480 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1481 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1482 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1483 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1484 	/* AWGN channel */
1485 	I += sqrt(N0 / 2) * randn(0);
1486 	Q += sqrt(N0 / 2) * randn(1);
1487 	/*
1488 	 * Calculate the log of the probability that each of
1489 	 * the constellation points was transmitted
1490 	 */
1491 	for (m = 0; m < qam; m++)
1492 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1493 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1494 	/* Calculate an LLR for each of the qm bits in the set */
1495 	for (k = 0; k < qm; k++) {
1496 		p0 = -999999;
1497 		p1 = -999999;
1498 		/* For each constellation point */
1499 		for (m = 0; m < qam; m++) {
1500 			if ((m >> (qm - k - 1)) & 1)
1501 				p1 = maxstar(p1, log_syml_prob[m]);
1502 			else
1503 				p0 = maxstar(p0, log_syml_prob[m]);
1504 		}
1505 		/* Calculate the LLR */
1506 		llr_ = p0 - p1;
1507 		llr_ *= (1 << ldpc_llr_decimals);
1508 		llr_ = round(llr_);
1509 		if (llr_ > llr_max)
1510 			llr_ = llr_max;
1511 		if (llr_ < -llr_max)
1512 			llr_ = -llr_max;
1513 		llrs[qm * i + k] = (int8_t) llr_;
1514 	}
1515 }
1516 
1517 
1518 /*
1519  * Generate Qm LLRs for Qm==6 (64QAM)
1520  * Modulation, AWGN and LLR estimation based on a max-log approximation
1521  */
1522 static void
1523 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1524 {
1525 	int qm = 6;
1526 	int qam = 64;
1527 	int m, k;
1528 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1529 	/* 5.1.4 of TS38.211 */
1530 	const double symbols_I[64] = {
1531 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1532 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1533 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1534 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1535 			-5, -5, -7, -7, -5, -5, -7, -7};
1536 	const double symbols_Q[64] = {
1537 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1538 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1539 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1540 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1541 			-3, -1, -3, -1, -5, -7, -5, -7};
1542 	/* Average constellation point energy */
1543 	N0 *= 42.0;
1544 	for (k = 0; k < qm; k++)
1545 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1546 	/* 5.1.4 of TS38.211 */
1547 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1548 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1549 	/* AWGN channel */
1550 	I += sqrt(N0 / 2) * randn(0);
1551 	Q += sqrt(N0 / 2) * randn(1);
1552 	/*
1553 	 * Calculate the log of the probability that each of
1554 	 * the constellation points was transmitted
1555 	 */
1556 	for (m = 0; m < qam; m++)
1557 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1558 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1559 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1560 	for (k = 0; k < qm; k++) {
1561 		p0 = -999999;
1562 		p1 = -999999;
1563 		/* For each constellation point */
1564 		for (m = 0; m < qam; m++) {
1565 			if ((m >> (qm - k - 1)) & 1)
1566 				p1 = maxstar(p1, log_syml_prob[m]);
1567 			else
1568 				p0 = maxstar(p0, log_syml_prob[m]);
1569 		}
1570 		/* Calculate the LLR */
1571 		llr_ = p0 - p1;
1572 		llr_ *= (1 << ldpc_llr_decimals);
1573 		llr_ = round(llr_);
1574 		if (llr_ > llr_max)
1575 			llr_ = llr_max;
1576 		if (llr_ < -llr_max)
1577 			llr_ = -llr_max;
1578 		llrs[qm * i + k] = (int8_t) llr_;
1579 	}
1580 }
1581 
1582 /*
1583  * Generate Qm LLRs for Qm==4 (16QAM)
1584  * Modulation, AWGN and LLR estimation based on a max-log approximation
1585  */
1586 static void
1587 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1588 {
1589 	int qm = 4;
1590 	int qam = 16;
1591 	int m, k;
1592 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1593 	/* 5.1.4 of TS38.211 */
1594 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1595 			-1, -1, -3, -3, -1, -1, -3, -3};
1596 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1597 			1, 3, 1, 3, -1, -3, -1, -3};
1598 	/* Average constellation point energy */
1599 	N0 *= 10.0;
1600 	for (k = 0; k < qm; k++)
1601 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1602 	/* 5.1.4 of TS38.211 */
1603 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1604 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1605 	/* AWGN channel */
1606 	I += sqrt(N0 / 2) * randn(0);
1607 	Q += sqrt(N0 / 2) * randn(1);
1608 	/*
1609 	 * Calculate the log of the probability that each of
1610 	 * the constellation points was transmitted
1611 	 */
1612 	for (m = 0; m < qam; m++)
1613 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1614 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1615 	/* Calculate an LLR for each of the qm bits in the set */
1616 	for (k = 0; k < qm; k++) {
1617 		p0 = -999999;
1618 		p1 = -999999;
1619 		/* For each constellation point */
1620 		for (m = 0; m < qam; m++) {
1621 			if ((m >> (qm - k - 1)) & 1)
1622 				p1 = maxstar(p1, log_syml_prob[m]);
1623 			else
1624 				p0 = maxstar(p0, log_syml_prob[m]);
1625 		}
1626 		/* Calculate the LLR */
1627 		llr_ = p0 - p1;
1628 		llr_ *= (1 << ldpc_llr_decimals);
1629 		llr_ = round(llr_);
1630 		if (llr_ > llr_max)
1631 			llr_ = llr_max;
1632 		if (llr_ < -llr_max)
1633 			llr_ = -llr_max;
1634 		llrs[qm * i + k] = (int8_t) llr_;
1635 	}
1636 }
1637 
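/*
 * Generate Qm LLRs for Qm==2 (QPSK)
 * Regenerate each LLR from its sign through an AWGN perturbation
 */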
1638 static void
1639 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1640 {
1641 	double b, b1, n;
1642 	double coeff = 2.0 * sqrt(N0);
1643 
1644 	/* Ignore rare quasi-null LLRs in the vectors so they are not saturated */
1645 	if (llrs[j] < 8 && llrs[j] > -8)
1646 		return;
1647 
1648 	/* Note don't change sign here */
1649 	n = randn(j % 2);
1650 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1651 			+ coeff * n) / N0;
1652 	b = b1 * (1 << ldpc_llr_decimals);
1653 	b = round(b);
1654 	if (b > llr_max)
1655 		b = llr_max;
1656 	if (b < -llr_max)
1657 		b = -llr_max;
1658 	llrs[j] = (int8_t) b;
1659 }
1660 
1661 /* Generate LLR for a given SNR */
1662 static void
1663 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1664 		struct rte_bbdev_dec_op *ref_op)
1665 {
1666 	struct rte_mbuf *m;
1667 	uint16_t qm;
1668 	uint32_t i, j, e, range;
1669 	double N0, llr_max;
1670 
1671 	e = ref_op->ldpc_dec.cb_params.e;
1672 	qm = ref_op->ldpc_dec.q_m;
1673 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1674 	range = e / qm;
1675 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1676 
1677 	for (i = 0; i < n; ++i) {
1678 		m = inputs[i].data;
1679 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1680 		if (qm == 8) {
1681 			for (j = 0; j < range; ++j)
1682 				gen_qm8_llr(llrs, j, N0, llr_max);
1683 		} else if (qm == 6) {
1684 			for (j = 0; j < range; ++j)
1685 				gen_qm6_llr(llrs, j, N0, llr_max);
1686 		} else if (qm == 4) {
1687 			for (j = 0; j < range; ++j)
1688 				gen_qm4_llr(llrs, j, N0, llr_max);
1689 		} else {
1690 			for (j = 0; j < e; ++j)
1691 				gen_qm2_llr(llrs, j, N0, llr_max);
1692 		}
1693 	}
1694 }
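/*
 * Illustrative example (values assumed, not from a specific vector): with
 * e = 3840 rate-matched bits and q_m = 6 (64QAM), range = 3840 / 6 = 640
 * symbols are regenerated and each symbol produces 6 LLRs; for q_m = 2 the
 * loop instead runs over all e LLRs individually.
 */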
1695 
1696 static void
1697 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1698 		unsigned int start_idx,
1699 		struct rte_bbdev_op_data *inputs,
1700 		struct rte_bbdev_op_data *hard_outputs,
1701 		struct rte_bbdev_op_data *soft_outputs,
1702 		struct rte_bbdev_op_data *harq_inputs,
1703 		struct rte_bbdev_op_data *harq_outputs,
1704 		struct rte_bbdev_dec_op *ref_op)
1705 {
1706 	unsigned int i;
1707 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1708 
1709 	for (i = 0; i < n; ++i) {
1710 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1711 			ops[i]->ldpc_dec.tb_params.ea =
1712 					ldpc_dec->tb_params.ea;
1713 			ops[i]->ldpc_dec.tb_params.eb =
1714 					ldpc_dec->tb_params.eb;
1715 			ops[i]->ldpc_dec.tb_params.c =
1716 					ldpc_dec->tb_params.c;
1717 			ops[i]->ldpc_dec.tb_params.cab =
1718 					ldpc_dec->tb_params.cab;
1719 			ops[i]->ldpc_dec.tb_params.r =
1720 					ldpc_dec->tb_params.r;
1721 		} else {
1722 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1723 		}
1724 
1725 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1726 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1727 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1728 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1729 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1730 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1731 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1732 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1733 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1734 
1735 		if (hard_outputs != NULL)
1736 			ops[i]->ldpc_dec.hard_output =
1737 					hard_outputs[start_idx + i];
1738 		if (inputs != NULL)
1739 			ops[i]->ldpc_dec.input =
1740 					inputs[start_idx + i];
1741 		if (soft_outputs != NULL)
1742 			ops[i]->ldpc_dec.soft_output =
1743 					soft_outputs[start_idx + i];
1744 		if (harq_inputs != NULL)
1745 			ops[i]->ldpc_dec.harq_combined_input =
1746 					harq_inputs[start_idx + i];
1747 		if (harq_outputs != NULL)
1748 			ops[i]->ldpc_dec.harq_combined_output =
1749 					harq_outputs[start_idx + i];
1750 	}
1751 }
1752 
1753 
1754 static void
1755 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1756 		unsigned int start_idx,
1757 		struct rte_bbdev_op_data *inputs,
1758 		struct rte_bbdev_op_data *outputs,
1759 		struct rte_bbdev_enc_op *ref_op)
1760 {
1761 	unsigned int i;
1762 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1763 	for (i = 0; i < n; ++i) {
1764 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1765 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1766 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1767 			ops[i]->ldpc_enc.tb_params.cab =
1768 					ldpc_enc->tb_params.cab;
1769 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1770 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1771 		} else {
1772 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1773 		}
1774 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1775 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1776 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1777 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1778 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1779 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1780 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1781 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1782 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1783 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1784 	}
1785 }
1786 
1787 static int
1788 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1789 		unsigned int order_idx, const int expected_status)
1790 {
1791 	int status = op->status;
1792 	/* ignore parity mismatch false alarms for long iterations */
1793 	if (get_iter_max() >= 10) {
1794 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1795 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1796 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1797 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1798 		}
1799 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1800 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1801 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1802 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1803 		}
1804 	}
1805 
1806 	TEST_ASSERT(status == expected_status,
1807 			"op_status (%d) != expected_status (%d)",
1808 			op->status, expected_status);
1809 
1810 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1811 			"Ordering error, expected %p, got %p",
1812 			(void *)(uintptr_t)order_idx, op->opaque_data);
1813 
1814 	return TEST_SUCCESS;
1815 }
1816 
1817 static int
1818 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1819 		unsigned int order_idx, const int expected_status)
1820 {
1821 	TEST_ASSERT(op->status == expected_status,
1822 			"op_status (%d) != expected_status (%d)",
1823 			op->status, expected_status);
1824 
1825 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1826 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1827 				"Ordering error, expected %p, got %p",
1828 				(void *)(uintptr_t)order_idx, op->opaque_data);
1829 
1830 	return TEST_SUCCESS;
1831 }
1832 
1833 static inline int
1834 validate_op_chain(struct rte_bbdev_op_data *op,
1835 		struct op_data_entries *orig_op)
1836 {
1837 	uint8_t i;
1838 	struct rte_mbuf *m = op->data;
1839 	uint8_t nb_dst_segments = orig_op->nb_segments;
1840 	uint32_t total_data_size = 0;
1841 
1842 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1843 			"Number of segments differ in original (%u) and filled (%u) op",
1844 			nb_dst_segments, m->nb_segs);
1845 
1846 	/* Validate each mbuf segment length */
1847 	for (i = 0; i < nb_dst_segments; ++i) {
1848 		/* Apply offset to the first mbuf segment */
1849 		uint16_t offset = (i == 0) ? op->offset : 0;
1850 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1851 		total_data_size += orig_op->segments[i].length;
1852 
1853 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1854 				"Length of segment differ in original (%u) and filled (%u) op",
1855 				orig_op->segments[i].length, data_len);
1856 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1857 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1858 				data_len,
1859 				"Output buffers (CB=%u) are not equal", i);
1860 		m = m->next;
1861 	}
1862 
1863 	/* Validate total mbuf pkt length */
1864 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1865 	TEST_ASSERT(total_data_size == pkt_len,
1866 			"Length of data differ in original (%u) and filled (%u) op",
1867 			total_data_size, pkt_len);
1868 
1869 	return TEST_SUCCESS;
1870 }
1871 
1872 /*
1873  * Compute K0 for a given configuration, used for the HARQ output length
1874  * computation, as defined in 3GPP 38.212 Table 5.4.2.1-2
1875  */
1876 static inline uint16_t
1877 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1878 {
1879 	if (rv_index == 0)
1880 		return 0;
1881 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1882 	if (n_cb == n) {
1883 		if (rv_index == 1)
1884 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1885 		else if (rv_index == 2)
1886 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1887 		else
1888 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1889 	}
1890 	/* LBRM case - includes a division by N */
1891 	if (rv_index == 1)
1892 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1893 				/ n) * z_c;
1894 	else if (rv_index == 2)
1895 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1896 				/ n) * z_c;
1897 	else
1898 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1899 				/ n) * z_c;
1900 }
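/*
 * Illustrative example (assumed values, not tied to a test vector): for BG1,
 * z_c = 384 and a full circular buffer (n_cb = 66 * 384 = 25344),
 * get_k0(25344, 384, 1, 2) returns K0_2_1 * z_c = 33 * 384 = 12672, matching
 * floor(33 * n_cb / (66 * z_c)) * z_c from 3GPP 38.212 Table 5.4.2.1-2.
 */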
1901 
1902 /* HARQ output length including the Filler bits */
1903 static inline uint16_t
1904 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1905 {
1906 	uint16_t k0 = 0;
1907 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1908 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1909 	/* Compute RM out size and number of rows */
1910 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1911 			* ops_ld->z_c - ops_ld->n_filler;
1912 	uint16_t deRmOutSize = RTE_MIN(
1913 			k0 + ops_ld->cb_params.e +
1914 			((k0 > parity_offset) ?
1915 					0 : ops_ld->n_filler),
1916 					ops_ld->n_cb);
1917 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1918 			/ ops_ld->z_c);
1919 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1920 	return harq_output_len;
1921 }
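/*
 * Illustrative example (assumed values): for BG1, z_c = 384, n_filler = 0,
 * rv_index = 0 (k0 = 0), cb_params.e = 10000 and n_cb = 25344,
 * deRmOutSize = min(0 + 10000 + 0, 25344) = 10000,
 * numRows = ceil(10000 / 384) = 27 and the returned HARQ length is
 * 27 * 384 = 10368 LLRs.
 */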
1922 
1923 static inline int
1924 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1925 		struct op_data_entries *orig_op,
1926 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1927 {
1928 	uint8_t i;
1929 	uint32_t j, jj, k;
1930 	struct rte_mbuf *m = op->data;
1931 	uint8_t nb_dst_segments = orig_op->nb_segments;
1932 	uint32_t total_data_size = 0;
1933 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1934 	uint32_t byte_error = 0, cum_error = 0, error;
1935 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1936 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1937 	uint16_t parity_offset;
1938 
1939 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1940 			"Number of segments differ in original (%u) and filled (%u) op",
1941 			nb_dst_segments, m->nb_segs);
1942 
1943 	/* Validate each mbuf segment length */
1944 	for (i = 0; i < nb_dst_segments; ++i) {
1945 		/* Apply offset to the first mbuf segment */
1946 		uint16_t offset = (i == 0) ? op->offset : 0;
1947 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1948 		total_data_size += orig_op->segments[i].length;
1949 
1950 		TEST_ASSERT(orig_op->segments[i].length <
1951 				(uint32_t)(data_len + 64),
1952 				"Length of segment differ in original (%u) and filled (%u) op",
1953 				orig_op->segments[i].length, data_len);
1954 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1955 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1956 
1957 		if (!(ldpc_cap_flags &
1958 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1959 				) || (ops_ld->op_flags &
1960 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1961 			data_len -= ops_ld->z_c;
1962 			parity_offset = data_len;
1963 		} else {
1964 			/* Compute RM out size and number of rows */
1965 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1966 					* ops_ld->z_c - ops_ld->n_filler;
1967 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1968 					ops_ld->n_filler;
1969 			if (data_len > deRmOutSize)
1970 				data_len = deRmOutSize;
1971 			if (data_len > orig_op->segments[i].length)
1972 				data_len = orig_op->segments[i].length;
1973 		}
1974 		/*
1975 		 * HARQ output can have minor differences
1976 		 * due to integer representation and related scaling
1977 		 */
1978 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1979 			if (j == parity_offset) {
1980 				/* Special Handling of the filler bits */
1981 				for (k = 0; k < ops_ld->n_filler; k++) {
1982 					if (harq_out[jj] !=
1983 							llr_max_pre_scaling) {
1984 						printf("HARQ Filler issue %d: %d %d\n",
1985 							jj, harq_out[jj],
1986 							llr_max_pre_scaling);
1987 						byte_error++;
1988 					}
1989 					jj++;
1990 				}
1991 			}
1992 			if (!(ops_ld->op_flags &
1993 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1994 				if (ldpc_llr_decimals > 1)
1995 					harq_out[jj] = (harq_out[jj] + 1)
1996 						>> (ldpc_llr_decimals - 1);
1997 				/* Saturated to S7 */
1998 				if (harq_orig[j] > llr_max)
1999 					harq_orig[j] = llr_max;
2000 				if (harq_orig[j] < -llr_max)
2001 					harq_orig[j] = -llr_max;
2002 			}
2003 			if (harq_orig[j] != harq_out[jj]) {
2004 				error = (harq_orig[j] > harq_out[jj]) ?
2005 						harq_orig[j] - harq_out[jj] :
2006 						harq_out[jj] - harq_orig[j];
2007 				abs_harq_origin = harq_orig[j] > 0 ?
2008 							harq_orig[j] :
2009 							-harq_orig[j];
2010 				/* Residual quantization error */
2011 				if ((error > 8 && (abs_harq_origin <
2012 						(llr_max - 16))) ||
2013 						(error > 16)) {
2014 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2015 							j, harq_orig[j],
2016 							harq_out[jj], error);
2017 					byte_error++;
2018 					cum_error += error;
2019 				}
2020 			}
2021 		}
2022 		m = m->next;
2023 	}
2024 
2025 	if (byte_error)
2026 		TEST_ASSERT(byte_error <= 1,
2027 				"HARQ output mismatch (%d) %d",
2028 				byte_error, cum_error);
2029 
2030 	/* Validate total mbuf pkt length */
2031 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2032 	TEST_ASSERT(total_data_size < pkt_len + 64,
2033 			"Length of data differ in original (%u) and filled (%u) op",
2034 			total_data_size, pkt_len);
2035 
2036 	return TEST_SUCCESS;
2037 }
2038 
2039 static int
2040 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2041 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2042 {
2043 	unsigned int i;
2044 	int ret;
2045 	struct op_data_entries *hard_data_orig =
2046 			&test_vector.entries[DATA_HARD_OUTPUT];
2047 	struct op_data_entries *soft_data_orig =
2048 			&test_vector.entries[DATA_SOFT_OUTPUT];
2049 	struct rte_bbdev_op_turbo_dec *ops_td;
2050 	struct rte_bbdev_op_data *hard_output;
2051 	struct rte_bbdev_op_data *soft_output;
2052 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2053 
2054 	for (i = 0; i < n; ++i) {
2055 		ops_td = &ops[i]->turbo_dec;
2056 		hard_output = &ops_td->hard_output;
2057 		soft_output = &ops_td->soft_output;
2058 
2059 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2060 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2061 					"Returned iter_count (%d) > expected iter_count (%d)",
2062 					ops_td->iter_count, ref_td->iter_count);
2063 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2064 		TEST_ASSERT_SUCCESS(ret,
2065 				"Checking status and ordering for decoder failed");
2066 
2067 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2068 				hard_data_orig),
2069 				"Hard output buffers (CB=%u) are not equal",
2070 				i);
2071 
2072 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2073 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2074 					soft_data_orig),
2075 					"Soft output buffers (CB=%u) are not equal",
2076 					i);
2077 	}
2078 
2079 	return TEST_SUCCESS;
2080 }
2081 
2082 /* Count the number of code blocks in error */
2083 static int
2084 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2085 {
2086 	unsigned int i;
2087 	struct op_data_entries *hard_data_orig =
2088 			&test_vector.entries[DATA_HARD_OUTPUT];
2089 	struct rte_bbdev_op_ldpc_dec *ops_td;
2090 	struct rte_bbdev_op_data *hard_output;
2091 	int errors = 0;
2092 	struct rte_mbuf *m;
2093 
2094 	for (i = 0; i < n; ++i) {
2095 		ops_td = &ops[i]->ldpc_dec;
2096 		hard_output = &ops_td->hard_output;
2097 		m = hard_output->data;
2098 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2099 				hard_data_orig->segments[0].addr,
2100 				hard_data_orig->segments[0].length))
2101 			errors++;
2102 	}
2103 	return errors;
2104 }
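/*
 * Note: the BLER counting above compares each hard output against the first
 * reference segment with memcmp(), so any differing byte marks the whole
 * code block as erroneous.
 */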
2105 
2106 static int
2107 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2108 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2109 {
2110 	unsigned int i;
2111 	int ret;
2112 	struct op_data_entries *hard_data_orig =
2113 			&test_vector.entries[DATA_HARD_OUTPUT];
2114 	struct op_data_entries *soft_data_orig =
2115 			&test_vector.entries[DATA_SOFT_OUTPUT];
2116 	struct op_data_entries *harq_data_orig =
2117 				&test_vector.entries[DATA_HARQ_OUTPUT];
2118 	struct rte_bbdev_op_ldpc_dec *ops_td;
2119 	struct rte_bbdev_op_data *hard_output;
2120 	struct rte_bbdev_op_data *harq_output;
2121 	struct rte_bbdev_op_data *soft_output;
2122 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2123 
2124 	for (i = 0; i < n; ++i) {
2125 		ops_td = &ops[i]->ldpc_dec;
2126 		hard_output = &ops_td->hard_output;
2127 		harq_output = &ops_td->harq_combined_output;
2128 		soft_output = &ops_td->soft_output;
2129 
2130 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2131 		TEST_ASSERT_SUCCESS(ret,
2132 				"Checking status and ordering for decoder failed");
2133 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2134 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2135 					"Returned iter_count (%d) > expected iter_count (%d)",
2136 					ops_td->iter_count, ref_td->iter_count);
2137 		/*
2138 		 * We can ignore output data when the decoding failed to
2139 		 * converge or for loop-back cases
2140 		 */
2141 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2142 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2143 				&& (ops[i]->status
2144 				& (1 << RTE_BBDEV_SYNDROME_ERROR)) == 0)
2146 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2147 					hard_data_orig),
2148 					"Hard output buffers (CB=%u) are not equal",
2149 					i);
2150 
2151 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2152 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2153 					soft_data_orig),
2154 					"Soft output buffers (CB=%u) are not equal",
2155 					i);
2156 		if (ref_op->ldpc_dec.op_flags &
2157 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2158 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2159 					harq_data_orig, ops_td),
2160 					"HARQ output buffers (CB=%u) are not equal",
2161 					i);
2162 		}
2163 		if (ref_op->ldpc_dec.op_flags &
2164 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2165 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2166 					harq_data_orig, ops_td),
2167 					"HARQ output buffers (CB=%u) are not equal",
2168 					i);
2169 
2170 	}
2171 
2172 	return TEST_SUCCESS;
2173 }
2174 
2175 
2176 static int
2177 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2178 		struct rte_bbdev_enc_op *ref_op)
2179 {
2180 	unsigned int i;
2181 	int ret;
2182 	struct op_data_entries *hard_data_orig =
2183 			&test_vector.entries[DATA_HARD_OUTPUT];
2184 
2185 	for (i = 0; i < n; ++i) {
2186 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2187 		TEST_ASSERT_SUCCESS(ret,
2188 				"Checking status and ordering for encoder failed");
2189 		TEST_ASSERT_SUCCESS(validate_op_chain(
2190 				&ops[i]->turbo_enc.output,
2191 				hard_data_orig),
2192 				"Output buffers (CB=%u) are not equal",
2193 				i);
2194 	}
2195 
2196 	return TEST_SUCCESS;
2197 }
2198 
2199 static int
2200 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2201 		struct rte_bbdev_enc_op *ref_op)
2202 {
2203 	unsigned int i;
2204 	int ret;
2205 	struct op_data_entries *hard_data_orig =
2206 			&test_vector.entries[DATA_HARD_OUTPUT];
2207 
2208 	for (i = 0; i < n; ++i) {
2209 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2210 		TEST_ASSERT_SUCCESS(ret,
2211 				"Checking status and ordering for encoder failed");
2212 		TEST_ASSERT_SUCCESS(validate_op_chain(
2213 				&ops[i]->ldpc_enc.output,
2214 				hard_data_orig),
2215 				"Output buffers (CB=%u) are not equal",
2216 				i);
2217 	}
2218 
2219 	return TEST_SUCCESS;
2220 }
2221 
2222 static void
2223 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2224 {
2225 	unsigned int i;
2226 	struct op_data_entries *entry;
2227 
2228 	op->turbo_dec = test_vector.turbo_dec;
2229 	entry = &test_vector.entries[DATA_INPUT];
2230 	for (i = 0; i < entry->nb_segments; ++i)
2231 		op->turbo_dec.input.length +=
2232 				entry->segments[i].length;
2233 }
2234 
2235 static void
2236 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2237 {
2238 	unsigned int i;
2239 	struct op_data_entries *entry;
2240 
2241 	op->ldpc_dec = test_vector.ldpc_dec;
2242 	entry = &test_vector.entries[DATA_INPUT];
2243 	for (i = 0; i < entry->nb_segments; ++i)
2244 		op->ldpc_dec.input.length +=
2245 				entry->segments[i].length;
2246 	if (test_vector.ldpc_dec.op_flags &
2247 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2248 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2249 		for (i = 0; i < entry->nb_segments; ++i)
2250 			op->ldpc_dec.harq_combined_input.length +=
2251 				entry->segments[i].length;
2252 	}
2253 }
2254 
2255 
2256 static void
2257 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2258 {
2259 	unsigned int i;
2260 	struct op_data_entries *entry;
2261 
2262 	op->turbo_enc = test_vector.turbo_enc;
2263 	entry = &test_vector.entries[DATA_INPUT];
2264 	for (i = 0; i < entry->nb_segments; ++i)
2265 		op->turbo_enc.input.length +=
2266 				entry->segments[i].length;
2267 }
2268 
2269 static void
2270 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2271 {
2272 	unsigned int i;
2273 	struct op_data_entries *entry;
2274 
2275 	op->ldpc_enc = test_vector.ldpc_enc;
2276 	entry = &test_vector.entries[DATA_INPUT];
2277 	for (i = 0; i < entry->nb_segments; ++i)
2278 		op->ldpc_enc.input.length +=
2279 				entry->segments[i].length;
2280 }
2281 
2282 static uint32_t
2283 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2284 {
2285 	uint8_t i;
2286 	uint32_t c, r, tb_size = 0;
2287 
2288 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2289 		tb_size = op->turbo_dec.tb_params.k_neg;
2290 	} else {
2291 		c = op->turbo_dec.tb_params.c;
2292 		r = op->turbo_dec.tb_params.r;
2293 		for (i = 0; i < c-r; i++)
2294 			tb_size += ((i + r) < op->turbo_dec.tb_params.c_neg) ?
2295 				op->turbo_dec.tb_params.k_neg :
2296 				op->turbo_dec.tb_params.k_pos;
2297 	}
2298 	return tb_size;
2299 }
2300 
2301 static uint32_t
2302 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2303 {
2304 	uint8_t i;
2305 	uint32_t c, r, tb_size = 0;
2306 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2307 
2308 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2309 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2310 	} else {
2311 		c = op->ldpc_dec.tb_params.c;
2312 		r = op->ldpc_dec.tb_params.r;
2313 		for (i = 0; i < c-r; i++)
2314 			tb_size += sys_cols * op->ldpc_dec.z_c
2315 					- op->ldpc_dec.n_filler;
2316 	}
2317 	return tb_size;
2318 }
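/*
 * Illustrative example (assumed values): for BG1 (22 systematic columns),
 * z_c = 384 and n_filler = 112, each code block contributes
 * 22 * 384 - 112 = 8336 information bits to the TB size used in the
 * throughput figures.
 */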
2319 
2320 static uint32_t
2321 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2322 {
2323 	uint8_t i;
2324 	uint32_t c, r, tb_size = 0;
2325 
2326 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2327 		tb_size = op->turbo_enc.tb_params.k_neg;
2328 	} else {
2329 		c = op->turbo_enc.tb_params.c;
2330 		r = op->turbo_enc.tb_params.r;
2331 		for (i = 0; i < c-r; i++)
2332 			tb_size += ((i + r) < op->turbo_enc.tb_params.c_neg) ?
2333 				op->turbo_enc.tb_params.k_neg :
2334 				op->turbo_enc.tb_params.k_pos;
2335 	}
2336 	return tb_size;
2337 }
2338 
2339 static uint32_t
2340 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2341 {
2342 	uint8_t i;
2343 	uint32_t c, r, tb_size = 0;
2344 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2345 
2346 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2347 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2348 	} else {
2349 		c = op->ldpc_enc.tb_params.c;
2350 		r = op->ldpc_enc.tb_params.r;
2351 		for (i = 0; i < c-r; i++)
2352 			tb_size += sys_cols * op->ldpc_enc.z_c
2353 					- op->ldpc_enc.n_filler;
2354 	}
2355 	return tb_size;
2356 }
2357 
2358 
2359 static int
2360 init_test_op_params(struct test_op_params *op_params,
2361 		enum rte_bbdev_op_type op_type, const int expected_status,
2362 		const int vector_mask, struct rte_mempool *ops_mp,
2363 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2364 {
2365 	int ret = 0;
2366 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2367 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2368 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2369 				&op_params->ref_dec_op, 1);
2370 	else
2371 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2372 				&op_params->ref_enc_op, 1);
2373 
2374 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2375 
2376 	op_params->mp = ops_mp;
2377 	op_params->burst_sz = burst_sz;
2378 	op_params->num_to_process = num_to_process;
2379 	op_params->num_lcores = num_lcores;
2380 	op_params->vector_mask = vector_mask;
2381 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2382 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2383 		op_params->ref_dec_op->status = expected_status;
2384 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2385 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2386 		op_params->ref_enc_op->status = expected_status;
2387 	return 0;
2388 }
2389 
2390 static int
2391 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2392 		struct test_op_params *op_params)
2393 {
2394 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2395 	unsigned int i;
2396 	struct active_device *ad;
2397 	unsigned int burst_sz = get_burst_sz();
2398 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2399 	const struct rte_bbdev_op_cap *capabilities = NULL;
2400 
2401 	ad = &active_devs[dev_id];
2402 
2403 	/* Check if device supports op_type */
2404 	if (!is_avail_op(ad, test_vector.op_type))
2405 		return TEST_SUCCESS;
2406 
2407 	struct rte_bbdev_info info;
2408 	rte_bbdev_info_get(ad->dev_id, &info);
2409 	socket_id = GET_SOCKET(info.socket_id);
2410 
2411 	f_ret = create_mempools(ad, socket_id, op_type,
2412 			get_num_ops());
2413 	if (f_ret != TEST_SUCCESS) {
2414 		printf("Couldn't create mempools\n");
2415 		goto fail;
2416 	}
2417 	if (op_type == RTE_BBDEV_OP_NONE)
2418 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2419 
2420 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2421 			test_vector.expected_status,
2422 			test_vector.mask,
2423 			ad->ops_mempool,
2424 			burst_sz,
2425 			get_num_ops(),
2426 			get_num_lcores());
2427 	if (f_ret != TEST_SUCCESS) {
2428 		printf("Couldn't init test op params\n");
2429 		goto fail;
2430 	}
2431 
2432 
2433 	/* Find capabilities */
2434 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2435 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2436 		if (cap->type == test_vector.op_type) {
2437 			capabilities = cap;
2438 			break;
2439 		}
2440 		cap++;
2441 	}
2442 	TEST_ASSERT_NOT_NULL(capabilities,
2443 			"Couldn't find capabilities");
2444 
2445 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2446 		create_reference_dec_op(op_params->ref_dec_op);
2447 	else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2448 		create_reference_enc_op(op_params->ref_enc_op);
2449 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2450 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2451 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2452 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2453 
2454 	for (i = 0; i < ad->nb_queues; ++i) {
2455 		f_ret = fill_queue_buffers(op_params,
2456 				ad->in_mbuf_pool,
2457 				ad->hard_out_mbuf_pool,
2458 				ad->soft_out_mbuf_pool,
2459 				ad->harq_in_mbuf_pool,
2460 				ad->harq_out_mbuf_pool,
2461 				ad->queue_ids[i],
2462 				capabilities,
2463 				info.drv.min_alignment,
2464 				socket_id);
2465 		if (f_ret != TEST_SUCCESS) {
2466 			printf("Couldn't init queue buffers\n");
2467 			goto fail;
2468 		}
2469 	}
2470 
2471 	/* Run test case function */
2472 	t_ret = test_case_func(ad, op_params);
2473 
2474 	/* Free active device resources and return */
2475 	free_buffers(ad, op_params);
2476 	return t_ret;
2477 
2478 fail:
2479 	free_buffers(ad, op_params);
2480 	return TEST_FAILED;
2481 }
2482 
2483 /* Run given test function per active device per supported op type
2484  * per burst size.
2485  */
2486 static int
2487 run_test_case(test_case_function *test_case_func)
2488 {
2489 	int ret = 0;
2490 	uint8_t dev;
2491 
2492 	/* Alloc op_params */
2493 	struct test_op_params *op_params = rte_zmalloc(NULL,
2494 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2495 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2496 			RTE_ALIGN(sizeof(struct test_op_params),
2497 				RTE_CACHE_LINE_SIZE));
2498 
2499 	/* For each device run test case function */
2500 	for (dev = 0; dev < nb_active_devs; ++dev)
2501 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2502 
2503 	rte_free(op_params);
2504 
2505 	return ret;
2506 }
2507 
2508 
2509 /* Push back the HARQ output from DDR to host */
2510 static void
2511 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2512 		struct rte_bbdev_dec_op **ops,
2513 		const uint16_t n)
2514 {
2515 	uint16_t j;
2516 	int save_status, ret;
2517 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2518 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2519 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2520 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2521 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2522 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2523 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2524 	for (j = 0; j < n; ++j) {
2525 		if ((loopback && mem_out) || hc_out) {
2526 			save_status = ops[j]->status;
2527 			ops[j]->ldpc_dec.op_flags =
2528 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2529 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2530 			if (h_comp)
2531 				ops[j]->ldpc_dec.op_flags +=
2532 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2533 			ops[j]->ldpc_dec.harq_combined_input.offset =
2534 					harq_offset;
2535 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2536 			harq_offset += HARQ_INCR;
2537 			if (!loopback)
2538 				ops[j]->ldpc_dec.harq_combined_input.length =
2539 				ops[j]->ldpc_dec.harq_combined_output.length;
2540 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2541 					&ops[j], 1);
2542 			ret = 0;
2543 			while (ret == 0)
2544 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2545 						dev_id, queue_id,
2546 						&ops_deq[j], 1);
2547 			ops[j]->ldpc_dec.op_flags = flags;
2548 			ops[j]->status = save_status;
2549 		}
2550 	}
2551 }
2552 
2553 /*
2554  * Preload the HARQ input into HW DDR memory (via a loopback enqueue)
2555  * and adjust the HARQ offsets accordingly
2556  */
2557 static void
2558 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2559 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2560 		bool preload)
2561 {
2562 	uint16_t j;
2563 	int deq;
2564 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2565 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2566 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2567 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2568 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2569 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2570 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2571 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2572 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2573 	if ((mem_in || hc_in) && preload) {
2574 		for (j = 0; j < n; ++j) {
2575 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2576 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2577 			ops[j]->ldpc_dec.op_flags =
2578 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2579 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2580 			if (h_comp)
2581 				ops[j]->ldpc_dec.op_flags +=
2582 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2583 			ops[j]->ldpc_dec.harq_combined_output.offset =
2584 					harq_offset;
2585 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2586 			harq_offset += HARQ_INCR;
2587 		}
2588 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2589 		deq = 0;
2590 		while (deq != n)
2591 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2592 					dev_id, queue_id, &ops_deq[deq],
2593 					n - deq);
2594 		/* Restore the operations */
2595 		for (j = 0; j < n; ++j) {
2596 			ops[j]->ldpc_dec.op_flags = flags;
2597 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2598 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2599 		}
2600 	}
2601 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2602 	for (j = 0; j < n; ++j) {
2603 		/* Adjust HARQ offset when we reach external DDR */
2604 		if (mem_in || hc_in)
2605 			ops[j]->ldpc_dec.harq_combined_input.offset
2606 				= harq_offset;
2607 		if (mem_out || hc_out)
2608 			ops[j]->ldpc_dec.harq_combined_output.offset
2609 				= harq_offset;
2610 		harq_offset += HARQ_INCR;
2611 	}
2612 }
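/*
 * Layout note: operation j on queue q is assigned the external HARQ memory
 * offset (q * MAX_OPS + j) * HARQ_INCR, so queues and operations never
 * overlap in the device HARQ memory.
 */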
2613 
2614 static void
2615 dequeue_event_callback(uint16_t dev_id,
2616 		enum rte_bbdev_event_type event, void *cb_arg,
2617 		void *ret_param)
2618 {
2619 	int ret;
2620 	uint16_t i;
2621 	uint64_t total_time;
2622 	uint16_t deq, burst_sz, num_ops;
2623 	uint16_t queue_id = *(uint16_t *) ret_param;
2624 	struct rte_bbdev_info info;
2625 	double tb_len_bits;
2626 	struct thread_params *tp = cb_arg;
2627 
2628 	/* Find matching thread params using queue_id */
2629 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2630 		if (tp->queue_id == queue_id)
2631 			break;
2632 
2633 	if (i == MAX_QUEUES) {
2634 		printf("%s: Queue_id from interrupt details was not found!\n",
2635 				__func__);
2636 		return;
2637 	}
2638 
2639 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2640 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2641 		printf(
2642 			"Dequeue interrupt handler called for incorrect event!\n");
2643 		return;
2644 	}
2645 
2646 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
2647 	num_ops = tp->op_params->num_to_process;
2648 
2649 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2650 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2651 				&tp->dec_ops[
2652 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2653 				burst_sz);
2654 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2655 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2656 				&tp->dec_ops[
2657 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2658 				burst_sz);
2659 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2660 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2661 				&tp->enc_ops[
2662 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2663 				burst_sz);
2664 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2665 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2666 				&tp->enc_ops[
2667 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2668 				burst_sz);
2669 
2670 	if (deq < burst_sz) {
2671 		printf(
2672 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2673 			burst_sz, deq);
2674 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2675 		return;
2676 	}
2677 
2678 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
2679 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2680 		return;
2681 	}
2682 
2683 	total_time = rte_rdtsc_precise() - tp->start_time;
2684 
2685 	rte_bbdev_info_get(dev_id, &info);
2686 
2687 	ret = TEST_SUCCESS;
2688 
2689 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2690 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2691 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2692 				tp->op_params->vector_mask);
2693 		/* get the max of iter_count for all dequeued ops */
2694 		for (i = 0; i < num_ops; ++i)
2695 			tp->iter_count = RTE_MAX(
2696 					tp->dec_ops[i]->turbo_dec.iter_count,
2697 					tp->iter_count);
2698 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2699 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2700 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2701 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2702 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2703 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2704 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2705 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2706 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2707 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2708 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2709 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2710 				tp->op_params->vector_mask);
2711 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2712 	}
2713 
2714 	if (ret) {
2715 		printf("Buffers validation failed\n");
2716 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2717 	}
2718 
2719 	switch (test_vector.op_type) {
2720 	case RTE_BBDEV_OP_TURBO_DEC:
2721 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2722 		break;
2723 	case RTE_BBDEV_OP_TURBO_ENC:
2724 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2725 		break;
2726 	case RTE_BBDEV_OP_LDPC_DEC:
2727 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2728 		break;
2729 	case RTE_BBDEV_OP_LDPC_ENC:
2730 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2731 		break;
2732 	case RTE_BBDEV_OP_NONE:
2733 		tb_len_bits = 0.0;
2734 		break;
2735 	default:
2736 		printf("Unknown op type: %d\n", test_vector.op_type);
2737 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2738 		return;
2739 	}
2740 
2741 	tp->ops_per_sec += ((double)num_ops) /
2742 			((double)total_time / (double)rte_get_tsc_hz());
2743 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2744 			((double)total_time / (double)rte_get_tsc_hz());
2745 
2746 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2747 }
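/*
 * Illustrative arithmetic for the figures above (assumed values): 512 ops
 * with an 8336-bit TB processed in 1 ms give 512 / 0.001 = 512000 ops/s and
 * (512 * 8336) / 1e6 / 0.001 ~= 4268 Mbps.
 */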
2748 
2749 static int
2750 throughput_intr_lcore_ldpc_dec(void *arg)
2751 {
2752 	struct thread_params *tp = arg;
2753 	unsigned int enqueued;
2754 	const uint16_t queue_id = tp->queue_id;
2755 	const uint16_t burst_sz = tp->op_params->burst_sz;
2756 	const uint16_t num_to_process = tp->op_params->num_to_process;
2757 	struct rte_bbdev_dec_op *ops[num_to_process];
2758 	struct test_buffers *bufs = NULL;
2759 	struct rte_bbdev_info info;
2760 	int ret, i, j;
2761 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2762 	uint16_t num_to_enq, enq;
2763 
2764 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2765 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2766 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2767 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2768 
2769 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2770 			"BURST_SIZE should be <= %u", MAX_BURST);
2771 
2772 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2773 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2774 			tp->dev_id, queue_id);
2775 
2776 	rte_bbdev_info_get(tp->dev_id, &info);
2777 
2778 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2779 			"NUM_OPS cannot exceed %u for this device",
2780 			info.drv.queue_size_lim);
2781 
2782 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2783 
2784 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2785 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2786 
2787 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2788 
2789 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2790 				num_to_process);
2791 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2792 			num_to_process);
2793 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2794 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2795 				bufs->hard_outputs, bufs->soft_outputs,
2796 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2797 
2798 	/* Set counter to validate the ordering */
2799 	for (j = 0; j < num_to_process; ++j)
2800 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2801 
2802 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2803 		for (i = 0; i < num_to_process; ++i) {
2804 			if (!loopback)
2805 				rte_pktmbuf_reset(
2806 					ops[i]->ldpc_dec.hard_output.data);
2807 			if (hc_out || loopback)
2808 				mbuf_reset(
2809 				ops[i]->ldpc_dec.harq_combined_output.data);
2810 		}
2811 
2812 		tp->start_time = rte_rdtsc_precise();
2813 		for (enqueued = 0; enqueued < num_to_process;) {
2814 			num_to_enq = burst_sz;
2815 
2816 			if (unlikely(num_to_process - enqueued < num_to_enq))
2817 				num_to_enq = num_to_process - enqueued;
2818 
2819 			enq = 0;
2820 			do {
2821 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2822 						tp->dev_id,
2823 						queue_id, &ops[enqueued],
2824 						num_to_enq);
2825 			} while (unlikely(num_to_enq != enq));
2826 			enqueued += enq;
2827 
2828 			/* Write the current number of enqueued descriptors to
2829 			 * the thread's burst_sz. This ensures that the proper
2830 			 * number of descriptors is dequeued in the callback
2831 			 * function, which is needed for the last batch when
2832 			 * the number of operations is not a multiple of the
2833 			 * burst size.
2834 			 */
2835 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2836 
2837 			/* Wait until processing of previous batch is
2838 			 * completed
2839 			 */
2840 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2841 		}
2842 		if (j != TEST_REPETITIONS - 1)
2843 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2844 	}
2845 
2846 	return TEST_SUCCESS;
2847 }
2848 
2849 static int
2850 throughput_intr_lcore_dec(void *arg)
2851 {
2852 	struct thread_params *tp = arg;
2853 	unsigned int enqueued;
2854 	const uint16_t queue_id = tp->queue_id;
2855 	const uint16_t burst_sz = tp->op_params->burst_sz;
2856 	const uint16_t num_to_process = tp->op_params->num_to_process;
2857 	struct rte_bbdev_dec_op *ops[num_to_process];
2858 	struct test_buffers *bufs = NULL;
2859 	struct rte_bbdev_info info;
2860 	int ret, i, j;
2861 	uint16_t num_to_enq, enq;
2862 
2863 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2864 			"BURST_SIZE should be <= %u", MAX_BURST);
2865 
2866 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2867 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2868 			tp->dev_id, queue_id);
2869 
2870 	rte_bbdev_info_get(tp->dev_id, &info);
2871 
2872 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2873 			"NUM_OPS cannot exceed %u for this device",
2874 			info.drv.queue_size_lim);
2875 
2876 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2877 
2878 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2879 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2880 
2881 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2882 
2883 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2884 				num_to_process);
2885 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2886 			num_to_process);
2887 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2888 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2889 				bufs->hard_outputs, bufs->soft_outputs,
2890 				tp->op_params->ref_dec_op);
2891 
2892 	/* Set counter to validate the ordering */
2893 	for (j = 0; j < num_to_process; ++j)
2894 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2895 
2896 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2897 		for (i = 0; i < num_to_process; ++i)
2898 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2899 
2900 		tp->start_time = rte_rdtsc_precise();
2901 		for (enqueued = 0; enqueued < num_to_process;) {
2902 			num_to_enq = burst_sz;
2903 
2904 			if (unlikely(num_to_process - enqueued < num_to_enq))
2905 				num_to_enq = num_to_process - enqueued;
2906 
2907 			enq = 0;
2908 			do {
2909 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2910 						queue_id, &ops[enqueued],
2911 						num_to_enq);
2912 			} while (unlikely(num_to_enq != enq));
2913 			enqueued += enq;
2914 
2915 			/* Write the current number of enqueued descriptors to
2916 			 * the thread's burst_sz. This ensures that the proper
2917 			 * number of descriptors is dequeued in the callback
2918 			 * function, which is needed for the last batch when
2919 			 * the number of operations is not a multiple of the
2920 			 * burst size.
2921 			 */
2922 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2923 
2924 			/* Wait until processing of previous batch is
2925 			 * completed
2926 			 */
2927 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2928 		}
2929 		if (j != TEST_REPETITIONS - 1)
2930 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2931 	}
2932 
2933 	return TEST_SUCCESS;
2934 }
2935 
2936 static int
2937 throughput_intr_lcore_enc(void *arg)
2938 {
2939 	struct thread_params *tp = arg;
2940 	unsigned int enqueued;
2941 	const uint16_t queue_id = tp->queue_id;
2942 	const uint16_t burst_sz = tp->op_params->burst_sz;
2943 	const uint16_t num_to_process = tp->op_params->num_to_process;
2944 	struct rte_bbdev_enc_op *ops[num_to_process];
2945 	struct test_buffers *bufs = NULL;
2946 	struct rte_bbdev_info info;
2947 	int ret, i, j;
2948 	uint16_t num_to_enq, enq;
2949 
2950 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2951 			"BURST_SIZE should be <= %u", MAX_BURST);
2952 
2953 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2954 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2955 			tp->dev_id, queue_id);
2956 
2957 	rte_bbdev_info_get(tp->dev_id, &info);
2958 
2959 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2960 			"NUM_OPS cannot exceed %u for this device",
2961 			info.drv.queue_size_lim);
2962 
2963 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2964 
2965 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2966 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2967 
2968 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2969 
2970 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2971 			num_to_process);
2972 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2973 			num_to_process);
2974 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2975 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2976 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2977 
2978 	/* Set counter to validate the ordering */
2979 	for (j = 0; j < num_to_process; ++j)
2980 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2981 
2982 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2983 		for (i = 0; i < num_to_process; ++i)
2984 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2985 
2986 		tp->start_time = rte_rdtsc_precise();
2987 		for (enqueued = 0; enqueued < num_to_process;) {
2988 			num_to_enq = burst_sz;
2989 
2990 			if (unlikely(num_to_process - enqueued < num_to_enq))
2991 				num_to_enq = num_to_process - enqueued;
2992 
2993 			enq = 0;
2994 			do {
2995 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2996 						queue_id, &ops[enqueued],
2997 						num_to_enq);
2998 			} while (unlikely(enq != num_to_enq));
2999 			enqueued += enq;
3000 
3001 			/* Write the current number of enqueued descriptors to
3002 			 * the thread's burst_sz. This ensures that the proper
3003 			 * number of descriptors is dequeued in the callback
3004 			 * function, which is needed for the last batch when
3005 			 * the number of operations is not a multiple of the
3006 			 * burst size.
3007 			 */
3008 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3009 
3010 			/* Wait until processing of previous batch is
3011 			 * completed
3012 			 */
3013 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3014 		}
3015 		if (j != TEST_REPETITIONS - 1)
3016 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3017 	}
3018 
3019 	return TEST_SUCCESS;
3020 }
3021 
3022 
3023 static int
3024 throughput_intr_lcore_ldpc_enc(void *arg)
3025 {
3026 	struct thread_params *tp = arg;
3027 	unsigned int enqueued;
3028 	const uint16_t queue_id = tp->queue_id;
3029 	const uint16_t burst_sz = tp->op_params->burst_sz;
3030 	const uint16_t num_to_process = tp->op_params->num_to_process;
3031 	struct rte_bbdev_enc_op *ops[num_to_process];
3032 	struct test_buffers *bufs = NULL;
3033 	struct rte_bbdev_info info;
3034 	int ret, i, j;
3035 	uint16_t num_to_enq, enq;
3036 
3037 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3038 			"BURST_SIZE should be <= %u", MAX_BURST);
3039 
3040 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3041 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3042 			tp->dev_id, queue_id);
3043 
3044 	rte_bbdev_info_get(tp->dev_id, &info);
3045 
3046 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3047 			"NUM_OPS cannot exceed %u for this device",
3048 			info.drv.queue_size_lim);
3049 
3050 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3051 
3052 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3053 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3054 
3055 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3056 
3057 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3058 			num_to_process);
3059 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3060 			num_to_process);
3061 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3062 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3063 				bufs->inputs, bufs->hard_outputs,
3064 				tp->op_params->ref_enc_op);
3065 
3066 	/* Set counter to validate the ordering */
3067 	for (j = 0; j < num_to_process; ++j)
3068 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3069 
3070 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3071 		for (i = 0; i < num_to_process; ++i)
3072 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3073 
3074 		tp->start_time = rte_rdtsc_precise();
3075 		for (enqueued = 0; enqueued < num_to_process;) {
3076 			num_to_enq = burst_sz;
3077 
3078 			if (unlikely(num_to_process - enqueued < num_to_enq))
3079 				num_to_enq = num_to_process - enqueued;
3080 
3081 			enq = 0;
3082 			do {
3083 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3084 						tp->dev_id,
3085 						queue_id, &ops[enqueued],
3086 						num_to_enq);
3087 			} while (unlikely(enq != num_to_enq));
3088 			enqueued += enq;
3089 
3090 			/* Write the current number of enqueued descriptors to
3091 			 * the thread's burst_sz. This ensures that the proper
3092 			 * number of descriptors is dequeued in the callback
3093 			 * function, which is needed for the last batch when
3094 			 * the number of operations is not a multiple of the
3095 			 * burst size.
3096 			 */
3097 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3098 
3099 			/* Wait until processing of previous batch is
3100 			 * completed
3101 			 */
3102 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3103 		}
3104 		if (j != TEST_REPETITIONS - 1)
3105 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3106 	}
3107 
3108 	return TEST_SUCCESS;
3109 }
3110 
3111 static int
3112 throughput_pmd_lcore_dec(void *arg)
3113 {
3114 	struct thread_params *tp = arg;
3115 	uint16_t enq, deq;
3116 	uint64_t total_time = 0, start_time;
3117 	const uint16_t queue_id = tp->queue_id;
3118 	const uint16_t burst_sz = tp->op_params->burst_sz;
3119 	const uint16_t num_ops = tp->op_params->num_to_process;
3120 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3121 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3122 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3123 	struct test_buffers *bufs = NULL;
3124 	int i, j, ret;
3125 	struct rte_bbdev_info info;
3126 	uint16_t num_to_enq;
3127 
3128 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3129 			"BURST_SIZE should be <= %u", MAX_BURST);
3130 
3131 	rte_bbdev_info_get(tp->dev_id, &info);
3132 
3133 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3134 			"NUM_OPS cannot exceed %u for this device",
3135 			info.drv.queue_size_lim);
3136 
3137 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3138 
3139 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3140 
3141 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3142 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3143 
3144 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3145 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3146 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3147 
3148 	/* Set counter to validate the ordering */
3149 	for (j = 0; j < num_ops; ++j)
3150 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3151 
3152 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3153 
3154 		for (j = 0; j < num_ops; ++j)
3155 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3156 
3157 		start_time = rte_rdtsc_precise();
3158 
3159 		for (enq = 0, deq = 0; enq < num_ops;) {
3160 			num_to_enq = burst_sz;
3161 
3162 			if (unlikely(num_ops - enq < num_to_enq))
3163 				num_to_enq = num_ops - enq;
3164 
3165 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3166 					queue_id, &ops_enq[enq], num_to_enq);
3167 
3168 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3169 					queue_id, &ops_deq[deq], enq - deq);
3170 		}
3171 
3172 		/* dequeue the remaining */
3173 		while (deq < enq) {
3174 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3175 					queue_id, &ops_deq[deq], enq - deq);
3176 		}
3177 
3178 		total_time += rte_rdtsc_precise() - start_time;
3179 	}
3180 
3181 	tp->iter_count = 0;
3182 	/* get the max of iter_count for all dequeued ops */
3183 	for (i = 0; i < num_ops; ++i) {
3184 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3185 				tp->iter_count);
3186 	}
3187 
3188 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3189 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3190 				tp->op_params->vector_mask);
3191 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3192 	}
3193 
3194 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3195 
3196 	double tb_len_bits = calc_dec_TB_size(ref_op);
3197 
3198 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3199 			((double)total_time / (double)rte_get_tsc_hz());
3200 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3201 			1000000.0) / ((double)total_time /
3202 			(double)rte_get_tsc_hz());
3203 
3204 	return TEST_SUCCESS;
3205 }
3206 
3207 static int
3208 bler_pmd_lcore_ldpc_dec(void *arg)
3209 {
3210 	struct thread_params *tp = arg;
3211 	uint16_t enq, deq;
3212 	uint64_t total_time = 0, start_time;
3213 	const uint16_t queue_id = tp->queue_id;
3214 	const uint16_t burst_sz = tp->op_params->burst_sz;
3215 	const uint16_t num_ops = tp->op_params->num_to_process;
3216 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3217 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3218 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3219 	struct test_buffers *bufs = NULL;
3220 	int i, j, ret;
3221 	float parity_bler = 0;
3222 	struct rte_bbdev_info info;
3223 	uint16_t num_to_enq;
3224 	bool extDdr = check_bit(ldpc_cap_flags,
3225 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3226 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3227 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3228 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3229 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3230 
3231 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3232 			"BURST_SIZE should be <= %u", MAX_BURST);
3233 
3234 	rte_bbdev_info_get(tp->dev_id, &info);
3235 
3236 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3237 			"NUM_OPS cannot exceed %u for this device",
3238 			info.drv.queue_size_lim);
3239 
3240 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3241 
3242 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3243 
3244 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3245 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3246 
3247 	/* For BLER tests we need to enable early termination */
3248 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3249 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3250 		ref_op->ldpc_dec.op_flags +=
3251 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3252 	ref_op->ldpc_dec.iter_max = get_iter_max();
3253 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3254 
3255 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3256 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3257 				bufs->hard_outputs, bufs->soft_outputs,
3258 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3259 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3260 
3261 	/* Set counter to validate the ordering */
3262 	for (j = 0; j < num_ops; ++j)
3263 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3264 
3265 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3266 		for (j = 0; j < num_ops; ++j) {
3267 			if (!loopback)
3268 				mbuf_reset(
3269 				ops_enq[j]->ldpc_dec.hard_output.data);
3270 			if (hc_out || loopback)
3271 				mbuf_reset(
3272 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3273 		}
3274 		if (extDdr)
3275 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3276 					num_ops, true);
3277 		start_time = rte_rdtsc_precise();
3278 
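		/* Enqueue in bursts while opportunistically dequeuing completed ops to avoid filling the device queue. */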
3279 		for (enq = 0, deq = 0; enq < num_ops;) {
3280 			num_to_enq = burst_sz;
3281 
3282 			if (unlikely(num_ops - enq < num_to_enq))
3283 				num_to_enq = num_ops - enq;
3284 
3285 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3286 					queue_id, &ops_enq[enq], num_to_enq);
3287 
3288 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3289 					queue_id, &ops_deq[deq], enq - deq);
3290 		}
3291 
3292 		/* dequeue the remaining */
3293 		while (deq < enq) {
3294 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3295 					queue_id, &ops_deq[deq], enq - deq);
3296 		}
3297 
3298 		total_time += rte_rdtsc_precise() - start_time;
3299 	}
3300 
3301 	tp->iter_count = 0;
3302 	tp->iter_average = 0;
3303 	/* Get the max/average iteration counts and count syndrome errors across all dequeued ops */
3304 	for (i = 0; i < num_ops; ++i) {
3305 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3306 				tp->iter_count);
3307 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3308 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3309 			parity_bler += 1.0;
3310 	}
3311 
3312 	parity_bler /= num_ops; /* BLER estimate based on the syndrome (SYND) parity-check status */
3313 	tp->iter_average /= num_ops;
3314 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3315 
3316 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3317 			&& tp->bler == 0
3318 			&& parity_bler == 0
3319 			&& !hc_out) {
3320 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3321 				tp->op_params->vector_mask);
3322 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3323 	}
3324 
3325 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3326 
3327 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3328 	tp->ops_per_sec = ((double)num_ops * 1) /
3329 			((double)total_time / (double)rte_get_tsc_hz());
3330 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3331 			1000000.0) / ((double)total_time /
3332 			(double)rte_get_tsc_hz());
3333 
3334 	return TEST_SUCCESS;
3335 }
3336 
3337 static int
3338 throughput_pmd_lcore_ldpc_dec(void *arg)
3339 {
3340 	struct thread_params *tp = arg;
3341 	uint16_t enq, deq;
3342 	uint64_t total_time = 0, start_time;
3343 	const uint16_t queue_id = tp->queue_id;
3344 	const uint16_t burst_sz = tp->op_params->burst_sz;
3345 	const uint16_t num_ops = tp->op_params->num_to_process;
3346 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3347 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3348 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3349 	struct test_buffers *bufs = NULL;
3350 	int i, j, ret;
3351 	struct rte_bbdev_info info;
3352 	uint16_t num_to_enq;
3353 	bool extDdr = check_bit(ldpc_cap_flags,
3354 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3355 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3356 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3357 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3358 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3359 
3360 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3361 			"BURST_SIZE should be <= %u", MAX_BURST);
3362 
3363 	rte_bbdev_info_get(tp->dev_id, &info);
3364 
3365 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3366 			"NUM_OPS cannot exceed %u for this device",
3367 			info.drv.queue_size_lim);
3368 
3369 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3370 
3371 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3372 
3373 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3374 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3375 
3376 	/* For throughput tests we need to disable early termination */
3377 	if (check_bit(ref_op->ldpc_dec.op_flags,
3378 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3379 		ref_op->ldpc_dec.op_flags &=
3380 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3381 	ref_op->ldpc_dec.iter_max = get_iter_max();
3382 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3383 
3384 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3385 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3386 				bufs->hard_outputs, bufs->soft_outputs,
3387 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3388 
3389 	/* Set counter to validate the ordering */
3390 	for (j = 0; j < num_ops; ++j)
3391 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3392 
3393 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3394 		for (j = 0; j < num_ops; ++j) {
3395 			if (!loopback)
3396 				mbuf_reset(
3397 				ops_enq[j]->ldpc_dec.hard_output.data);
3398 			if (hc_out || loopback)
3399 				mbuf_reset(
3400 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3401 		}
3402 		if (extDdr)
3403 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3404 					num_ops, true);
3405 		start_time = rte_rdtsc_precise();
3406 
3407 		for (enq = 0, deq = 0; enq < num_ops;) {
3408 			num_to_enq = burst_sz;
3409 
3410 			if (unlikely(num_ops - enq < num_to_enq))
3411 				num_to_enq = num_ops - enq;
3412 
3413 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3414 					queue_id, &ops_enq[enq], num_to_enq);
3415 
3416 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3417 					queue_id, &ops_deq[deq], enq - deq);
3418 		}
3419 
3420 		/* dequeue the remaining */
3421 		while (deq < enq) {
3422 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3423 					queue_id, &ops_deq[deq], enq - deq);
3424 		}
3425 
3426 		total_time += rte_rdtsc_precise() - start_time;
3427 	}
3428 
3429 	tp->iter_count = 0;
3430 	/* get the max of iter_count for all dequeued ops */
3431 	for (i = 0; i < num_ops; ++i) {
3432 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3433 				tp->iter_count);
3434 	}
3435 	if (extDdr) {
3436 		/* Read loopback is not thread safe */
3437 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3438 	}
3439 
3440 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3441 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3442 				tp->op_params->vector_mask);
3443 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3444 	}
3445 
3446 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3447 
3448 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3449 
3450 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3451 			((double)total_time / (double)rte_get_tsc_hz());
3452 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3453 			1000000.0) / ((double)total_time /
3454 			(double)rte_get_tsc_hz());
3455 
3456 	return TEST_SUCCESS;
3457 }
3458 
3459 static int
3460 throughput_pmd_lcore_enc(void *arg)
3461 {
3462 	struct thread_params *tp = arg;
3463 	uint16_t enq, deq;
3464 	uint64_t total_time = 0, start_time;
3465 	const uint16_t queue_id = tp->queue_id;
3466 	const uint16_t burst_sz = tp->op_params->burst_sz;
3467 	const uint16_t num_ops = tp->op_params->num_to_process;
3468 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3469 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3470 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3471 	struct test_buffers *bufs = NULL;
3472 	int i, j, ret;
3473 	struct rte_bbdev_info info;
3474 	uint16_t num_to_enq;
3475 
3476 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3477 			"BURST_SIZE should be <= %u", MAX_BURST);
3478 
3479 	rte_bbdev_info_get(tp->dev_id, &info);
3480 
3481 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3482 			"NUM_OPS cannot exceed %u for this device",
3483 			info.drv.queue_size_lim);
3484 
3485 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3486 
3487 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3488 
3489 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3490 			num_ops);
3491 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3492 			num_ops);
3493 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3494 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3495 				bufs->hard_outputs, ref_op);
3496 
3497 	/* Set counter to validate the ordering */
3498 	for (j = 0; j < num_ops; ++j)
3499 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3500 
3501 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3502 
3503 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3504 			for (j = 0; j < num_ops; ++j)
3505 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3506 
3507 		start_time = rte_rdtsc_precise();
3508 
3509 		for (enq = 0, deq = 0; enq < num_ops;) {
3510 			num_to_enq = burst_sz;
3511 
3512 			if (unlikely(num_ops - enq < num_to_enq))
3513 				num_to_enq = num_ops - enq;
3514 
3515 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3516 					queue_id, &ops_enq[enq], num_to_enq);
3517 
3518 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3519 					queue_id, &ops_deq[deq], enq - deq);
3520 		}
3521 
3522 		/* dequeue the remaining */
3523 		while (deq < enq) {
3524 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3525 					queue_id, &ops_deq[deq], enq - deq);
3526 		}
3527 
3528 		total_time += rte_rdtsc_precise() - start_time;
3529 	}
3530 
3531 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3532 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3533 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3534 	}
3535 
3536 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3537 
3538 	double tb_len_bits = calc_enc_TB_size(ref_op);
3539 
3540 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3541 			((double)total_time / (double)rte_get_tsc_hz());
3542 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3543 			/ 1000000.0) / ((double)total_time /
3544 			(double)rte_get_tsc_hz());
3545 
3546 	return TEST_SUCCESS;
3547 }
3548 
3549 static int
3550 throughput_pmd_lcore_ldpc_enc(void *arg)
3551 {
3552 	struct thread_params *tp = arg;
3553 	uint16_t enq, deq;
3554 	uint64_t total_time = 0, start_time;
3555 	const uint16_t queue_id = tp->queue_id;
3556 	const uint16_t burst_sz = tp->op_params->burst_sz;
3557 	const uint16_t num_ops = tp->op_params->num_to_process;
3558 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3559 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3560 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3561 	struct test_buffers *bufs = NULL;
3562 	int i, j, ret;
3563 	struct rte_bbdev_info info;
3564 	uint16_t num_to_enq;
3565 
3566 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3567 			"BURST_SIZE should be <= %u", MAX_BURST);
3568 
3569 	rte_bbdev_info_get(tp->dev_id, &info);
3570 
3571 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3572 			"NUM_OPS cannot exceed %u for this device",
3573 			info.drv.queue_size_lim);
3574 
3575 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3576 
3577 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3578 
3579 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3580 			num_ops);
3581 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3582 			num_ops);
3583 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3584 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3585 				bufs->hard_outputs, ref_op);
3586 
3587 	/* Set counter to validate the ordering */
3588 	for (j = 0; j < num_ops; ++j)
3589 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3590 
3591 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3592 
3593 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3594 			for (j = 0; j < num_ops; ++j)
3595 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3596 
3597 		start_time = rte_rdtsc_precise();
3598 
3599 		for (enq = 0, deq = 0; enq < num_ops;) {
3600 			num_to_enq = burst_sz;
3601 
3602 			if (unlikely(num_ops - enq < num_to_enq))
3603 				num_to_enq = num_ops - enq;
3604 
3605 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3606 					queue_id, &ops_enq[enq], num_to_enq);
3607 
3608 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3609 					queue_id, &ops_deq[deq], enq - deq);
3610 		}
3611 
3612 		/* dequeue the remaining */
3613 		while (deq < enq) {
3614 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3615 					queue_id, &ops_deq[deq], enq - deq);
3616 		}
3617 
3618 		total_time += rte_rdtsc_precise() - start_time;
3619 	}
3620 
3621 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3622 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3623 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3624 	}
3625 
3626 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3627 
3628 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3629 
3630 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3631 			((double)total_time / (double)rte_get_tsc_hz());
3632 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3633 			/ 1000000.0) / ((double)total_time /
3634 			(double)rte_get_tsc_hz());
3635 
3636 	return TEST_SUCCESS;
3637 }
3638 
3639 static void
3640 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3641 {
3642 	unsigned int iter = 0;
3643 	double total_mops = 0, total_mbps = 0;
3644 
3645 	for (iter = 0; iter < used_cores; iter++) {
3646 		printf(
3647 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3648 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3649 			t_params[iter].mbps);
3650 		total_mops += t_params[iter].ops_per_sec;
3651 		total_mbps += t_params[iter].mbps;
3652 	}
3653 	printf(
3654 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
3655 		used_cores, total_mops, total_mbps);
3656 }
3657 
3658 /* Aggregate the performance results over the number of cores used */
3659 static void
3660 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3661 {
3662 	unsigned int core_idx = 0;
3663 	double total_mops = 0, total_mbps = 0;
3664 	uint8_t iter_count = 0;
3665 
3666 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3667 		printf(
3668 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3669 			t_params[core_idx].lcore_id,
3670 			t_params[core_idx].ops_per_sec,
3671 			t_params[core_idx].mbps,
3672 			t_params[core_idx].iter_count);
3673 		total_mops += t_params[core_idx].ops_per_sec;
3674 		total_mbps += t_params[core_idx].mbps;
3675 		iter_count = RTE_MAX(iter_count,
3676 				t_params[core_idx].iter_count);
3677 	}
3678 	printf(
3679 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3680 		used_cores, total_mops, total_mbps, iter_count);
3681 }
3682 
3683 /* Aggregate the performance results over the number of cores used */
3684 static void
3685 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3686 {
3687 	unsigned int core_idx = 0;
3688 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3689 	double snr = get_snr();
3690 
3691 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3692 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3693 				t_params[core_idx].lcore_id,
3694 				t_params[core_idx].bler * 100,
3695 				t_params[core_idx].iter_average,
3696 				t_params[core_idx].mbps,
3697 				get_vector_filename());
3698 		total_mbps += t_params[core_idx].mbps;
3699 		total_bler += t_params[core_idx].bler;
3700 		total_iter += t_params[core_idx].iter_average;
3701 	}
3702 	total_bler /= used_cores;
3703 	total_iter /= used_cores;
3704 
3705 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f (max %d) - Tp %.1f Mbps %s\n",
3706 			snr, total_bler * 100, total_iter, get_iter_max(),
3707 			total_mbps, get_vector_filename());
3708 }
3709 
3710 /*
3711  * Test function that determines BLER wireless performance
3712  */
3713 static int
3714 bler_test(struct active_device *ad,
3715 		struct test_op_params *op_params)
3716 {
3717 	int ret;
3718 	unsigned int lcore_id, used_cores = 0;
3719 	struct thread_params *t_params;
3720 	struct rte_bbdev_info info;
3721 	lcore_function_t *bler_function;
3722 	uint16_t num_lcores;
3723 	const char *op_type_str;
3724 
3725 	rte_bbdev_info_get(ad->dev_id, &info);
3726 
3727 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3728 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3729 			test_vector.op_type);
3730 
3731 	printf("+ ------------------------------------------------------- +\n");
3732 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3733 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3734 			op_params->num_to_process, op_params->num_lcores,
3735 			op_type_str,
3736 			intr_enabled ? "Interrupt mode" : "PMD mode",
3737 			(double)rte_get_tsc_hz() / 1000000000.0);
3738 
3739 	/* Set number of lcores */
3740 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3741 			? ad->nb_queues
3742 			: op_params->num_lcores;
3743 
3744 	/* Allocate memory for thread parameters structure */
3745 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3746 			RTE_CACHE_LINE_SIZE);
3747 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3748 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3749 				RTE_CACHE_LINE_SIZE));
3750 
3751 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3752 			!check_bit(test_vector.ldpc_dec.op_flags,
3753 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3754 			&& !check_bit(test_vector.ldpc_dec.op_flags,
3755 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
3756 		bler_function = bler_pmd_lcore_ldpc_dec;
3757 	else
3758 		return TEST_SKIPPED;
3759 
3760 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3761 
3762 	/* Main core is set at first entry */
3763 	t_params[0].dev_id = ad->dev_id;
3764 	t_params[0].lcore_id = rte_lcore_id();
3765 	t_params[0].op_params = op_params;
3766 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3767 	t_params[0].iter_count = 0;
3768 
3769 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3770 		if (used_cores >= num_lcores)
3771 			break;
3772 
3773 		t_params[used_cores].dev_id = ad->dev_id;
3774 		t_params[used_cores].lcore_id = lcore_id;
3775 		t_params[used_cores].op_params = op_params;
3776 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3777 		t_params[used_cores].iter_count = 0;
3778 
3779 		rte_eal_remote_launch(bler_function,
3780 				&t_params[used_cores++], lcore_id);
3781 	}
3782 
3783 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3784 	ret = bler_function(&t_params[0]);
3785 
3786 	/* Main core is always used */
3787 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3788 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3789 
3790 	print_dec_bler(t_params, num_lcores);
3791 
3792 	/* Return if test failed */
3793 	if (ret) {
3794 		rte_free(t_params);
3795 		return ret;
3796 	}
3797 
3798 	/* Placeholder for additional reporting */
3799 	rte_free(t_params);
3800 	return ret;
3801 }
3802 
3803 /*
3804  * Test function that determines how long an enqueue + dequeue of a burst
3805  * takes on available lcores.
3806  */
3807 static int
3808 throughput_test(struct active_device *ad,
3809 		struct test_op_params *op_params)
3810 {
3811 	int ret;
3812 	unsigned int lcore_id, used_cores = 0;
3813 	struct thread_params *t_params, *tp;
3814 	struct rte_bbdev_info info;
3815 	lcore_function_t *throughput_function;
3816 	uint16_t num_lcores;
3817 	const char *op_type_str;
3818 
3819 	rte_bbdev_info_get(ad->dev_id, &info);
3820 
3821 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3822 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3823 			test_vector.op_type);
3824 
3825 	printf("+ ------------------------------------------------------- +\n");
3826 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3827 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3828 			op_params->num_to_process, op_params->num_lcores,
3829 			op_type_str,
3830 			intr_enabled ? "Interrupt mode" : "PMD mode",
3831 			(double)rte_get_tsc_hz() / 1000000000.0);
3832 
3833 	/* Set number of lcores */
3834 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3835 			? ad->nb_queues
3836 			: op_params->num_lcores;
3837 
3838 	/* Allocate memory for thread parameters structure */
3839 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3840 			RTE_CACHE_LINE_SIZE);
3841 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3842 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3843 				RTE_CACHE_LINE_SIZE));
3844 
3845 	if (intr_enabled) {
3846 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3847 			throughput_function = throughput_intr_lcore_dec;
3848 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3849 			throughput_function = throughput_intr_lcore_ldpc_dec;
3850 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3851 			throughput_function = throughput_intr_lcore_enc;
3852 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3853 			throughput_function = throughput_intr_lcore_ldpc_enc;
3854 		else
3855 			throughput_function = throughput_intr_lcore_enc;
3856 
3857 		/* Dequeue interrupt callback registration */
3858 		ret = rte_bbdev_callback_register(ad->dev_id,
3859 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3860 				t_params);
3861 		if (ret < 0) {
3862 			rte_free(t_params);
3863 			return ret;
3864 		}
3865 	} else {
3866 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3867 			throughput_function = throughput_pmd_lcore_dec;
3868 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3869 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3870 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3871 			throughput_function = throughput_pmd_lcore_enc;
3872 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3873 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3874 		else
3875 			throughput_function = throughput_pmd_lcore_enc;
3876 	}
3877 
3878 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3879 
3880 	/* Main core is set at first entry */
3881 	t_params[0].dev_id = ad->dev_id;
3882 	t_params[0].lcore_id = rte_lcore_id();
3883 	t_params[0].op_params = op_params;
3884 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3885 	t_params[0].iter_count = 0;
3886 
3887 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3888 		if (used_cores >= num_lcores)
3889 			break;
3890 
3891 		t_params[used_cores].dev_id = ad->dev_id;
3892 		t_params[used_cores].lcore_id = lcore_id;
3893 		t_params[used_cores].op_params = op_params;
3894 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3895 		t_params[used_cores].iter_count = 0;
3896 
3897 		rte_eal_remote_launch(throughput_function,
3898 				&t_params[used_cores++], lcore_id);
3899 	}
3900 
3901 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3902 	ret = throughput_function(&t_params[0]);
3903 
3904 	/* Main core is always used */
3905 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3906 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3907 
3908 	/* Return if test failed */
3909 	if (ret) {
3910 		rte_free(t_params);
3911 		return ret;
3912 	}
3913 
3914 	/* Print throughput if interrupts are disabled and test passed */
3915 	if (!intr_enabled) {
3916 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3917 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3918 			print_dec_throughput(t_params, num_lcores);
3919 		else
3920 			print_enc_throughput(t_params, num_lcores);
3921 		rte_free(t_params);
3922 		return ret;
3923 	}
3924 
3925 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3926 	 * all pending operations. Skip waiting for queues which reported an
3927 	 * error using processing_status variable.
3928 	 * Wait for main lcore operations.
3929 	 */
3930 	tp = &t_params[0];
3931 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3932 		op_params->num_to_process) &&
3933 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3934 		TEST_FAILED))
3935 		rte_pause();
3936 
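	/* Normalize the per-core figures accumulated by the dequeue callback to a per-repetition average. */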
3937 	tp->ops_per_sec /= TEST_REPETITIONS;
3938 	tp->mbps /= TEST_REPETITIONS;
3939 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3940 
3941 	/* Wait for worker lcores operations */
3942 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3943 		tp = &t_params[used_cores];
3944 
3945 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3946 			op_params->num_to_process) &&
3947 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3948 			TEST_FAILED))
3949 			rte_pause();
3950 
3951 		tp->ops_per_sec /= TEST_REPETITIONS;
3952 		tp->mbps /= TEST_REPETITIONS;
3953 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3954 	}
3955 
3956 	/* Print throughput if test passed */
3957 	if (!ret) {
3958 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3959 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3960 			print_dec_throughput(t_params, num_lcores);
3961 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3962 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3963 			print_enc_throughput(t_params, num_lcores);
3964 	}
3965 
3966 	rte_free(t_params);
3967 	return ret;
3968 }
3969 
3970 static int
3971 latency_test_dec(struct rte_mempool *mempool,
3972 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3973 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3974 		const uint16_t num_to_process, uint16_t burst_sz,
3975 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3976 {
3977 	int ret = TEST_SUCCESS;
3978 	uint16_t i, j, dequeued;
3979 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3980 	uint64_t start_time = 0, last_time = 0;
3981 
3982 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3983 		uint16_t enq = 0, deq = 0;
3984 		bool first_time = true;
3985 		last_time = 0;
3986 
3987 		if (unlikely(num_to_process - dequeued < burst_sz))
3988 			burst_sz = num_to_process - dequeued;
3989 
3990 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3991 		TEST_ASSERT_SUCCESS(ret,
3992 				"rte_bbdev_dec_op_alloc_bulk() failed");
3993 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3994 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3995 					bufs->inputs,
3996 					bufs->hard_outputs,
3997 					bufs->soft_outputs,
3998 					ref_op);
3999 
4000 		/* Set counter to validate the ordering */
4001 		for (j = 0; j < burst_sz; ++j)
4002 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4003 
4004 		start_time = rte_rdtsc_precise();
4005 
4006 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4007 				burst_sz);
4008 		TEST_ASSERT(enq == burst_sz,
4009 				"Error enqueueing burst, expected %u, got %u",
4010 				burst_sz, enq);
4011 
4012 		/* Dequeue */
4013 		do {
4014 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4015 					&ops_deq[deq], burst_sz - deq);
4016 			if (likely(first_time && (deq > 0))) {
4017 				last_time = rte_rdtsc_precise() - start_time;
4018 				first_time = false;
4019 			}
4020 		} while (unlikely(burst_sz != deq));
4021 
4022 		*max_time = RTE_MAX(*max_time, last_time);
4023 		*min_time = RTE_MIN(*min_time, last_time);
4024 		*total_time += last_time;
4025 
4026 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4027 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4028 					vector_mask);
4029 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4030 		}
4031 
4032 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4033 		dequeued += deq;
4034 	}
4035 
4036 	return i;
4037 }
4038 
4039 /* Test case for latency/validation for LDPC Decoder */
4040 static int
4041 latency_test_ldpc_dec(struct rte_mempool *mempool,
4042 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4043 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4044 		const uint16_t num_to_process, uint16_t burst_sz,
4045 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4046 		bool disable_et)
4047 {
4048 	int ret = TEST_SUCCESS;
4049 	uint16_t i, j, dequeued;
4050 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4051 	uint64_t start_time = 0, last_time = 0;
4052 	bool extDdr = ldpc_cap_flags &
4053 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4054 
4055 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4056 		uint16_t enq = 0, deq = 0;
4057 		bool first_time = true;
4058 		last_time = 0;
4059 
4060 		if (unlikely(num_to_process - dequeued < burst_sz))
4061 			burst_sz = num_to_process - dequeued;
4062 
4063 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4064 		TEST_ASSERT_SUCCESS(ret,
4065 				"rte_bbdev_dec_op_alloc_bulk() failed");
4066 
4067 		/* For latency tests we need to disable early termination */
4068 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4069 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4070 			ref_op->ldpc_dec.op_flags &=
4071 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4072 		ref_op->ldpc_dec.iter_max = get_iter_max();
4073 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4074 
4075 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4076 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4077 					bufs->inputs,
4078 					bufs->hard_outputs,
4079 					bufs->soft_outputs,
4080 					bufs->harq_inputs,
4081 					bufs->harq_outputs,
4082 					ref_op);
4083 
4084 		if (extDdr)
4085 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4086 					burst_sz, true);
4087 
4088 		/* Set counter to validate the ordering */
4089 		for (j = 0; j < burst_sz; ++j)
4090 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4091 
4092 		start_time = rte_rdtsc_precise();
4093 
4094 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4095 				&ops_enq[enq], burst_sz);
4096 		TEST_ASSERT(enq == burst_sz,
4097 				"Error enqueueing burst, expected %u, got %u",
4098 				burst_sz, enq);
4099 
4100 		/* Dequeue */
4101 		do {
4102 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4103 					&ops_deq[deq], burst_sz - deq);
4104 			if (likely(first_time && (deq > 0))) {
4105 				last_time = rte_rdtsc_precise() - start_time;
4106 				first_time = false;
4107 			}
4108 		} while (unlikely(burst_sz != deq));
4109 
4110 		*max_time = RTE_MAX(*max_time, last_time);
4111 		*min_time = RTE_MIN(*min_time, last_time);
4112 		*total_time += last_time;
4113 
4114 		if (extDdr)
4115 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4116 
4117 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4118 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4119 					vector_mask);
4120 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4121 		}
4122 
4123 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4124 		dequeued += deq;
4125 	}
4126 	return i;
4127 }
4128 
4129 static int
4130 latency_test_enc(struct rte_mempool *mempool,
4131 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4132 		uint16_t dev_id, uint16_t queue_id,
4133 		const uint16_t num_to_process, uint16_t burst_sz,
4134 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4135 {
4136 	int ret = TEST_SUCCESS;
4137 	uint16_t i, j, dequeued;
4138 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4139 	uint64_t start_time = 0, last_time = 0;
4140 
4141 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4142 		uint16_t enq = 0, deq = 0;
4143 		bool first_time = true;
4144 		last_time = 0;
4145 
4146 		if (unlikely(num_to_process - dequeued < burst_sz))
4147 			burst_sz = num_to_process - dequeued;
4148 
4149 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4150 		TEST_ASSERT_SUCCESS(ret,
4151 				"rte_bbdev_enc_op_alloc_bulk() failed");
4152 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4153 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4154 					bufs->inputs,
4155 					bufs->hard_outputs,
4156 					ref_op);
4157 
4158 		/* Set counter to validate the ordering */
4159 		for (j = 0; j < burst_sz; ++j)
4160 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4161 
4162 		start_time = rte_rdtsc_precise();
4163 
4164 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4165 				burst_sz);
4166 		TEST_ASSERT(enq == burst_sz,
4167 				"Error enqueueing burst, expected %u, got %u",
4168 				burst_sz, enq);
4169 
4170 		/* Dequeue */
4171 		do {
4172 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4173 					&ops_deq[deq], burst_sz - deq);
4174 			if (likely(first_time && (deq > 0))) {
4175 				last_time += rte_rdtsc_precise() - start_time;
4176 				first_time = false;
4177 			}
4178 		} while (unlikely(burst_sz != deq));
4179 
4180 		*max_time = RTE_MAX(*max_time, last_time);
4181 		*min_time = RTE_MIN(*min_time, last_time);
4182 		*total_time += last_time;
4183 
4184 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4185 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4186 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4187 		}
4188 
4189 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4190 		dequeued += deq;
4191 	}
4192 
4193 	return i;
4194 }
4195 
4196 static int
4197 latency_test_ldpc_enc(struct rte_mempool *mempool,
4198 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4199 		uint16_t dev_id, uint16_t queue_id,
4200 		const uint16_t num_to_process, uint16_t burst_sz,
4201 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4202 {
4203 	int ret = TEST_SUCCESS;
4204 	uint16_t i, j, dequeued;
4205 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4206 	uint64_t start_time = 0, last_time = 0;
4207 
4208 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4209 		uint16_t enq = 0, deq = 0;
4210 		bool first_time = true;
4211 		last_time = 0;
4212 
4213 		if (unlikely(num_to_process - dequeued < burst_sz))
4214 			burst_sz = num_to_process - dequeued;
4215 
4216 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4217 		TEST_ASSERT_SUCCESS(ret,
4218 				"rte_bbdev_enc_op_alloc_bulk() failed");
4219 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4220 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4221 					bufs->inputs,
4222 					bufs->hard_outputs,
4223 					ref_op);
4224 
4225 		/* Set counter to validate the ordering */
4226 		for (j = 0; j < burst_sz; ++j)
4227 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4228 
4229 		start_time = rte_rdtsc_precise();
4230 
4231 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4232 				&ops_enq[enq], burst_sz);
4233 		TEST_ASSERT(enq == burst_sz,
4234 				"Error enqueueing burst, expected %u, got %u",
4235 				burst_sz, enq);
4236 
4237 		/* Dequeue */
4238 		do {
4239 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4240 					&ops_deq[deq], burst_sz - deq);
4241 			if (likely(first_time && (deq > 0))) {
4242 				last_time += rte_rdtsc_precise() - start_time;
4243 				first_time = false;
4244 			}
4245 		} while (unlikely(burst_sz != deq));
4246 
4247 		*max_time = RTE_MAX(*max_time, last_time);
4248 		*min_time = RTE_MIN(*min_time, last_time);
4249 		*total_time += last_time;
4250 
4251 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4252 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4253 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4254 		}
4255 
4256 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4257 		dequeued += deq;
4258 	}
4259 
4260 	return i;
4261 }
4262 
4263 /* Common function for running validation and latency test cases */
4264 static int
4265 validation_latency_test(struct active_device *ad,
4266 		struct test_op_params *op_params, bool latency_flag)
4267 {
4268 	int iter;
4269 	uint16_t burst_sz = op_params->burst_sz;
4270 	const uint16_t num_to_process = op_params->num_to_process;
4271 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4272 	const uint16_t queue_id = ad->queue_ids[0];
4273 	struct test_buffers *bufs = NULL;
4274 	struct rte_bbdev_info info;
4275 	uint64_t total_time, min_time, max_time;
4276 	const char *op_type_str;
4277 
4278 	total_time = max_time = 0;
4279 	min_time = UINT64_MAX;
4280 
4281 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4282 			"BURST_SIZE should be <= %u", MAX_BURST);
4283 
4284 	rte_bbdev_info_get(ad->dev_id, &info);
4285 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4286 
4287 	op_type_str = rte_bbdev_op_type_str(op_type);
4288 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4289 
4290 	printf("+ ------------------------------------------------------- +\n");
4291 	if (latency_flag)
4292 		printf("== test: latency\ndev:");
4293 	else
4294 		printf("== test: validation\ndev:");
4295 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4296 			info.dev_name, burst_sz, num_to_process, op_type_str);
4297 
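	/* Dispatch per op type; for LDPC decode, early termination is disabled only for the latency flavour (latency_flag). */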
4298 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4299 		iter = latency_test_dec(op_params->mp, bufs,
4300 				op_params->ref_dec_op, op_params->vector_mask,
4301 				ad->dev_id, queue_id, num_to_process,
4302 				burst_sz, &total_time, &min_time, &max_time);
4303 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4304 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4305 				op_params->ref_enc_op, ad->dev_id, queue_id,
4306 				num_to_process, burst_sz, &total_time,
4307 				&min_time, &max_time);
4308 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4309 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4310 				op_params->ref_dec_op, op_params->vector_mask,
4311 				ad->dev_id, queue_id, num_to_process,
4312 				burst_sz, &total_time, &min_time, &max_time,
4313 				latency_flag);
4314 	else /* RTE_BBDEV_OP_TURBO_ENC */
4315 		iter = latency_test_enc(op_params->mp, bufs,
4316 				op_params->ref_enc_op,
4317 				ad->dev_id, queue_id,
4318 				num_to_process, burst_sz, &total_time,
4319 				&min_time, &max_time);
4320 
4321 	if (iter <= 0)
4322 		return TEST_FAILED;
4323 
4324 	printf("Operation latency:\n"
4325 			"\tavg: %lg cycles, %lg us\n"
4326 			"\tmin: %lg cycles, %lg us\n"
4327 			"\tmax: %lg cycles, %lg us\n",
4328 			(double)total_time / (double)iter,
4329 			(double)(total_time * 1000000) / (double)iter /
4330 			(double)rte_get_tsc_hz(), (double)min_time,
4331 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4332 			(double)max_time, (double)(max_time * 1000000) /
4333 			(double)rte_get_tsc_hz());
4334 
4335 	return TEST_SUCCESS;
4336 }
4337 
4338 static int
4339 latency_test(struct active_device *ad, struct test_op_params *op_params)
4340 {
4341 	return validation_latency_test(ad, op_params, true);
4342 }
4343 
4344 static int
4345 validation_test(struct active_device *ad, struct test_op_params *op_params)
4346 {
4347 	return validation_latency_test(ad, op_params, false);
4348 }
4349 
4350 #ifdef RTE_BBDEV_OFFLOAD_COST
4351 static int
4352 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4353 		struct rte_bbdev_stats *stats)
4354 {
4355 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4356 	struct rte_bbdev_stats *q_stats;
4357 
4358 	if (queue_id >= dev->data->num_queues)
4359 		return -1;
4360 
4361 	q_stats = &dev->data->queues[queue_id].queue_stats;
4362 
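	/* Copy the raw per-queue counters, including the accelerator offload cycles used for offload-cost measurements. */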
4363 	stats->enqueued_count = q_stats->enqueued_count;
4364 	stats->dequeued_count = q_stats->dequeued_count;
4365 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4366 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4367 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4368 
4369 	return 0;
4370 }
4371 
4372 static int
4373 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4374 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4375 		uint16_t queue_id, const uint16_t num_to_process,
4376 		uint16_t burst_sz, struct test_time_stats *time_st)
4377 {
4378 	int i, dequeued, ret;
4379 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4380 	uint64_t enq_start_time, deq_start_time;
4381 	uint64_t enq_sw_last_time, deq_last_time;
4382 	struct rte_bbdev_stats stats;
4383 
4384 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4385 		uint16_t enq = 0, deq = 0;
4386 
4387 		if (unlikely(num_to_process - dequeued < burst_sz))
4388 			burst_sz = num_to_process - dequeued;
4389 
4390 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4391 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4392 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4393 					bufs->inputs,
4394 					bufs->hard_outputs,
4395 					bufs->soft_outputs,
4396 					ref_op);
4397 
4398 		/* Start time meas for enqueue function offload latency */
4399 		enq_start_time = rte_rdtsc_precise();
4400 		do {
4401 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4402 					&ops_enq[enq], burst_sz - enq);
4403 		} while (unlikely(burst_sz != enq));
4404 
4405 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4406 		TEST_ASSERT_SUCCESS(ret,
4407 				"Failed to get stats for queue (%u) of device (%u)",
4408 				queue_id, dev_id);
4409 
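		/* Driver (SW) enqueue time excludes the cycles the accelerator reports in acc_offload_cycles. */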
4410 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4411 				stats.acc_offload_cycles;
4412 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4413 				enq_sw_last_time);
4414 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4415 				enq_sw_last_time);
4416 		time_st->enq_sw_total_time += enq_sw_last_time;
4417 
4418 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4419 				stats.acc_offload_cycles);
4420 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4421 				stats.acc_offload_cycles);
4422 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4423 
4424 		/* give time for device to process ops */
4425 		rte_delay_us(WAIT_OFFLOAD_US);
4426 
4427 		/* Start time meas for dequeue function offload latency */
4428 		deq_start_time = rte_rdtsc_precise();
4429 		/* Dequeue one operation */
4430 		do {
4431 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4432 					&ops_deq[deq], enq);
4433 		} while (unlikely(deq == 0));
4434 
4435 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4436 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4437 				deq_last_time);
4438 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4439 				deq_last_time);
4440 		time_st->deq_total_time += deq_last_time;
4441 
4442 		/* Dequeue remaining operations if needed */
4443 		while (burst_sz != deq)
4444 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4445 					&ops_deq[deq], burst_sz - deq);
4446 
4447 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4448 		dequeued += deq;
4449 	}
4450 
4451 	return i;
4452 }
4453 
4454 static int
4455 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4456 		struct test_buffers *bufs,
4457 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4458 		uint16_t queue_id, const uint16_t num_to_process,
4459 		uint16_t burst_sz, struct test_time_stats *time_st)
4460 {
4461 	int i, dequeued, ret;
4462 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4463 	uint64_t enq_start_time, deq_start_time;
4464 	uint64_t enq_sw_last_time, deq_last_time;
4465 	struct rte_bbdev_stats stats;
4466 	bool extDdr = ldpc_cap_flags &
4467 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4468 
4469 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4470 		uint16_t enq = 0, deq = 0;
4471 
4472 		if (unlikely(num_to_process - dequeued < burst_sz))
4473 			burst_sz = num_to_process - dequeued;
4474 
4475 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4476 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4477 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4478 					bufs->inputs,
4479 					bufs->hard_outputs,
4480 					bufs->soft_outputs,
4481 					bufs->harq_inputs,
4482 					bufs->harq_outputs,
4483 					ref_op);
4484 
4485 		if (extDdr)
4486 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4487 					burst_sz, true);
4488 
4489 		/* Start time meas for enqueue function offload latency */
4490 		enq_start_time = rte_rdtsc_precise();
4491 		do {
4492 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4493 					&ops_enq[enq], burst_sz - enq);
4494 		} while (unlikely(burst_sz != enq));
4495 
4496 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4497 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4498 		TEST_ASSERT_SUCCESS(ret,
4499 				"Failed to get stats for queue (%u) of device (%u)",
4500 				queue_id, dev_id);
4501 
4502 		enq_sw_last_time -= stats.acc_offload_cycles;
4503 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4504 				enq_sw_last_time);
4505 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4506 				enq_sw_last_time);
4507 		time_st->enq_sw_total_time += enq_sw_last_time;
4508 
4509 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4510 				stats.acc_offload_cycles);
4511 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4512 				stats.acc_offload_cycles);
4513 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4514 
4515 		/* give time for device to process ops */
4516 		rte_delay_us(WAIT_OFFLOAD_US);
4517 
4518 		/* Start time meas for dequeue function offload latency */
4519 		deq_start_time = rte_rdtsc_precise();
4520 		/* Dequeue one operation */
4521 		do {
4522 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4523 					&ops_deq[deq], enq);
4524 		} while (unlikely(deq == 0));
4525 
4526 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4527 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4528 				deq_last_time);
4529 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4530 				deq_last_time);
4531 		time_st->deq_total_time += deq_last_time;
4532 
4533 		/* Dequeue remaining operations if needed */
4534 		while (burst_sz != deq)
4535 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4536 					&ops_deq[deq], burst_sz - deq);
4537 
4538 		if (extDdr) {
4539 			/* Read loopback is not thread safe */
4540 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4541 		}
4542 
4543 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4544 		dequeued += deq;
4545 	}
4546 
4547 	return i;
4548 }
4549 
4550 static int
4551 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4552 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4553 		uint16_t queue_id, const uint16_t num_to_process,
4554 		uint16_t burst_sz, struct test_time_stats *time_st)
4555 {
4556 	int i, dequeued, ret;
4557 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4558 	uint64_t enq_start_time, deq_start_time;
4559 	uint64_t enq_sw_last_time, deq_last_time;
4560 	struct rte_bbdev_stats stats;
4561 
4562 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4563 		uint16_t enq = 0, deq = 0;
4564 
4565 		if (unlikely(num_to_process - dequeued < burst_sz))
4566 			burst_sz = num_to_process - dequeued;
4567 
4568 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4569 		TEST_ASSERT_SUCCESS(ret,
4570 				"rte_bbdev_enc_op_alloc_bulk() failed");
4571 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4572 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4573 					bufs->inputs,
4574 					bufs->hard_outputs,
4575 					ref_op);
4576 
4577 		/* Start time meas for enqueue function offload latency */
4578 		enq_start_time = rte_rdtsc_precise();
4579 		do {
4580 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4581 					&ops_enq[enq], burst_sz - enq);
4582 		} while (unlikely(burst_sz != enq));
4583 
4584 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4585 
4586 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4587 		TEST_ASSERT_SUCCESS(ret,
4588 				"Failed to get stats for queue (%u) of device (%u)",
4589 				queue_id, dev_id);
4590 		enq_sw_last_time -= stats.acc_offload_cycles;
4591 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4592 				enq_sw_last_time);
4593 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4594 				enq_sw_last_time);
4595 		time_st->enq_sw_total_time += enq_sw_last_time;
4596 
4597 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4598 				stats.acc_offload_cycles);
4599 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4600 				stats.acc_offload_cycles);
4601 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4602 
4603 		/* give time for device to process ops */
4604 		rte_delay_us(WAIT_OFFLOAD_US);
4605 
4606 		/* Start time meas for dequeue function offload latency */
4607 		deq_start_time = rte_rdtsc_precise();
4608 		/* Dequeue one operation */
4609 		do {
4610 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4611 					&ops_deq[deq], enq);
4612 		} while (unlikely(deq == 0));
4613 
4614 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4615 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4616 				deq_last_time);
4617 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4618 				deq_last_time);
4619 		time_st->deq_total_time += deq_last_time;
4620 
4621 		while (burst_sz != deq)
4622 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4623 					&ops_deq[deq], burst_sz - deq);
4624 
4625 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4626 		dequeued += deq;
4627 	}
4628 
4629 	return i;
4630 }
4631 
4632 static int
4633 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4634 		struct test_buffers *bufs,
4635 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4636 		uint16_t queue_id, const uint16_t num_to_process,
4637 		uint16_t burst_sz, struct test_time_stats *time_st)
4638 {
4639 	int i, dequeued, ret;
4640 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4641 	uint64_t enq_start_time, deq_start_time;
4642 	uint64_t enq_sw_last_time, deq_last_time;
4643 	struct rte_bbdev_stats stats;
4644 
4645 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4646 		uint16_t enq = 0, deq = 0;
4647 
4648 		if (unlikely(num_to_process - dequeued < burst_sz))
4649 			burst_sz = num_to_process - dequeued;
4650 
4651 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4652 		TEST_ASSERT_SUCCESS(ret,
4653 				"rte_bbdev_enc_op_alloc_bulk() failed");
4654 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4655 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4656 					bufs->inputs,
4657 					bufs->hard_outputs,
4658 					ref_op);
4659 
4660 		/* Start time meas for enqueue function offload latency */
4661 		enq_start_time = rte_rdtsc_precise();
4662 		do {
4663 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4664 					&ops_enq[enq], burst_sz - enq);
4665 		} while (unlikely(burst_sz != enq));
4666 
4667 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4668 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4669 		TEST_ASSERT_SUCCESS(ret,
4670 				"Failed to get stats for queue (%u) of device (%u)",
4671 				queue_id, dev_id);
4672 
4673 		enq_sw_last_time -= stats.acc_offload_cycles;
4674 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4675 				enq_sw_last_time);
4676 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4677 				enq_sw_last_time);
4678 		time_st->enq_sw_total_time += enq_sw_last_time;
4679 
4680 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4681 				stats.acc_offload_cycles);
4682 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4683 				stats.acc_offload_cycles);
4684 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4685 
4686 		/* give time for device to process ops */
4687 		rte_delay_us(WAIT_OFFLOAD_US);
4688 
4689 		/* Start time meas for dequeue function offload latency */
4690 		deq_start_time = rte_rdtsc_precise();
4691 		/* Dequeue one operation */
4692 		do {
4693 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4694 					&ops_deq[deq], enq);
4695 		} while (unlikely(deq == 0));
4696 
4697 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4698 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4699 				deq_last_time);
4700 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4701 				deq_last_time);
4702 		time_st->deq_total_time += deq_last_time;
4703 
4704 		while (burst_sz != deq)
4705 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4706 					&ops_deq[deq], burst_sz - deq);
4707 
4708 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4709 		dequeued += deq;
4710 	}
4711 
4712 	return i;
4713 }
4714 #endif
4715 
4716 static int
4717 offload_cost_test(struct active_device *ad,
4718 		struct test_op_params *op_params)
4719 {
4720 #ifndef RTE_BBDEV_OFFLOAD_COST
4721 	RTE_SET_USED(ad);
4722 	RTE_SET_USED(op_params);
4723 	printf("Offload latency test is disabled.\n");
4724 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4725 	return TEST_SKIPPED;
4726 #else
4727 	int iter;
4728 	uint16_t burst_sz = op_params->burst_sz;
4729 	const uint16_t num_to_process = op_params->num_to_process;
4730 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4731 	const uint16_t queue_id = ad->queue_ids[0];
4732 	struct test_buffers *bufs = NULL;
4733 	struct rte_bbdev_info info;
4734 	const char *op_type_str;
4735 	struct test_time_stats time_st;
4736 
4737 	memset(&time_st, 0, sizeof(struct test_time_stats));
4738 	time_st.enq_sw_min_time = UINT64_MAX;
4739 	time_st.enq_acc_min_time = UINT64_MAX;
4740 	time_st.deq_min_time = UINT64_MAX;
4741 
4742 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4743 			"BURST_SIZE should be <= %u", MAX_BURST);
4744 
4745 	rte_bbdev_info_get(ad->dev_id, &info);
4746 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4747 
4748 	op_type_str = rte_bbdev_op_type_str(op_type);
4749 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4750 
4751 	printf("+ ------------------------------------------------------- +\n");
4752 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4753 			info.dev_name, burst_sz, num_to_process, op_type_str);
4754 
4755 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4756 		iter = offload_latency_test_dec(op_params->mp, bufs,
4757 				op_params->ref_dec_op, ad->dev_id, queue_id,
4758 				num_to_process, burst_sz, &time_st);
4759 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4760 		iter = offload_latency_test_enc(op_params->mp, bufs,
4761 				op_params->ref_enc_op, ad->dev_id, queue_id,
4762 				num_to_process, burst_sz, &time_st);
4763 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4764 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4765 				op_params->ref_enc_op, ad->dev_id, queue_id,
4766 				num_to_process, burst_sz, &time_st);
4767 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4768 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4769 			op_params->ref_dec_op, ad->dev_id, queue_id,
4770 			num_to_process, burst_sz, &time_st);
4771 	else
4772 		iter = offload_latency_test_enc(op_params->mp, bufs,
4773 				op_params->ref_enc_op, ad->dev_id, queue_id,
4774 				num_to_process, burst_sz, &time_st);
4775 
4776 	if (iter <= 0)
4777 		return TEST_FAILED;
4778 
4779 	printf("Enqueue driver offload cost latency:\n"
4780 			"\tavg: %lg cycles, %lg us\n"
4781 			"\tmin: %lg cycles, %lg us\n"
4782 			"\tmax: %lg cycles, %lg us\n"
4783 			"Enqueue accelerator offload cost latency:\n"
4784 			"\tavg: %lg cycles, %lg us\n"
4785 			"\tmin: %lg cycles, %lg us\n"
4786 			"\tmax: %lg cycles, %lg us\n",
4787 			(double)time_st.enq_sw_total_time / (double)iter,
4788 			(double)(time_st.enq_sw_total_time * 1000000) /
4789 			(double)iter / (double)rte_get_tsc_hz(),
4790 			(double)time_st.enq_sw_min_time,
4791 			(double)(time_st.enq_sw_min_time * 1000000) /
4792 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4793 			(double)(time_st.enq_sw_max_time * 1000000) /
4794 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4795 			(double)iter,
4796 			(double)(time_st.enq_acc_total_time * 1000000) /
4797 			(double)iter / (double)rte_get_tsc_hz(),
4798 			(double)time_st.enq_acc_min_time,
4799 			(double)(time_st.enq_acc_min_time * 1000000) /
4800 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4801 			(double)(time_st.enq_acc_max_time * 1000000) /
4802 			rte_get_tsc_hz());
4803 
4804 	printf("Dequeue offload cost latency - one op:\n"
4805 			"\tavg: %lg cycles, %lg us\n"
4806 			"\tmin: %lg cycles, %lg us\n"
4807 			"\tmax: %lg cycles, %lg us\n",
4808 			(double)time_st.deq_total_time / (double)iter,
4809 			(double)(time_st.deq_total_time * 1000000) /
4810 			(double)iter / (double)rte_get_tsc_hz(),
4811 			(double)time_st.deq_min_time,
4812 			(double)(time_st.deq_min_time * 1000000) /
4813 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4814 			(double)(time_st.deq_max_time * 1000000) /
4815 			rte_get_tsc_hz());
4816 
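	/* Sanity-check the PMD queue counters: enqueue/dequeue totals (except for LDPC decode) and error counts. */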
4817 	struct rte_bbdev_stats stats = {0};
4818 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4819 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4820 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4821 				"Mismatch in enqueue count %10"PRIu64" %d",
4822 				stats.enqueued_count, num_to_process);
4823 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4824 				"Mismatch in dequeue count %10"PRIu64" %d",
4825 				stats.dequeued_count, num_to_process);
4826 	}
4827 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4828 			"Enqueue error count %10"PRIu64"",
4829 			stats.enqueue_err_count);
4830 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4831 			"Dequeue error count %10"PRIu64"",
4832 			stats.dequeue_err_count);
4833 
4834 	return TEST_SUCCESS;
4835 #endif
4836 }
4837 
4838 #ifdef RTE_BBDEV_OFFLOAD_COST
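/*
 * Measure the cost of dequeuing decode operations from an empty queue: every
 * dequeue call returns with no operations available, so the timed loop
 * captures only the driver/accelerator dequeue overhead.
 */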
4839 static int
4840 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4841 		const uint16_t num_to_process, uint16_t burst_sz,
4842 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4843 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4844 {
4845 	int i, deq_total;
4846 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4847 	uint64_t deq_start_time, deq_last_time;
4848 
4849 	/* Test deq offload latency from an empty queue */
4851 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4852 			++i, deq_total += burst_sz) {
4853 		deq_start_time = rte_rdtsc_precise();
4854 
4855 		if (unlikely(num_to_process - deq_total < burst_sz))
4856 			burst_sz = num_to_process - deq_total;
4857 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4858 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4859 					burst_sz);
4860 		else
4861 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4862 					burst_sz);
4863 
4864 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4865 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4866 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4867 		*deq_total_time += deq_last_time;
4868 	}
4869 
4870 	return i;
4871 }
4872 
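/* Same empty-queue dequeue measurement as above, for encode operations. */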
4873 static int
4874 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4875 		const uint16_t num_to_process, uint16_t burst_sz,
4876 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4877 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4878 {
4879 	int i, deq_total;
4880 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4881 	uint64_t deq_start_time, deq_last_time;
4882 
4883 	/* Test deq offload latency from an empty queue */
4884 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4885 			++i, deq_total += burst_sz) {
4886 		deq_start_time = rte_rdtsc_precise();
4887 
4888 		if (unlikely(num_to_process - deq_total < burst_sz))
4889 			burst_sz = num_to_process - deq_total;
4890 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4891 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4892 					burst_sz);
4893 		else
4894 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4895 					burst_sz);
4896 
4897 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4898 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4899 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4900 		*deq_total_time += deq_last_time;
4901 	}
4902 
4903 	return i;
4904 }
4905 
4906 #endif
4907 
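/*
 * Empty-dequeue offload test entry point: runs the measurement loop above and
 * reports average/min/max dequeue cost in cycles and in microseconds, where
 * microseconds = cycles * 1000000 / rte_get_tsc_hz().
 */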
4908 static int
4909 offload_latency_empty_q_test(struct active_device *ad,
4910 		struct test_op_params *op_params)
4911 {
4912 #ifndef RTE_BBDEV_OFFLOAD_COST
4913 	RTE_SET_USED(ad);
4914 	RTE_SET_USED(op_params);
4915 	printf("Offload latency empty dequeue test is disabled.\n");
4916 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4917 	return TEST_SKIPPED;
4918 #else
4919 	int iter;
4920 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4921 	uint16_t burst_sz = op_params->burst_sz;
4922 	const uint16_t num_to_process = op_params->num_to_process;
4923 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4924 	const uint16_t queue_id = ad->queue_ids[0];
4925 	struct rte_bbdev_info info;
4926 	const char *op_type_str;
4927 
4928 	deq_total_time = deq_max_time = 0;
4929 	deq_min_time = UINT64_MAX;
4930 
4931 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4932 			"BURST_SIZE should be <= %u", MAX_BURST);
4933 
4934 	rte_bbdev_info_get(ad->dev_id, &info);
4935 
4936 	op_type_str = rte_bbdev_op_type_str(op_type);
4937 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4938 
4939 	printf("+ ------------------------------------------------------- +\n");
4940 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4941 			info.dev_name, burst_sz, num_to_process, op_type_str);
4942 
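	/*
	 * Decode op types share the dec measurement loop; all other op types
	 * use the enc loop.
	 */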
4943 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4944 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4945 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4946 				num_to_process, burst_sz, &deq_total_time,
4947 				&deq_min_time, &deq_max_time, op_type);
4948 	else
4949 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4950 				num_to_process, burst_sz, &deq_total_time,
4951 				&deq_min_time, &deq_max_time, op_type);
4952 
4953 	if (iter <= 0)
4954 		return TEST_FAILED;
4955 
4956 	printf("Empty dequeue offload:\n"
4957 			"\tavg: %lg cycles, %lg us\n"
4958 			"\tmin: %lg cycles, %lg us\n"
4959 			"\tmax: %lg cycles, %lg us\n",
4960 			(double)deq_total_time / (double)iter,
4961 			(double)(deq_total_time * 1000000) / (double)iter /
4962 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4963 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4964 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4965 			rte_get_tsc_hz());
4966 
4967 	return TEST_SUCCESS;
4968 #endif
4969 }
4970 
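/* Thin wrappers mapping each test body onto the common run_test_case() runner. */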
4971 static int
4972 bler_tc(void)
4973 {
4974 	return run_test_case(bler_test);
4975 }
4976 
4977 static int
4978 throughput_tc(void)
4979 {
4980 	return run_test_case(throughput_test);
4981 }
4982 
4983 static int
4984 offload_cost_tc(void)
4985 {
4986 	return run_test_case(offload_cost_test);
4987 }
4988 
4989 static int
4990 offload_latency_empty_q_tc(void)
4991 {
4992 	return run_test_case(offload_latency_empty_q_test);
4993 }
4994 
4995 static int
4996 latency_tc(void)
4997 {
4998 	return run_test_case(latency_test);
4999 }
5000 
5001 static int
5002 validation_tc(void)
5003 {
5004 	return run_test_case(validation_test);
5005 }
5006 
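/*
 * The interrupt test reuses the throughput test body; interrupt completion
 * (rather than PMD polling) is selected by the interrupt test suite setup.
 */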
5007 static int
5008 interrupt_tc(void)
5009 {
5010 	return run_test_case(throughput_test);
5011 }
5012 
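/* Unit test suite definitions, one per registered test command. */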
5013 static struct unit_test_suite bbdev_bler_testsuite = {
5014 	.suite_name = "BBdev BLER Tests",
5015 	.setup = testsuite_setup,
5016 	.teardown = testsuite_teardown,
5017 	.unit_test_cases = {
5018 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5019 		TEST_CASES_END() /**< NULL terminate unit test array */
5020 	}
5021 };
5022 
5023 static struct unit_test_suite bbdev_throughput_testsuite = {
5024 	.suite_name = "BBdev Throughput Tests",
5025 	.setup = testsuite_setup,
5026 	.teardown = testsuite_teardown,
5027 	.unit_test_cases = {
5028 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5029 		TEST_CASES_END() /**< NULL terminate unit test array */
5030 	}
5031 };
5032 
5033 static struct unit_test_suite bbdev_validation_testsuite = {
5034 	.suite_name = "BBdev Validation Tests",
5035 	.setup = testsuite_setup,
5036 	.teardown = testsuite_teardown,
5037 	.unit_test_cases = {
5038 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5039 		TEST_CASES_END() /**< NULL terminate unit test array */
5040 	}
5041 };
5042 
5043 static struct unit_test_suite bbdev_latency_testsuite = {
5044 	.suite_name = "BBdev Latency Tests",
5045 	.setup = testsuite_setup,
5046 	.teardown = testsuite_teardown,
5047 	.unit_test_cases = {
5048 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5049 		TEST_CASES_END() /**< NULL terminate unit test array */
5050 	}
5051 };
5052 
5053 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5054 	.suite_name = "BBdev Offload Cost Tests",
5055 	.setup = testsuite_setup,
5056 	.teardown = testsuite_teardown,
5057 	.unit_test_cases = {
5058 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5059 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5060 		TEST_CASES_END() /**< NULL terminate unit test array */
5061 	}
5062 };
5063 
5064 static struct unit_test_suite bbdev_interrupt_testsuite = {
5065 	.suite_name = "BBdev Interrupt Tests",
5066 	.setup = interrupt_testsuite_setup,
5067 	.teardown = testsuite_teardown,
5068 	.unit_test_cases = {
5069 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5070 		TEST_CASES_END() /**< NULL terminate unit test array */
5071 	}
5072 };
5073 
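/* Register each suite under a test name selectable from the test application. */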
5074 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5075 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5076 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5077 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5078 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5079 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5080