xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 72206323a5dd3182b13f61b25a64abdddfee595c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 100
28 #define WAIT_OFFLOAD_US 1000
29 
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42 
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #endif
54 
55 #ifdef RTE_BASEBAND_ACC100
56 #include <rte_acc100_cfg.h>
57 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
58 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
59 #define ACC100_QMGR_NUM_AQS 16
60 #define ACC100_QMGR_NUM_QGS 2
61 #define ACC100_QMGR_AQ_DEPTH 5
62 #define ACC100_QMGR_INVALID_IDX -1
63 #define ACC100_QMGR_RR 1
64 #define ACC100_QOS_GBR 0
65 #endif
66 
67 #define OPS_CACHE_SIZE 256U
68 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
69 
70 #define SYNC_WAIT 0
71 #define SYNC_START 1
72 #define INVALID_OPAQUE -1
73 
74 #define INVALID_QUEUE_ID -1
75 /* Increment for next code block in external HARQ memory */
76 #define HARQ_INCR 32768
77 /* Headroom for filler LLRs insertion in HARQ buffer */
78 #define FILLER_HEADROOM 1024
79 /* Constants for k0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
80 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
81 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
82 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
83 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
84 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
85 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
86 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
87 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
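/*
 * Illustrative k0 computation using the constants above (3GPP TS 38.212,
 * Table 5.4.2.1-2): for BG 1 and rv_index 2,
 *   k0 = floor((K0_2_1 * n_cb) / (N_ZC_1 * z_c)) * z_c
 * and similarly with K0_x_2 / N_ZC_2 for BG 2; rv_index 0 uses k0 = 0.
 */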
88 
89 static struct test_bbdev_vector test_vector;
90 
91 /* Switch between PMD and Interrupt for throughput TC */
92 static bool intr_enabled;
93 
94 /* LLR arithmetic representation for numerical conversion */
95 static int ldpc_llr_decimals;
96 static int ldpc_llr_size;
97 /* Keep track of the LDPC decoder device capability flag */
98 static uint32_t ldpc_cap_flags;
99 
100 /* Represents tested active devices */
101 static struct active_device {
102 	const char *driver_name;
103 	uint8_t dev_id;
104 	uint16_t supported_ops;
105 	uint16_t queue_ids[MAX_QUEUES];
106 	uint16_t nb_queues;
107 	struct rte_mempool *ops_mempool;
108 	struct rte_mempool *in_mbuf_pool;
109 	struct rte_mempool *hard_out_mbuf_pool;
110 	struct rte_mempool *soft_out_mbuf_pool;
111 	struct rte_mempool *harq_in_mbuf_pool;
112 	struct rte_mempool *harq_out_mbuf_pool;
113 } active_devs[RTE_BBDEV_MAX_DEVS];
114 
115 static uint8_t nb_active_devs;
116 
117 /* Data buffers used by BBDEV ops */
118 struct test_buffers {
119 	struct rte_bbdev_op_data *inputs;
120 	struct rte_bbdev_op_data *hard_outputs;
121 	struct rte_bbdev_op_data *soft_outputs;
122 	struct rte_bbdev_op_data *harq_inputs;
123 	struct rte_bbdev_op_data *harq_outputs;
124 };
125 
126 /* Operation parameters specific for given test case */
127 struct test_op_params {
128 	struct rte_mempool *mp;
129 	struct rte_bbdev_dec_op *ref_dec_op;
130 	struct rte_bbdev_enc_op *ref_enc_op;
131 	uint16_t burst_sz;
132 	uint16_t num_to_process;
133 	uint16_t num_lcores;
134 	int vector_mask;
135 	uint16_t sync;
136 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
137 };
138 
139 /* Contains per lcore params */
140 struct thread_params {
141 	uint8_t dev_id;
142 	uint16_t queue_id;
143 	uint32_t lcore_id;
144 	uint64_t start_time;
145 	double ops_per_sec;
146 	double mbps;
147 	uint8_t iter_count;
148 	double iter_average;
149 	double bler;
150 	uint16_t nb_dequeued;
151 	int16_t processing_status;
152 	uint16_t burst_sz;
153 	struct test_op_params *op_params;
154 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
155 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
156 };
157 
158 #ifdef RTE_BBDEV_OFFLOAD_COST
159 /* Stores time statistics */
160 struct test_time_stats {
161 	/* Stores software enqueue total working time */
162 	uint64_t enq_sw_total_time;
163 	/* Stores minimum value of software enqueue working time */
164 	uint64_t enq_sw_min_time;
165 	/* Stores maximum value of software enqueue working time */
166 	uint64_t enq_sw_max_time;
167 	/* Stores accelerator enqueue total working time */
168 	uint64_t enq_acc_total_time;
169 	/* Stores minimum value of accelerator enqueue working time */
170 	uint64_t enq_acc_min_time;
171 	/* Stores maximum value of accelerator enqueue working time */
172 	uint64_t enq_acc_max_time;
173 	/* Stores dequeue total working time */
174 	uint64_t deq_total_time;
175 	/* Stores minimum value of dequeue working time */
176 	uint64_t deq_min_time;
177 	/* Stores maximum value of dequeue working time */
178 	uint64_t deq_max_time;
179 };
180 #endif
181 
182 typedef int (test_case_function)(struct active_device *ad,
183 		struct test_op_params *op_params);
184 
185 static inline void
186 mbuf_reset(struct rte_mbuf *m)
187 {
188 	m->pkt_len = 0;
189 
190 	do {
191 		m->data_len = 0;
192 		m = m->next;
193 	} while (m != NULL);
194 }
195 
196 /* Read flag value 0/1 from bitmap */
197 static inline bool
198 check_bit(uint32_t bitmap, uint32_t bitmask)
199 {
200 	return bitmap & bitmask;
201 }
202 
203 static inline void
204 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
205 {
206 	ad->supported_ops |= (1 << op_type);
207 }
208 
209 static inline bool
210 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
211 {
212 	return ad->supported_ops & (1 << op_type);
213 }
214 
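/*
 * Example: flags_match(0x3, 0x7) is true (all requested flags are present),
 * while flags_match(0x5, 0x3) is false (flag 0x4 is requested but missing).
 */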
215 static inline bool
216 flags_match(uint32_t flags_req, uint32_t flags_present)
217 {
218 	return (flags_req & flags_present) == flags_req;
219 }
220 
221 static void
222 clear_soft_out_cap(uint32_t *op_flags)
223 {
224 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
225 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
227 }
228 
229 /* Convert all the test vector op data entries to big-endian
230  * format. This is used when the device expects its input
231  * in big-endian format.
232  */
233 static inline void
234 convert_op_data_to_be(void)
235 {
236 	struct op_data_entries *op;
237 	enum op_data_type type;
238 	uint8_t nb_segs, *rem_data, temp;
239 	uint32_t *data, len;
240 	int complete, rem, i, j;
241 
242 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
243 		nb_segs = test_vector.entries[type].nb_segments;
244 		op = &test_vector.entries[type];
245 
246 		/* Invert byte endianness for all the segments */
247 		for (i = 0; i < nb_segs; ++i) {
248 			len = op->segments[i].length;
249 			data = op->segments[i].addr;
250 
251 			/* Byte-swap each complete 32-bit word */
252 			complete = len / 4;
253 			for (j = 0; j < complete; j++)
254 				data[j] = rte_bswap32(data[j]);
255 
256 			/* Swap any remaining bytes */
257 			rem = len % 4;
258 			rem_data = (uint8_t *)&data[j];
259 			for (j = 0; j < rem/2; j++) {
260 				temp = rem_data[j];
261 				rem_data[j] = rem_data[rem - j - 1];
262 				rem_data[rem - j - 1] = temp;
263 			}
264 		}
265 	}
266 }
267 
268 static int
269 check_dev_cap(const struct rte_bbdev_info *dev_info)
270 {
271 	unsigned int i;
272 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
273 		nb_harq_inputs, nb_harq_outputs;
274 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
275 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
276 
277 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
278 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
279 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
280 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
281 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
282 
283 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
284 		if (op_cap->type != test_vector.op_type)
285 			continue;
286 
287 		if (dev_data_endianness == RTE_BIG_ENDIAN)
288 			convert_op_data_to_be();
289 
290 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
291 			const struct rte_bbdev_op_cap_turbo_dec *cap =
292 					&op_cap->cap.turbo_dec;
293 			/* Ignore lack of soft output capability, just skip
294 			 * checking if soft output is valid.
295 			 */
296 			if ((test_vector.turbo_dec.op_flags &
297 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
298 					!(cap->capability_flags &
299 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
300 				printf(
301 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
302 					dev_info->dev_name);
303 				clear_soft_out_cap(
304 					&test_vector.turbo_dec.op_flags);
305 			}
306 
307 			if (!flags_match(test_vector.turbo_dec.op_flags,
308 					cap->capability_flags))
309 				return TEST_FAILED;
310 			if (nb_inputs > cap->num_buffers_src) {
311 				printf("Too many inputs defined: %u, max: %u\n",
312 					nb_inputs, cap->num_buffers_src);
313 				return TEST_FAILED;
314 			}
315 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
316 					(test_vector.turbo_dec.op_flags &
317 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
318 				printf(
319 					"Too many soft outputs defined: %u, max: %u\n",
320 						nb_soft_outputs,
321 						cap->num_buffers_soft_out);
322 				return TEST_FAILED;
323 			}
324 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
325 				printf(
326 					"Too many hard outputs defined: %u, max: %u\n",
327 						nb_hard_outputs,
328 						cap->num_buffers_hard_out);
329 				return TEST_FAILED;
330 			}
331 			if (intr_enabled && !(cap->capability_flags &
332 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
333 				printf(
334 					"Dequeue interrupts are not supported!\n");
335 				return TEST_FAILED;
336 			}
337 
338 			return TEST_SUCCESS;
339 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
340 			const struct rte_bbdev_op_cap_turbo_enc *cap =
341 					&op_cap->cap.turbo_enc;
342 
343 			if (!flags_match(test_vector.turbo_enc.op_flags,
344 					cap->capability_flags))
345 				return TEST_FAILED;
346 			if (nb_inputs > cap->num_buffers_src) {
347 				printf("Too many inputs defined: %u, max: %u\n",
348 					nb_inputs, cap->num_buffers_src);
349 				return TEST_FAILED;
350 			}
351 			if (nb_hard_outputs > cap->num_buffers_dst) {
352 				printf(
353 					"Too many hard outputs defined: %u, max: %u\n",
354 					nb_hard_outputs, cap->num_buffers_dst);
355 				return TEST_FAILED;
356 			}
357 			if (intr_enabled && !(cap->capability_flags &
358 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
359 				printf(
360 					"Dequeue interrupts are not supported!\n");
361 				return TEST_FAILED;
362 			}
363 
364 			return TEST_SUCCESS;
365 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
366 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
367 					&op_cap->cap.ldpc_enc;
368 
369 			if (!flags_match(test_vector.ldpc_enc.op_flags,
370 					cap->capability_flags)){
371 				printf("Flag Mismatch\n");
372 				return TEST_FAILED;
373 			}
374 			if (nb_inputs > cap->num_buffers_src) {
375 				printf("Too many inputs defined: %u, max: %u\n",
376 					nb_inputs, cap->num_buffers_src);
377 				return TEST_FAILED;
378 			}
379 			if (nb_hard_outputs > cap->num_buffers_dst) {
380 				printf(
381 					"Too many hard outputs defined: %u, max: %u\n",
382 					nb_hard_outputs, cap->num_buffers_dst);
383 				return TEST_FAILED;
384 			}
385 			if (intr_enabled && !(cap->capability_flags &
386 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
387 				printf(
388 					"Dequeue interrupts are not supported!\n");
389 				return TEST_FAILED;
390 			}
391 
392 			return TEST_SUCCESS;
393 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
394 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
395 					&op_cap->cap.ldpc_dec;
396 
397 			if (!flags_match(test_vector.ldpc_dec.op_flags,
398 					cap->capability_flags)){
399 				printf("Flag Mismatch\n");
400 				return TEST_FAILED;
401 			}
402 			if (nb_inputs > cap->num_buffers_src) {
403 				printf("Too many inputs defined: %u, max: %u\n",
404 					nb_inputs, cap->num_buffers_src);
405 				return TEST_FAILED;
406 			}
407 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
408 				printf(
409 					"Too many hard outputs defined: %u, max: %u\n",
410 					nb_hard_outputs,
411 					cap->num_buffers_hard_out);
412 				return TEST_FAILED;
413 			}
414 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
415 				printf(
416 					"Too many HARQ inputs defined: %u, max: %u\n",
417 					nb_harq_inputs,
418 					cap->num_buffers_hard_out);
419 				return TEST_FAILED;
420 			}
421 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
422 				printf(
423 					"Too many HARQ outputs defined: %u, max: %u\n",
424 					nb_harq_outputs,
425 					cap->num_buffers_hard_out);
426 				return TEST_FAILED;
427 			}
428 			if (intr_enabled && !(cap->capability_flags &
429 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
430 				printf(
431 					"Dequeue interrupts are not supported!\n");
432 				return TEST_FAILED;
433 			}
434 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
435 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
436 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
437 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
438 					))) {
439 				printf("Skip loop-back with interrupt\n");
440 				return TEST_FAILED;
441 			}
442 			return TEST_SUCCESS;
443 		}
444 	}
445 
446 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
447 		return TEST_SUCCESS; /* Special case for NULL device */
448 
449 	return TEST_FAILED;
450 }
451 
452 /* Calculate the optimal mempool size, not smaller than val */
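/*
 * For illustration: rte_align32pow2(val + 1) - 1 returns the next value of
 * the form 2^n - 1 that is not smaller than val, e.g. 1000 -> 1023 and
 * 511 -> 511. Pool sizes of 2^n - 1 are assumed to be the optimum for the
 * ring backing the mempool.
 */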
453 static unsigned int
454 optimal_mempool_size(unsigned int val)
455 {
456 	return rte_align32pow2(val + 1) - 1;
457 }
458 
459 /* Allocate an mbuf mempool for inputs and outputs */
460 static struct rte_mempool *
461 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
462 		int socket_id, unsigned int mbuf_pool_size,
463 		const char *op_type_str)
464 {
465 	unsigned int i;
466 	uint32_t max_seg_sz = 0;
467 	char pool_name[RTE_MEMPOOL_NAMESIZE];
468 
469 	/* find max input segment size */
470 	for (i = 0; i < entries->nb_segments; ++i)
471 		if (entries->segments[i].length > max_seg_sz)
472 			max_seg_sz = entries->segments[i].length;
473 
474 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
475 			dev_id);
476 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
477 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
478 					+ FILLER_HEADROOM,
479 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
480 }
481 
482 static int
483 create_mempools(struct active_device *ad, int socket_id,
484 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
485 {
486 	struct rte_mempool *mp;
487 	unsigned int ops_pool_size, mbuf_pool_size = 0;
488 	char pool_name[RTE_MEMPOOL_NAMESIZE];
489 	const char *op_type_str;
490 	enum rte_bbdev_op_type op_type = org_op_type;
491 
492 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
493 	struct op_data_entries *hard_out =
494 			&test_vector.entries[DATA_HARD_OUTPUT];
495 	struct op_data_entries *soft_out =
496 			&test_vector.entries[DATA_SOFT_OUTPUT];
497 	struct op_data_entries *harq_in =
498 			&test_vector.entries[DATA_HARQ_INPUT];
499 	struct op_data_entries *harq_out =
500 			&test_vector.entries[DATA_HARQ_OUTPUT];
501 
502 	/* allocate ops mempool */
503 	ops_pool_size = optimal_mempool_size(RTE_MAX(
504 			/* Ops used plus 1 reference op */
505 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
506 			/* Minimal cache size plus 1 reference op */
507 			(unsigned int)(1.5 * rte_lcore_count() *
508 					OPS_CACHE_SIZE + 1)),
509 			OPS_POOL_SIZE_MIN));
510 
511 	if (org_op_type == RTE_BBDEV_OP_NONE)
512 		op_type = RTE_BBDEV_OP_TURBO_ENC;
513 
514 	op_type_str = rte_bbdev_op_type_str(op_type);
515 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
516 
517 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
518 			ad->dev_id);
519 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
520 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
521 	TEST_ASSERT_NOT_NULL(mp,
522 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
523 			ops_pool_size,
524 			ad->dev_id,
525 			socket_id);
526 	ad->ops_mempool = mp;
527 
528 	/* Do not create input and output mbufs for the BaseBand Null Device */
529 	if (org_op_type == RTE_BBDEV_OP_NONE)
530 		return TEST_SUCCESS;
531 
532 	/* Inputs */
533 	if (in->nb_segments > 0) {
534 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
535 				in->nb_segments);
536 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
537 				mbuf_pool_size, "in");
538 		TEST_ASSERT_NOT_NULL(mp,
539 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
540 				mbuf_pool_size,
541 				ad->dev_id,
542 				socket_id);
543 		ad->in_mbuf_pool = mp;
544 	}
545 
546 	/* Hard outputs */
547 	if (hard_out->nb_segments > 0) {
548 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
549 				hard_out->nb_segments);
550 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
551 				mbuf_pool_size,
552 				"hard_out");
553 		TEST_ASSERT_NOT_NULL(mp,
554 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
555 				mbuf_pool_size,
556 				ad->dev_id,
557 				socket_id);
558 		ad->hard_out_mbuf_pool = mp;
559 	}
560 
561 	/* Soft outputs */
562 	if (soft_out->nb_segments > 0) {
563 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
564 				soft_out->nb_segments);
565 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
566 				mbuf_pool_size,
567 				"soft_out");
568 		TEST_ASSERT_NOT_NULL(mp,
569 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
570 				mbuf_pool_size,
571 				ad->dev_id,
572 				socket_id);
573 		ad->soft_out_mbuf_pool = mp;
574 	}
575 
576 	/* HARQ inputs */
577 	if (harq_in->nb_segments > 0) {
578 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
579 				harq_in->nb_segments);
580 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
581 				mbuf_pool_size,
582 				"harq_in");
583 		TEST_ASSERT_NOT_NULL(mp,
584 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
585 				mbuf_pool_size,
586 				ad->dev_id,
587 				socket_id);
588 		ad->harq_in_mbuf_pool = mp;
589 	}
590 
591 	/* HARQ outputs */
592 	if (harq_out->nb_segments > 0) {
593 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
594 				harq_out->nb_segments);
595 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
596 				mbuf_pool_size,
597 				"harq_out");
598 		TEST_ASSERT_NOT_NULL(mp,
599 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
600 				mbuf_pool_size,
601 				ad->dev_id,
602 				socket_id);
603 		ad->harq_out_mbuf_pool = mp;
604 	}
605 
606 	return TEST_SUCCESS;
607 }
608 
609 static int
610 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
611 		struct test_bbdev_vector *vector)
612 {
613 	int ret;
614 	unsigned int queue_id;
615 	struct rte_bbdev_queue_conf qconf;
616 	struct active_device *ad = &active_devs[nb_active_devs];
617 	unsigned int nb_queues;
618 	enum rte_bbdev_op_type op_type = vector->op_type;
619 
620 /* Configure FPGA LTE FEC with PF & VF values
621  * if the '-i' flag is set and an FPGA device is used
622  */
623 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
624 	if ((get_init_device() == true) &&
625 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
626 		struct rte_fpga_lte_fec_conf conf;
627 		unsigned int i;
628 
629 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
630 				info->drv.driver_name);
631 
632 		/* clear default configuration before initialization */
633 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
634 
635 		/* Set PF mode:
636 		 * true if the PF is used for the data plane,
637 		 * false when VFs are used
638 		 */
639 		conf.pf_mode_en = true;
640 
641 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
642 			/* Number of UL queues per VF (fpga supports 8 VFs) */
643 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
644 			/* Number of DL queues per VF (fpga supports 8 VFs) */
645 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
646 		}
647 
648 		/* UL bandwidth. Needed for the scheduling algorithm */
649 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
650 		/* DL bandwidth */
651 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
652 
653 		/* UL & DL load balance factor set to 128 */
654 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
655 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
656 
657 		/* FLR timeout value */
658 		conf.flr_time_out = FLR_4G_TIMEOUT;
659 
660 		/* setup FPGA PF with configuration information */
661 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
662 		TEST_ASSERT_SUCCESS(ret,
663 				"Failed to configure 4G FPGA PF for bbdev %s",
664 				info->dev_name);
665 	}
666 #endif
667 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
668 	if ((get_init_device() == true) &&
669 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
670 		struct rte_fpga_5gnr_fec_conf conf;
671 		unsigned int i;
672 
673 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
674 				info->drv.driver_name);
675 
676 		/* clear default configuration before initialization */
677 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
678 
679 		/* Set PF mode:
680 		 * true if the PF is used for the data plane,
681 		 * false when VFs are used
682 		 */
683 		conf.pf_mode_en = true;
684 
685 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
686 			/* Number of UL queues per VF (fpga supports 8 VFs) */
687 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
688 			/* Number of DL queues per VF (fpga supports 8 VFs) */
689 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
690 		}
691 
692 		/* UL bandwidth. Needed for the scheduling algorithm */
693 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
694 		/* DL bandwidth */
695 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
696 
697 		/* UL & DL load balance factor set to 128 */
698 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
699 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
700 
701 		/* setup FPGA PF with configuration information */
702 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
703 		TEST_ASSERT_SUCCESS(ret,
704 				"Failed to configure 5G FPGA PF for bbdev %s",
705 				info->dev_name);
706 	}
707 #endif
708 #ifdef RTE_BASEBAND_ACC100
709 	if ((get_init_device() == true) &&
710 			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
711 		struct rte_acc100_conf conf;
712 		unsigned int i;
713 
714 		printf("Configure ACC100/ACC101 FEC Driver %s with default values\n",
715 				info->drv.driver_name);
716 
717 		/* clear default configuration before initialization */
718 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
719 
720 		/* Always set in PF mode for built-in configuration */
721 		conf.pf_mode_en = true;
722 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
723 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
724 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
725 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
726 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
727 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
728 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
729 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
730 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
731 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
732 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
733 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
734 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
735 		}
736 
737 		conf.input_pos_llr_1_bit = true;
738 		conf.output_pos_llr_1_bit = true;
739 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
740 
741 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
742 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
743 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
744 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
745 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
746 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
747 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
748 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
749 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
750 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
751 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
752 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
753 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
754 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
755 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
756 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
757 
758 		/* setup PF with configuration information */
759 		ret = rte_acc10x_configure(info->dev_name, &conf);
760 		TEST_ASSERT_SUCCESS(ret,
761 				"Failed to configure ACC100 PF for bbdev %s",
762 				info->dev_name);
763 	}
764 #endif
765 	/* Refresh the device info now that the device is configured */
766 	rte_bbdev_info_get(dev_id, info);
767 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
768 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
769 
770 	/* setup device */
771 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
772 	if (ret < 0) {
773 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
774 				dev_id, nb_queues, info->socket_id, ret);
775 		return TEST_FAILED;
776 	}
777 
778 	/* configure interrupts if needed */
779 	if (intr_enabled) {
780 		ret = rte_bbdev_intr_enable(dev_id);
781 		if (ret < 0) {
782 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
783 					ret);
784 			return TEST_FAILED;
785 		}
786 	}
787 
788 	/* setup device queues */
789 	qconf.socket = info->socket_id;
790 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
791 	qconf.priority = 0;
792 	qconf.deferred_start = 0;
793 	qconf.op_type = op_type;
794 
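	/*
	 * Each queue is first configured at the current priority; if that
	 * fails, the priority is incremented and the same queue id is retried
	 * once, and the loop stops when no more queues can be allocated.
	 */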
795 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
796 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
797 		if (ret != 0) {
798 			printf(
799 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
800 					queue_id, qconf.priority, dev_id);
801 			qconf.priority++;
802 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
803 					&qconf);
804 		}
805 		if (ret != 0) {
806 			printf("All queues on dev %u allocated: %u\n",
807 					dev_id, queue_id);
808 			break;
809 		}
810 		ad->queue_ids[queue_id] = queue_id;
811 	}
812 	TEST_ASSERT(queue_id != 0,
813 			"ERROR Failed to configure any queues on dev %u",
814 			dev_id);
815 	ad->nb_queues = queue_id;
816 
817 	set_avail_op(ad, op_type);
818 
819 	return TEST_SUCCESS;
820 }
821 
822 static int
823 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
824 		struct test_bbdev_vector *vector)
825 {
826 	int ret;
827 
828 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
829 	active_devs[nb_active_devs].dev_id = dev_id;
830 
831 	ret = add_bbdev_dev(dev_id, info, vector);
832 	if (ret == TEST_SUCCESS)
833 		++nb_active_devs;
834 	return ret;
835 }
836 
837 static uint8_t
838 populate_active_devices(void)
839 {
840 	int ret;
841 	uint8_t dev_id;
842 	uint8_t nb_devs_added = 0;
843 	struct rte_bbdev_info info;
844 
845 	RTE_BBDEV_FOREACH(dev_id) {
846 		rte_bbdev_info_get(dev_id, &info);
847 
848 		if (check_dev_cap(&info)) {
849 			printf(
850 				"Device %d (%s) does not support specified capabilities\n",
851 					dev_id, info.dev_name);
852 			continue;
853 		}
854 
855 		ret = add_active_device(dev_id, &info, &test_vector);
856 		if (ret != 0) {
857 			printf("Adding active bbdev %s skipped\n",
858 					info.dev_name);
859 			continue;
860 		}
861 		nb_devs_added++;
862 	}
863 
864 	return nb_devs_added;
865 }
866 
867 static int
868 read_test_vector(void)
869 {
870 	int ret;
871 
872 	memset(&test_vector, 0, sizeof(test_vector));
873 	printf("Test vector file = %s\n", get_vector_filename());
874 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
875 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
876 			get_vector_filename());
877 
878 	return TEST_SUCCESS;
879 }
880 
881 static int
882 testsuite_setup(void)
883 {
884 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
885 
886 	if (populate_active_devices() == 0) {
887 		printf("No suitable devices found!\n");
888 		return TEST_SKIPPED;
889 	}
890 
891 	return TEST_SUCCESS;
892 }
893 
894 static int
895 interrupt_testsuite_setup(void)
896 {
897 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
898 
899 	/* Enable interrupts */
900 	intr_enabled = true;
901 
902 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
903 	if (populate_active_devices() == 0 ||
904 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
905 		intr_enabled = false;
906 		printf("No suitable devices found!\n");
907 		return TEST_SKIPPED;
908 	}
909 
910 	return TEST_SUCCESS;
911 }
912 
913 static void
914 testsuite_teardown(void)
915 {
916 	uint8_t dev_id;
917 
918 	/* Unconfigure devices */
919 	RTE_BBDEV_FOREACH(dev_id)
920 		rte_bbdev_close(dev_id);
921 
922 	/* Clear active devices structs. */
923 	memset(active_devs, 0, sizeof(active_devs));
924 	nb_active_devs = 0;
925 
926 	/* Disable interrupts */
927 	intr_enabled = false;
928 }
929 
930 static int
931 ut_setup(void)
932 {
933 	uint8_t i, dev_id;
934 
935 	for (i = 0; i < nb_active_devs; i++) {
936 		dev_id = active_devs[i].dev_id;
937 		/* reset bbdev stats */
938 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
939 				"Failed to reset stats of bbdev %u", dev_id);
940 		/* start the device */
941 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
942 				"Failed to start bbdev %u", dev_id);
943 	}
944 
945 	return TEST_SUCCESS;
946 }
947 
948 static void
949 ut_teardown(void)
950 {
951 	uint8_t i, dev_id;
952 	struct rte_bbdev_stats stats;
953 
954 	for (i = 0; i < nb_active_devs; i++) {
955 		dev_id = active_devs[i].dev_id;
956 		/* Read the device stats */
957 		rte_bbdev_stats_get(dev_id, &stats);
958 		/* Stop the device */
959 		rte_bbdev_stop(dev_id);
960 	}
961 }
962 
963 static int
964 init_op_data_objs(struct rte_bbdev_op_data *bufs,
965 		struct op_data_entries *ref_entries,
966 		struct rte_mempool *mbuf_pool, const uint16_t n,
967 		enum op_data_type op_type, uint16_t min_alignment)
968 {
969 	int ret;
970 	unsigned int i, j;
971 	bool large_input = false;
972 
973 	for (i = 0; i < n; ++i) {
974 		char *data;
975 		struct op_data_buf *seg = &ref_entries->segments[0];
976 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
977 		TEST_ASSERT_NOT_NULL(m_head,
978 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
979 				op_type, n * ref_entries->nb_segments,
980 				mbuf_pool->size);
981 
982 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
983 			/*
984 			 * Special case when DPDK mbuf cannot handle
985 			 * the required input size
986 			 */
987 			printf("Warning: Larger input size than DPDK mbuf %d\n",
988 					seg->length);
989 			large_input = true;
990 		}
991 		bufs[i].data = m_head;
992 		bufs[i].offset = 0;
993 		bufs[i].length = 0;
994 
995 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
996 			if ((op_type == DATA_INPUT) && large_input) {
997 				/* Allocate an external buffer and use it in place of the mbuf data buffer */
998 				data = rte_malloc(NULL, seg->length, 0);
999 				TEST_ASSERT_NOT_NULL(data,
1000 					"rte malloc failed with %u bytes",
1001 					seg->length);
1002 				memcpy(data, seg->addr, seg->length);
1003 				m_head->buf_addr = data;
1004 				m_head->buf_iova = rte_malloc_virt2iova(data);
1005 				m_head->data_off = 0;
1006 				m_head->data_len = seg->length;
1007 			} else {
1008 				data = rte_pktmbuf_append(m_head, seg->length);
1009 				TEST_ASSERT_NOT_NULL(data,
1010 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1011 					seg->length, op_type);
1012 
1013 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1014 						data, min_alignment),
1015 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1016 					data, min_alignment);
1017 				rte_memcpy(data, seg->addr, seg->length);
1018 			}
1019 
1020 			bufs[i].length += seg->length;
1021 
1022 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1023 				struct rte_mbuf *m_tail =
1024 						rte_pktmbuf_alloc(mbuf_pool);
1025 				TEST_ASSERT_NOT_NULL(m_tail,
1026 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1027 						op_type,
1028 						n * ref_entries->nb_segments,
1029 						mbuf_pool->size);
1030 				seg += 1;
1031 
1032 				data = rte_pktmbuf_append(m_tail, seg->length);
1033 				TEST_ASSERT_NOT_NULL(data,
1034 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1035 						seg->length, op_type);
1036 
1037 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1038 						min_alignment),
1039 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1040 						data, min_alignment);
1041 				rte_memcpy(data, seg->addr, seg->length);
1042 				bufs[i].length += seg->length;
1043 
1044 				ret = rte_pktmbuf_chain(m_head, m_tail);
1045 				TEST_ASSERT_SUCCESS(ret,
1046 						"Couldn't chain mbufs from %d data type mbuf pool",
1047 						op_type);
1048 			}
1049 		} else {
1050 
1051 			/* allocate chained-mbuf for output buffer */
1052 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1053 				struct rte_mbuf *m_tail =
1054 						rte_pktmbuf_alloc(mbuf_pool);
1055 				TEST_ASSERT_NOT_NULL(m_tail,
1056 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1057 						op_type,
1058 						n * ref_entries->nb_segments,
1059 						mbuf_pool->size);
1060 
1061 				ret = rte_pktmbuf_chain(m_head, m_tail);
1062 				TEST_ASSERT_SUCCESS(ret,
1063 						"Couldn't chain mbufs from %d data type mbuf pool",
1064 						op_type);
1065 			}
1066 		}
1067 	}
1068 
1069 	return 0;
1070 }
1071 
1072 static int
1073 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1074 		const int socket)
1075 {
1076 	int i;
1077 
1078 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1079 	if (*buffers == NULL) {
1080 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1081 				socket);
1082 		/* try to allocate memory on other detected sockets */
1083 		for (i = 0; i < socket; i++) {
1084 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1085 			if (*buffers != NULL)
1086 				break;
1087 		}
1088 	}
1089 
1090 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1091 }
1092 
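/*
 * Illustrative example for the LLR range limiting below: assuming a device
 * max_llr_modulus of 16, an input LLR of 127 (INT8_MAX) is rescaled to 16
 * and an input LLR of -64 to round(16 * -64 / 127) = -8.
 */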
1093 static void
1094 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1095 		const uint16_t n, const int8_t max_llr_modulus)
1096 {
1097 	uint16_t i, byte_idx;
1098 
1099 	for (i = 0; i < n; ++i) {
1100 		struct rte_mbuf *m = input_ops[i].data;
1101 		while (m != NULL) {
1102 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1103 					input_ops[i].offset);
1104 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1105 					++byte_idx)
1106 				llr[byte_idx] = round((double)max_llr_modulus *
1107 						llr[byte_idx] / INT8_MAX);
1108 
1109 			m = m->next;
1110 		}
1111 	}
1112 }
1113 
1114 /*
1115  * We may have to insert filler bits
1116  * when they are required by the HARQ assumption
1117  */
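/*
 * Sketch of the insertion below: parity_offset = (BG1 ? 20 : 8) * Zc - n_filler
 * marks the end of the systematic LLRs; the parity LLRs are shifted up by
 * n_filler positions and the freed range [parity_offset, parity_offset + n_filler)
 * is filled with the maximum pre-scaling LLR magnitude, since devices with the
 * INTERNAL_HARQ_MEMORY_FILLERS capability expect those filler LLRs to be present.
 */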
1118 static void
1119 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1120 		const uint16_t n, struct test_op_params *op_params)
1121 {
1122 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1123 
1124 	if (input_ops == NULL)
1125 		return;
1126 	/* No need to add filler if not required by device */
1127 	if (!(ldpc_cap_flags &
1128 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1129 		return;
1130 	/* No need to add filler for loopback operation */
1131 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1132 		return;
1133 
1134 	uint16_t i, j, parity_offset;
1135 	for (i = 0; i < n; ++i) {
1136 		struct rte_mbuf *m = input_ops[i].data;
1137 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1138 				input_ops[i].offset);
1139 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1140 				* dec.z_c - dec.n_filler;
1141 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1142 		m->data_len = new_hin_size;
1143 		input_ops[i].length = new_hin_size;
1144 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1145 				j--)
1146 			llr[j] = llr[j - dec.n_filler];
1147 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1148 		for (j = 0; j < dec.n_filler; j++)
1149 			llr[parity_offset + j] = llr_max_pre_scaling;
1150 	}
1151 }
1152 
1153 static void
1154 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1155 		const uint16_t n, const int8_t llr_size,
1156 		const int8_t llr_decimals)
1157 {
1158 	if (input_ops == NULL)
1159 		return;
1160 
1161 	uint16_t i, byte_idx;
1162 
1163 	int16_t llr_max, llr_min, llr_tmp;
1164 	llr_max = (1 << (llr_size - 1)) - 1;
1165 	llr_min = -llr_max;
1166 	for (i = 0; i < n; ++i) {
1167 		struct rte_mbuf *m = input_ops[i].data;
1168 		while (m != NULL) {
1169 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1170 					input_ops[i].offset);
1171 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1172 					++byte_idx) {
1173 
1174 				llr_tmp = llr[byte_idx];
1175 				if (llr_decimals == 4)
1176 					llr_tmp *= 8;
1177 				else if (llr_decimals == 2)
1178 					llr_tmp *= 2;
1179 				else if (llr_decimals == 0)
1180 					llr_tmp /= 2;
1181 				llr_tmp = RTE_MIN(llr_max,
1182 						RTE_MAX(llr_min, llr_tmp));
1183 				llr[byte_idx] = (int8_t) llr_tmp;
1184 			}
1185 
1186 			m = m->next;
1187 		}
1188 	}
1189 }
1190 
1191 
1192 
1193 static int
1194 fill_queue_buffers(struct test_op_params *op_params,
1195 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1196 		struct rte_mempool *soft_out_mp,
1197 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1198 		uint16_t queue_id,
1199 		const struct rte_bbdev_op_cap *capabilities,
1200 		uint16_t min_alignment, const int socket_id)
1201 {
1202 	int ret;
1203 	enum op_data_type type;
1204 	const uint16_t n = op_params->num_to_process;
1205 
1206 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1207 		in_mp,
1208 		soft_out_mp,
1209 		hard_out_mp,
1210 		harq_in_mp,
1211 		harq_out_mp,
1212 	};
1213 
1214 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1215 		&op_params->q_bufs[socket_id][queue_id].inputs,
1216 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1217 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1218 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1219 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1220 	};
1221 
1222 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1223 		struct op_data_entries *ref_entries =
1224 				&test_vector.entries[type];
1225 		if (ref_entries->nb_segments == 0)
1226 			continue;
1227 
1228 		ret = allocate_buffers_on_socket(queue_ops[type],
1229 				n * sizeof(struct rte_bbdev_op_data),
1230 				socket_id);
1231 		TEST_ASSERT_SUCCESS(ret,
1232 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1233 
1234 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1235 				mbuf_pools[type], n, type, min_alignment);
1236 		TEST_ASSERT_SUCCESS(ret,
1237 				"Couldn't init rte_bbdev_op_data structs");
1238 	}
1239 
1240 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1241 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1242 			capabilities->cap.turbo_dec.max_llr_modulus);
1243 
1244 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1245 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1246 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1247 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1248 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1249 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1250 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1251 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1252 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1253 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1254 		if (!loopback && !llr_comp)
1255 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1256 					ldpc_llr_size, ldpc_llr_decimals);
1257 		if (!loopback && !harq_comp)
1258 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1259 					ldpc_llr_size, ldpc_llr_decimals);
1260 		if (!loopback)
1261 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1262 					op_params);
1263 	}
1264 
1265 	return 0;
1266 }
1267 
1268 static void
1269 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1270 {
1271 	unsigned int i, j;
1272 
1273 	rte_mempool_free(ad->ops_mempool);
1274 	rte_mempool_free(ad->in_mbuf_pool);
1275 	rte_mempool_free(ad->hard_out_mbuf_pool);
1276 	rte_mempool_free(ad->soft_out_mbuf_pool);
1277 	rte_mempool_free(ad->harq_in_mbuf_pool);
1278 	rte_mempool_free(ad->harq_out_mbuf_pool);
1279 
1280 	for (i = 0; i < rte_lcore_count(); ++i) {
1281 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1282 			rte_free(op_params->q_bufs[j][i].inputs);
1283 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1284 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1285 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1286 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1287 		}
1288 	}
1289 }
1290 
1291 static void
1292 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1293 		unsigned int start_idx,
1294 		struct rte_bbdev_op_data *inputs,
1295 		struct rte_bbdev_op_data *hard_outputs,
1296 		struct rte_bbdev_op_data *soft_outputs,
1297 		struct rte_bbdev_dec_op *ref_op)
1298 {
1299 	unsigned int i;
1300 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1301 
1302 	for (i = 0; i < n; ++i) {
1303 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1304 			ops[i]->turbo_dec.tb_params.ea =
1305 					turbo_dec->tb_params.ea;
1306 			ops[i]->turbo_dec.tb_params.eb =
1307 					turbo_dec->tb_params.eb;
1308 			ops[i]->turbo_dec.tb_params.k_pos =
1309 					turbo_dec->tb_params.k_pos;
1310 			ops[i]->turbo_dec.tb_params.k_neg =
1311 					turbo_dec->tb_params.k_neg;
1312 			ops[i]->turbo_dec.tb_params.c =
1313 					turbo_dec->tb_params.c;
1314 			ops[i]->turbo_dec.tb_params.c_neg =
1315 					turbo_dec->tb_params.c_neg;
1316 			ops[i]->turbo_dec.tb_params.cab =
1317 					turbo_dec->tb_params.cab;
1318 			ops[i]->turbo_dec.tb_params.r =
1319 					turbo_dec->tb_params.r;
1320 		} else {
1321 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1322 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1323 		}
1324 
1325 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1326 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1327 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1328 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1329 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1330 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1331 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1332 
1333 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1334 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1335 		if (soft_outputs != NULL)
1336 			ops[i]->turbo_dec.soft_output =
1337 				soft_outputs[start_idx + i];
1338 	}
1339 }
1340 
1341 static void
1342 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1343 		unsigned int start_idx,
1344 		struct rte_bbdev_op_data *inputs,
1345 		struct rte_bbdev_op_data *outputs,
1346 		struct rte_bbdev_enc_op *ref_op)
1347 {
1348 	unsigned int i;
1349 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1350 	for (i = 0; i < n; ++i) {
1351 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1352 			ops[i]->turbo_enc.tb_params.ea =
1353 					turbo_enc->tb_params.ea;
1354 			ops[i]->turbo_enc.tb_params.eb =
1355 					turbo_enc->tb_params.eb;
1356 			ops[i]->turbo_enc.tb_params.k_pos =
1357 					turbo_enc->tb_params.k_pos;
1358 			ops[i]->turbo_enc.tb_params.k_neg =
1359 					turbo_enc->tb_params.k_neg;
1360 			ops[i]->turbo_enc.tb_params.c =
1361 					turbo_enc->tb_params.c;
1362 			ops[i]->turbo_enc.tb_params.c_neg =
1363 					turbo_enc->tb_params.c_neg;
1364 			ops[i]->turbo_enc.tb_params.cab =
1365 					turbo_enc->tb_params.cab;
1366 			ops[i]->turbo_enc.tb_params.ncb_pos =
1367 					turbo_enc->tb_params.ncb_pos;
1368 			ops[i]->turbo_enc.tb_params.ncb_neg =
1369 					turbo_enc->tb_params.ncb_neg;
1370 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1371 		} else {
1372 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1373 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1374 			ops[i]->turbo_enc.cb_params.ncb =
1375 					turbo_enc->cb_params.ncb;
1376 		}
1377 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1378 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1379 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1380 
1381 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1382 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1383 	}
1384 }
1385 
1386 
1387 /* Returns a random number drawn from a normal distribution
1388  * with mean of 0 and variance of 1,
1389  * using the Marsaglia polar method
1390  */
1391 static double
1392 randn(int n)
1393 {
1394 	double S, Z, U1, U2, u, v, fac;
1395 
1396 	do {
1397 		U1 = (double)rand() / RAND_MAX;
1398 		U2 = (double)rand() / RAND_MAX;
1399 		u = 2. * U1 - 1.;
1400 		v = 2. * U2 - 1.;
1401 		S = u * u + v * v;
1402 	} while (S >= 1 || S == 0);
1403 	fac = sqrt(-2. * log(S) / S);
1404 	Z = (n % 2) ? u * fac : v * fac;
1405 	return Z;
1406 }
1407 
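/*
 * max* (Jacobian logarithm) identity assumed below:
 *   max*(A, B) = log(exp(A) + exp(B)) = max(A, B) + log(1 + exp(-|A - B|))
 * The correction term is dropped once |A - B| > 5, where it becomes
 * negligible at this LLR precision.
 */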
1408 static inline double
1409 maxstar(double A, double B)
1410 {
1411 	if (fabs(A - B) > 5)
1412 		return RTE_MAX(A, B);
1413 	else
1414 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1415 }
1416 
1417 /*
1418  * Generate Qm LLRs for Qm==8
1419  * Modulation, AWGN and LLR estimation using the max-log approximation
1420  */
1421 static void
1422 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1423 {
1424 	int qm = 8;
1425 	int qam = 256;
1426 	int m, k;
1427 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1428 	/* 5.1.4 of TS38.211 */
1429 	const double symbols_I[256] = {
1430 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1431 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1432 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1433 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1434 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1435 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1436 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1437 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1438 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1439 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1440 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1441 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1442 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1443 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1444 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1445 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1446 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1447 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1448 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1449 			-13, -13, -15, -15, -13, -13, -15, -15};
1450 	const double symbols_Q[256] = {
1451 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1452 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1453 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1454 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1455 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1456 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1457 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1458 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1459 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1460 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1461 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1462 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1463 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1464 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1465 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1466 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1467 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1468 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1469 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1470 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1471 	/* Average constellation point energy */
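	/*
	 * For illustration: the 256QAM levels are +/-1, +/-3, ..., +/-15 on each
	 * axis, so the average symbol energy is 2 * (1 + 9 + ... + 225) / 8 = 170;
	 * the factors 42 and 10 used for 64QAM and 16QAM below follow the same
	 * reasoning.
	 */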
1472 	N0 *= 170.0;
1473 	for (k = 0; k < qm; k++)
1474 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1475 	/* 5.1.4 of TS38.211 */
1476 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1477 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1478 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1479 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1480 	/* AWGN channel */
1481 	I += sqrt(N0 / 2) * randn(0);
1482 	Q += sqrt(N0 / 2) * randn(1);
1483 	/*
1484 	 * Calculate the log of the probability that each of
1485 	 * the constellation points was transmitted
1486 	 */
1487 	for (m = 0; m < qam; m++)
1488 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1489 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1490 	/* Calculate an LLR for each of the qm bits in the set */
1491 	for (k = 0; k < qm; k++) {
1492 		p0 = -999999;
1493 		p1 = -999999;
1494 		/* For each constellation point */
1495 		for (m = 0; m < qam; m++) {
1496 			if ((m >> (qm - k - 1)) & 1)
1497 				p1 = maxstar(p1, log_syml_prob[m]);
1498 			else
1499 				p0 = maxstar(p0, log_syml_prob[m]);
1500 		}
1501 		/* Calculate the LLR */
1502 		llr_ = p0 - p1;
1503 		llr_ *= (1 << ldpc_llr_decimals);
1504 		llr_ = round(llr_);
1505 		if (llr_ > llr_max)
1506 			llr_ = llr_max;
1507 		if (llr_ < -llr_max)
1508 			llr_ = -llr_max;
1509 		llrs[qm * i + k] = (int8_t) llr_;
1510 	}
1511 }
1512 
1513 
1514 /*
1515  * Generate Qm LLRs for Qm==6
1516  * Modulation, AWGN and LLR estimation using the max-log approximation
1517  */
1518 static void
1519 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1520 {
1521 	int qm = 6;
1522 	int qam = 64;
1523 	int m, k;
1524 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1525 	/* 5.1.4 of TS38.211 */
1526 	const double symbols_I[64] = {
1527 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1528 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1529 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1530 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1531 			-5, -5, -7, -7, -5, -5, -7, -7};
1532 	const double symbols_Q[64] = {
1533 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1534 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1535 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1536 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1537 			-3, -1, -3, -1, -5, -7, -5, -7};
1538 	/* Average constellation point energy */
1539 	N0 *= 42.0;
1540 	for (k = 0; k < qm; k++)
1541 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1542 	/* 5.1.4 of TS38.211 */
1543 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1544 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1545 	/* AWGN channel */
1546 	I += sqrt(N0 / 2) * randn(0);
1547 	Q += sqrt(N0 / 2) * randn(1);
1548 	/*
1549 	 * Calculate the log of the probability that each of
1550 	 * the constellation points was transmitted
1551 	 */
1552 	for (m = 0; m < qam; m++)
1553 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1554 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1555 	/* Calculate an LLR for each of the qm bits in the set */
1556 	for (k = 0; k < qm; k++) {
1557 		p0 = -999999;
1558 		p1 = -999999;
1559 		/* For each constellation point */
1560 		for (m = 0; m < qam; m++) {
1561 			if ((m >> (qm - k - 1)) & 1)
1562 				p1 = maxstar(p1, log_syml_prob[m]);
1563 			else
1564 				p0 = maxstar(p0, log_syml_prob[m]);
1565 		}
1566 		/* Calculate the LLR */
1567 		llr_ = p0 - p1;
1568 		llr_ *= (1 << ldpc_llr_decimals);
1569 		llr_ = round(llr_);
1570 		if (llr_ > llr_max)
1571 			llr_ = llr_max;
1572 		if (llr_ < -llr_max)
1573 			llr_ = -llr_max;
1574 		llrs[qm * i + k] = (int8_t) llr_;
1575 	}
1576 }
1577 
1578 /*
1579  * Generate Qm LLRs for Qm==4
1580  * Modulation, AWGN and LLR estimation using the max-log approximation
1581  */
1582 static void
1583 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1584 {
1585 	int qm = 4;
1586 	int qam = 16;
1587 	int m, k;
1588 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1589 	/* 5.1.4 of TS38.211 */
1590 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1591 			-1, -1, -3, -3, -1, -1, -3, -3};
1592 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1593 			1, 3, 1, 3, -1, -3, -1, -3};
1594 	/* Average constellation point energy */
1595 	N0 *= 10.0;
1596 	for (k = 0; k < qm; k++)
1597 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1598 	/* 5.1.4 of TS38.211 */
1599 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1600 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1601 	/* AWGN channel */
1602 	I += sqrt(N0 / 2) * randn(0);
1603 	Q += sqrt(N0 / 2) * randn(1);
1604 	/*
1605 	 * Calculate the log of the probability that each of
1606 	 * the constellation points was transmitted
1607 	 */
1608 	for (m = 0; m < qam; m++)
1609 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1610 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1611 	/* Calculate an LLR for each of the qm bits in the set */
1612 	for (k = 0; k < qm; k++) {
1613 		p0 = -999999;
1614 		p1 = -999999;
1615 		/* For each constellation point */
1616 		for (m = 0; m < qam; m++) {
1617 			if ((m >> (qm - k - 1)) & 1)
1618 				p1 = maxstar(p1, log_syml_prob[m]);
1619 			else
1620 				p0 = maxstar(p0, log_syml_prob[m]);
1621 		}
1622 		/* Calculate the LLR */
1623 		llr_ = p0 - p1;
1624 		llr_ *= (1 << ldpc_llr_decimals);
1625 		llr_ = round(llr_);
1626 		if (llr_ > llr_max)
1627 			llr_ = llr_max;
1628 		if (llr_ < -llr_max)
1629 			llr_ = -llr_max;
1630 		llrs[qm * i + k] = (int8_t) llr_;
1631 	}
1632 }
1633 
1634 static void
1635 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1636 {
1637 	double b, b1, n;
1638 	double coeff = 2.0 * sqrt(N0);
1639 
1640 	/* Ignore the rare near-zero LLRs in the vectors so they do not get saturated */
1641 	if (llrs[j] < 8 && llrs[j] > -8)
1642 		return;
1643 
1644 	/* Note: do not change the sign here */
1645 	n = randn(j % 2);
1646 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1647 			+ coeff * n) / N0;
1648 	b = b1 * (1 << ldpc_llr_decimals);
1649 	b = round(b);
1650 	if (b > llr_max)
1651 		b = llr_max;
1652 	if (b < -llr_max)
1653 		b = -llr_max;
1654 	llrs[j] = (int8_t) b;
1655 }
1656 
1657 /* Generate LLR for a given SNR */
1658 static void
1659 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1660 		struct rte_bbdev_dec_op *ref_op)
1661 {
1662 	struct rte_mbuf *m;
1663 	uint16_t qm;
1664 	uint32_t i, j, e, range;
1665 	double N0, llr_max;
1666 
1667 	e = ref_op->ldpc_dec.cb_params.e;
1668 	qm = ref_op->ldpc_dec.q_m;
1669 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1670 	range = e / qm;
1671 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1672 
1673 	for (i = 0; i < n; ++i) {
1674 		m = inputs[i].data;
1675 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1676 		if (qm == 8) {
1677 			for (j = 0; j < range; ++j)
1678 				gen_qm8_llr(llrs, j, N0, llr_max);
1679 		} else if (qm == 6) {
1680 			for (j = 0; j < range; ++j)
1681 				gen_qm6_llr(llrs, j, N0, llr_max);
1682 		} else if (qm == 4) {
1683 			for (j = 0; j < range; ++j)
1684 				gen_qm4_llr(llrs, j, N0, llr_max);
1685 		} else {
1686 			for (j = 0; j < e; ++j)
1687 				gen_qm2_llr(llrs, j, N0, llr_max);
1688 		}
1689 	}
1690 }
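
/*
 * The noise level follows directly from the configured SNR:
 * N0 = 1 / 10^(SNR/10), so for example an SNR of 10 dB gives N0 = 0.1 and
 * an SNR of 0 dB gives N0 = 1.0. The per-modulation generators above then
 * add AWGN at that level before re-quantizing the LLRs.
 */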
1691 
1692 static void
1693 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1694 		unsigned int start_idx,
1695 		struct rte_bbdev_op_data *inputs,
1696 		struct rte_bbdev_op_data *hard_outputs,
1697 		struct rte_bbdev_op_data *soft_outputs,
1698 		struct rte_bbdev_op_data *harq_inputs,
1699 		struct rte_bbdev_op_data *harq_outputs,
1700 		struct rte_bbdev_dec_op *ref_op)
1701 {
1702 	unsigned int i;
1703 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1704 
1705 	for (i = 0; i < n; ++i) {
1706 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1707 			ops[i]->ldpc_dec.tb_params.ea =
1708 					ldpc_dec->tb_params.ea;
1709 			ops[i]->ldpc_dec.tb_params.eb =
1710 					ldpc_dec->tb_params.eb;
1711 			ops[i]->ldpc_dec.tb_params.c =
1712 					ldpc_dec->tb_params.c;
1713 			ops[i]->ldpc_dec.tb_params.cab =
1714 					ldpc_dec->tb_params.cab;
1715 			ops[i]->ldpc_dec.tb_params.r =
1716 					ldpc_dec->tb_params.r;
1717 		} else {
1718 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1719 		}
1720 
1721 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1722 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1723 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1724 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1725 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1726 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1727 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1728 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1729 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1730 
1731 		if (hard_outputs != NULL)
1732 			ops[i]->ldpc_dec.hard_output =
1733 					hard_outputs[start_idx + i];
1734 		if (inputs != NULL)
1735 			ops[i]->ldpc_dec.input =
1736 					inputs[start_idx + i];
1737 		if (soft_outputs != NULL)
1738 			ops[i]->ldpc_dec.soft_output =
1739 					soft_outputs[start_idx + i];
1740 		if (harq_inputs != NULL)
1741 			ops[i]->ldpc_dec.harq_combined_input =
1742 					harq_inputs[start_idx + i];
1743 		if (harq_outputs != NULL)
1744 			ops[i]->ldpc_dec.harq_combined_output =
1745 					harq_outputs[start_idx + i];
1746 	}
1747 }
1748 
1749 
1750 static void
1751 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1752 		unsigned int start_idx,
1753 		struct rte_bbdev_op_data *inputs,
1754 		struct rte_bbdev_op_data *outputs,
1755 		struct rte_bbdev_enc_op *ref_op)
1756 {
1757 	unsigned int i;
1758 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1759 	for (i = 0; i < n; ++i) {
1760 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1761 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1762 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1763 			ops[i]->ldpc_enc.tb_params.cab =
1764 					ldpc_enc->tb_params.cab;
1765 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1766 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1767 		} else {
1768 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1769 		}
1770 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1771 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1772 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1773 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1774 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1775 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1776 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1777 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1778 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1779 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1780 	}
1781 }
1782 
1783 static int
1784 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1785 		unsigned int order_idx, const int expected_status)
1786 {
1787 	int status = op->status;
1788 	/* Ignore parity mismatch false alarms for long iteration counts */
1789 	if (get_iter_max() >= 10) {
1790 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1791 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1792 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1793 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1794 		}
1795 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1796 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1797 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1798 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1799 		}
1800 	}
1801 
1802 	TEST_ASSERT(status == expected_status,
1803 			"op_status (%d) != expected_status (%d)",
1804 			op->status, expected_status);
1805 
1806 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1807 			"Ordering error, expected %p, got %p",
1808 			(void *)(uintptr_t)order_idx, op->opaque_data);
1809 
1810 	return TEST_SUCCESS;
1811 }
1812 
1813 static int
1814 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1815 		unsigned int order_idx, const int expected_status)
1816 {
1817 	TEST_ASSERT(op->status == expected_status,
1818 			"op_status (%d) != expected_status (%d)",
1819 			op->status, expected_status);
1820 
1821 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1822 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1823 				"Ordering error, expected %p, got %p",
1824 				(void *)(uintptr_t)order_idx, op->opaque_data);
1825 
1826 	return TEST_SUCCESS;
1827 }
1828 
1829 static inline int
1830 validate_op_chain(struct rte_bbdev_op_data *op,
1831 		struct op_data_entries *orig_op)
1832 {
1833 	uint8_t i;
1834 	struct rte_mbuf *m = op->data;
1835 	uint8_t nb_dst_segments = orig_op->nb_segments;
1836 	uint32_t total_data_size = 0;
1837 
1838 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1839 			"Number of segments differs in original (%u) and filled (%u) op",
1840 			nb_dst_segments, m->nb_segs);
1841 
1842 	/* Validate each mbuf segment length */
1843 	for (i = 0; i < nb_dst_segments; ++i) {
1844 		/* Apply offset to the first mbuf segment */
1845 		uint16_t offset = (i == 0) ? op->offset : 0;
1846 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1847 		total_data_size += orig_op->segments[i].length;
1848 
1849 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1850 				"Length of segment differs in original (%u) and filled (%u) op",
1851 				orig_op->segments[i].length, data_len);
1852 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1853 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1854 				data_len,
1855 				"Output buffers (CB=%u) are not equal", i);
1856 		m = m->next;
1857 	}
1858 
1859 	/* Validate total mbuf pkt length */
1860 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1861 	TEST_ASSERT(total_data_size == pkt_len,
1862 			"Length of data differs in original (%u) and filled (%u) op",
1863 			total_data_size, pkt_len);
1864 
1865 	return TEST_SUCCESS;
1866 }
1867 
1868 /*
1869  * Compute K0 for a given configuration, used for the HARQ output length
1870  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2
1871  */
1872 static inline uint16_t
1873 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1874 {
1875 	if (rv_index == 0)
1876 		return 0;
1877 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1878 	if (n_cb == n) {
1879 		if (rv_index == 1)
1880 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1881 		else if (rv_index == 2)
1882 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1883 		else
1884 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1885 	}
1886 	/* LBRM case - includes a division by N */
1887 	if (rv_index == 1)
1888 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1889 				/ n) * z_c;
1890 	else if (rv_index == 2)
1891 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1892 				/ n) * z_c;
1893 	else
1894 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1895 				/ n) * z_c;
1896 }
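
/*
 * Worked example of the K0 values above, assuming BG1, Zc = 384 and a full
 * circular buffer (n_cb == N == 66 * Zc):
 *   rv_index 0 -> k0 = 0
 *   rv_index 1 -> k0 = 17 * 384 = 6528
 *   rv_index 2 -> k0 = 33 * 384 = 12672
 *   rv_index 3 -> k0 = 56 * 384 = 21504
 * In the limited buffer (LBRM) branch the same numerators are first scaled
 * by n_cb / N (integer division) before the multiplication by Zc.
 */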
1897 
1898 /* HARQ output length including the Filler bits */
1899 static inline uint16_t
1900 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1901 {
1902 	uint16_t k0 = 0;
1903 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1904 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1905 	/* Compute RM out size and number of rows */
1906 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1907 			* ops_ld->z_c - ops_ld->n_filler;
1908 	uint16_t deRmOutSize = RTE_MIN(
1909 			k0 + ops_ld->cb_params.e +
1910 			((k0 > parity_offset) ?
1911 					0 : ops_ld->n_filler),
1912 					ops_ld->n_cb);
1913 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1914 			/ ops_ld->z_c);
1915 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1916 	return harq_output_len;
1917 }
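
/*
 * Illustrative computation for the function above, assuming BG1, Zc = 384,
 * n_cb = 25344 (= 66 * Zc), n_filler = 0, rv_index = 0 and
 * cb_params.e = 10000: k0 = 0, parity_offset = 20 * 384 = 7680,
 * deRmOutSize = min(0 + 10000 + 0, 25344) = 10000, numRows =
 * ceil(10000 / 384) = 27, so harq_output_len = 27 * 384 = 10368.
 */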
1918 
1919 static inline int
1920 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1921 		struct op_data_entries *orig_op,
1922 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1923 {
1924 	uint8_t i;
1925 	uint32_t j, jj, k;
1926 	struct rte_mbuf *m = op->data;
1927 	uint8_t nb_dst_segments = orig_op->nb_segments;
1928 	uint32_t total_data_size = 0;
1929 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1930 	uint32_t byte_error = 0, cum_error = 0, error;
1931 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1932 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1933 	uint16_t parity_offset;
1934 
1935 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1936 			"Number of segments differs in original (%u) and filled (%u) op",
1937 			nb_dst_segments, m->nb_segs);
1938 
1939 	/* Validate each mbuf segment length */
1940 	for (i = 0; i < nb_dst_segments; ++i) {
1941 		/* Apply offset to the first mbuf segment */
1942 		uint16_t offset = (i == 0) ? op->offset : 0;
1943 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1944 		total_data_size += orig_op->segments[i].length;
1945 
1946 		TEST_ASSERT(orig_op->segments[i].length <
1947 				(uint32_t)(data_len + 64),
1948 				"Length of segment differs in original (%u) and filled (%u) op",
1949 				orig_op->segments[i].length, data_len);
1950 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1951 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1952 
1953 		if (!(ldpc_cap_flags &
1954 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1955 				) || (ops_ld->op_flags &
1956 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1957 			data_len -= ops_ld->z_c;
1958 			parity_offset = data_len;
1959 		} else {
1960 			/* Compute RM out size and number of rows */
1961 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1962 					* ops_ld->z_c - ops_ld->n_filler;
1963 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1964 					ops_ld->n_filler;
1965 			if (data_len > deRmOutSize)
1966 				data_len = deRmOutSize;
1967 			if (data_len > orig_op->segments[i].length)
1968 				data_len = orig_op->segments[i].length;
1969 		}
1970 		/*
1971 		 * HARQ output can have minor differences
1972 		 * due to integer representation and related scaling
1973 		 */
1974 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1975 			if (j == parity_offset) {
1976 				/* Special Handling of the filler bits */
1977 				for (k = 0; k < ops_ld->n_filler; k++) {
1978 					if (harq_out[jj] !=
1979 							llr_max_pre_scaling) {
1980 						printf("HARQ Filler issue %d: %d %d\n",
1981 							jj, harq_out[jj],
1982 							llr_max_pre_scaling);
1983 						byte_error++;
1984 					}
1985 					jj++;
1986 				}
1987 			}
1988 			if (!(ops_ld->op_flags &
1989 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1990 				if (ldpc_llr_decimals > 1)
1991 					harq_out[jj] = (harq_out[jj] + 1)
1992 						>> (ldpc_llr_decimals - 1);
1993 				/* Saturated to S7 */
1994 				if (harq_orig[j] > llr_max)
1995 					harq_orig[j] = llr_max;
1996 				if (harq_orig[j] < -llr_max)
1997 					harq_orig[j] = -llr_max;
1998 			}
1999 			if (harq_orig[j] != harq_out[jj]) {
2000 				error = (harq_orig[j] > harq_out[jj]) ?
2001 						harq_orig[j] - harq_out[jj] :
2002 						harq_out[jj] - harq_orig[j];
2003 				abs_harq_origin = harq_orig[j] > 0 ?
2004 							harq_orig[j] :
2005 							-harq_orig[j];
2006 				/* Residual quantization error */
2007 				if ((error > 8 && (abs_harq_origin <
2008 						(llr_max - 16))) ||
2009 						(error > 16)) {
2010 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2011 							j, harq_orig[j],
2012 							harq_out[jj], error);
2013 					byte_error++;
2014 					cum_error += error;
2015 				}
2016 			}
2017 		}
2018 		m = m->next;
2019 	}
2020 
2021 	if (byte_error)
2022 		TEST_ASSERT(byte_error <= 1,
2023 				"HARQ output mismatch (%d) %d",
2024 				byte_error, cum_error);
2025 
2026 	/* Validate total mbuf pkt length */
2027 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2028 	TEST_ASSERT(total_data_size < pkt_len + 64,
2029 			"Length of data differs in original (%u) and filled (%u) op",
2030 			total_data_size, pkt_len);
2031 
2032 	return TEST_SUCCESS;
2033 }
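
/*
 * The tolerance applied above is meant to absorb the device's internal
 * HARQ quantization: a byte is only counted as an error when it deviates
 * by more than 16, or by more than 8 while the reference value is not
 * already close to saturation, and at most one such byte per operation is
 * accepted.
 */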
2034 
2035 static int
2036 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2037 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2038 {
2039 	unsigned int i;
2040 	int ret;
2041 	struct op_data_entries *hard_data_orig =
2042 			&test_vector.entries[DATA_HARD_OUTPUT];
2043 	struct op_data_entries *soft_data_orig =
2044 			&test_vector.entries[DATA_SOFT_OUTPUT];
2045 	struct rte_bbdev_op_turbo_dec *ops_td;
2046 	struct rte_bbdev_op_data *hard_output;
2047 	struct rte_bbdev_op_data *soft_output;
2048 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2049 
2050 	for (i = 0; i < n; ++i) {
2051 		ops_td = &ops[i]->turbo_dec;
2052 		hard_output = &ops_td->hard_output;
2053 		soft_output = &ops_td->soft_output;
2054 
2055 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2056 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2057 					"Returned iter_count (%d) > expected iter_count (%d)",
2058 					ops_td->iter_count, ref_td->iter_count);
2059 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2060 		TEST_ASSERT_SUCCESS(ret,
2061 				"Checking status and ordering for decoder failed");
2062 
2063 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2064 				hard_data_orig),
2065 				"Hard output buffers (CB=%u) are not equal",
2066 				i);
2067 
2068 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2069 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2070 					soft_data_orig),
2071 					"Soft output buffers (CB=%u) are not equal",
2072 					i);
2073 	}
2074 
2075 	return TEST_SUCCESS;
2076 }
2077 
2078 /* Check the number of code block errors */
2079 static int
2080 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2081 {
2082 	unsigned int i;
2083 	struct op_data_entries *hard_data_orig =
2084 			&test_vector.entries[DATA_HARD_OUTPUT];
2085 	struct rte_bbdev_op_ldpc_dec *ops_td;
2086 	struct rte_bbdev_op_data *hard_output;
2087 	int errors = 0;
2088 	struct rte_mbuf *m;
2089 
2090 	for (i = 0; i < n; ++i) {
2091 		ops_td = &ops[i]->ldpc_dec;
2092 		hard_output = &ops_td->hard_output;
2093 		m = hard_output->data;
2094 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2095 				hard_data_orig->segments[0].addr,
2096 				hard_data_orig->segments[0].length))
2097 			errors++;
2098 	}
2099 	return errors;
2100 }
2101 
2102 static int
2103 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2104 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2105 {
2106 	unsigned int i;
2107 	int ret;
2108 	struct op_data_entries *hard_data_orig =
2109 			&test_vector.entries[DATA_HARD_OUTPUT];
2110 	struct op_data_entries *soft_data_orig =
2111 			&test_vector.entries[DATA_SOFT_OUTPUT];
2112 	struct op_data_entries *harq_data_orig =
2113 				&test_vector.entries[DATA_HARQ_OUTPUT];
2114 	struct rte_bbdev_op_ldpc_dec *ops_td;
2115 	struct rte_bbdev_op_data *hard_output;
2116 	struct rte_bbdev_op_data *harq_output;
2117 	struct rte_bbdev_op_data *soft_output;
2118 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2119 
2120 	for (i = 0; i < n; ++i) {
2121 		ops_td = &ops[i]->ldpc_dec;
2122 		hard_output = &ops_td->hard_output;
2123 		harq_output = &ops_td->harq_combined_output;
2124 		soft_output = &ops_td->soft_output;
2125 
2126 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2127 		TEST_ASSERT_SUCCESS(ret,
2128 				"Checking status and ordering for decoder failed");
2129 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2130 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2131 					"Returned iter_count (%d) > expected iter_count (%d)",
2132 					ops_td->iter_count, ref_td->iter_count);
2133 		/*
2134 		 * We can ignore output data when the decoding failed to
2135 		 * converge or for loop-back cases
2136 		 */
2137 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2138 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2139 				) && (
2140 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2141 						)) == 0)
2142 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2143 					hard_data_orig),
2144 					"Hard output buffers (CB=%u) are not equal",
2145 					i);
2146 
2147 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2148 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2149 					soft_data_orig),
2150 					"Soft output buffers (CB=%u) are not equal",
2151 					i);
2152 		if (ref_op->ldpc_dec.op_flags &
2153 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2154 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2155 					harq_data_orig, ops_td),
2156 					"HARQ output buffers (CB=%u) are not equal",
2157 					i);
2158 		}
2159 		if (ref_op->ldpc_dec.op_flags &
2160 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2161 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2162 					harq_data_orig, ops_td),
2163 					"HARQ output buffers (CB=%u) are not equal",
2164 					i);
2165 
2166 	}
2167 
2168 	return TEST_SUCCESS;
2169 }
2170 
2171 
2172 static int
2173 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2174 		struct rte_bbdev_enc_op *ref_op)
2175 {
2176 	unsigned int i;
2177 	int ret;
2178 	struct op_data_entries *hard_data_orig =
2179 			&test_vector.entries[DATA_HARD_OUTPUT];
2180 
2181 	for (i = 0; i < n; ++i) {
2182 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2183 		TEST_ASSERT_SUCCESS(ret,
2184 				"Checking status and ordering for encoder failed");
2185 		TEST_ASSERT_SUCCESS(validate_op_chain(
2186 				&ops[i]->turbo_enc.output,
2187 				hard_data_orig),
2188 				"Output buffers (CB=%u) are not equal",
2189 				i);
2190 	}
2191 
2192 	return TEST_SUCCESS;
2193 }
2194 
2195 static int
2196 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2197 		struct rte_bbdev_enc_op *ref_op)
2198 {
2199 	unsigned int i;
2200 	int ret;
2201 	struct op_data_entries *hard_data_orig =
2202 			&test_vector.entries[DATA_HARD_OUTPUT];
2203 
2204 	for (i = 0; i < n; ++i) {
2205 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2206 		TEST_ASSERT_SUCCESS(ret,
2207 				"Checking status and ordering for encoder failed");
2208 		TEST_ASSERT_SUCCESS(validate_op_chain(
2209 				&ops[i]->ldpc_enc.output,
2210 				hard_data_orig),
2211 				"Output buffers (CB=%u) are not equal",
2212 				i);
2213 	}
2214 
2215 	return TEST_SUCCESS;
2216 }
2217 
2218 static void
2219 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2220 {
2221 	unsigned int i;
2222 	struct op_data_entries *entry;
2223 
2224 	op->turbo_dec = test_vector.turbo_dec;
2225 	entry = &test_vector.entries[DATA_INPUT];
2226 	for (i = 0; i < entry->nb_segments; ++i)
2227 		op->turbo_dec.input.length +=
2228 				entry->segments[i].length;
2229 }
2230 
2231 static void
2232 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2233 {
2234 	unsigned int i;
2235 	struct op_data_entries *entry;
2236 
2237 	op->ldpc_dec = test_vector.ldpc_dec;
2238 	entry = &test_vector.entries[DATA_INPUT];
2239 	for (i = 0; i < entry->nb_segments; ++i)
2240 		op->ldpc_dec.input.length +=
2241 				entry->segments[i].length;
2242 	if (test_vector.ldpc_dec.op_flags &
2243 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2244 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2245 		for (i = 0; i < entry->nb_segments; ++i)
2246 			op->ldpc_dec.harq_combined_input.length +=
2247 				entry->segments[i].length;
2248 	}
2249 }
2250 
2251 
2252 static void
2253 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2254 {
2255 	unsigned int i;
2256 	struct op_data_entries *entry;
2257 
2258 	op->turbo_enc = test_vector.turbo_enc;
2259 	entry = &test_vector.entries[DATA_INPUT];
2260 	for (i = 0; i < entry->nb_segments; ++i)
2261 		op->turbo_enc.input.length +=
2262 				entry->segments[i].length;
2263 }
2264 
2265 static void
2266 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2267 {
2268 	unsigned int i;
2269 	struct op_data_entries *entry;
2270 
2271 	op->ldpc_enc = test_vector.ldpc_enc;
2272 	entry = &test_vector.entries[DATA_INPUT];
2273 	for (i = 0; i < entry->nb_segments; ++i)
2274 		op->ldpc_enc.input.length +=
2275 				entry->segments[i].length;
2276 }
2277 
2278 static uint32_t
2279 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2280 {
2281 	uint8_t i;
2282 	uint32_t c, r, tb_size = 0;
2283 
2284 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2285 		tb_size = op->turbo_dec.tb_params.k_neg;
2286 	} else {
2287 		c = op->turbo_dec.tb_params.c;
2288 		r = op->turbo_dec.tb_params.r;
2289 		for (i = 0; i < c-r; i++)
2290 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2291 				op->turbo_dec.tb_params.k_neg :
2292 				op->turbo_dec.tb_params.k_pos;
2293 	}
2294 	return tb_size;
2295 }
2296 
2297 static uint32_t
2298 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2299 {
2300 	uint8_t i;
2301 	uint32_t c, r, tb_size = 0;
2302 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2303 
2304 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2305 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2306 	} else {
2307 		c = op->ldpc_dec.tb_params.c;
2308 		r = op->ldpc_dec.tb_params.r;
2309 		for (i = 0; i < c-r; i++)
2310 			tb_size += sys_cols * op->ldpc_dec.z_c
2311 					- op->ldpc_dec.n_filler;
2312 	}
2313 	return tb_size;
2314 }
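
/*
 * Example for the TB size above, assuming BG1 (22 systematic columns),
 * Zc = 384 and n_filler = 0: each code block carries 22 * 384 - 0 = 8448
 * information bits, and a transport block covering c - r such blocks
 * simply multiplies that figure.
 */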
2315 
2316 static uint32_t
2317 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2318 {
2319 	uint8_t i;
2320 	uint32_t c, r, tb_size = 0;
2321 
2322 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2323 		tb_size = op->turbo_enc.tb_params.k_neg;
2324 	} else {
2325 		c = op->turbo_enc.tb_params.c;
2326 		r = op->turbo_enc.tb_params.r;
2327 		for (i = 0; i < c-r; i++)
2328 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2329 				op->turbo_enc.tb_params.k_neg :
2330 				op->turbo_enc.tb_params.k_pos;
2331 	}
2332 	return tb_size;
2333 }
2334 
2335 static uint32_t
2336 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2337 {
2338 	uint8_t i;
2339 	uint32_t c, r, tb_size = 0;
2340 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2341 
2342 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2343 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2344 	} else {
2345 		c = op->ldpc_enc.tb_params.c;
2346 		r = op->ldpc_enc.tb_params.r;
2347 		for (i = 0; i < c-r; i++)
2348 			tb_size += sys_cols * op->ldpc_enc.z_c
2349 					- op->ldpc_enc.n_filler;
2350 	}
2351 	return tb_size;
2352 }
2353 
2354 
2355 static int
2356 init_test_op_params(struct test_op_params *op_params,
2357 		enum rte_bbdev_op_type op_type, const int expected_status,
2358 		const int vector_mask, struct rte_mempool *ops_mp,
2359 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2360 {
2361 	int ret = 0;
2362 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2363 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2364 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2365 				&op_params->ref_dec_op, 1);
2366 	else
2367 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2368 				&op_params->ref_enc_op, 1);
2369 
2370 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2371 
2372 	op_params->mp = ops_mp;
2373 	op_params->burst_sz = burst_sz;
2374 	op_params->num_to_process = num_to_process;
2375 	op_params->num_lcores = num_lcores;
2376 	op_params->vector_mask = vector_mask;
2377 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2378 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2379 		op_params->ref_dec_op->status = expected_status;
2380 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2381 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2382 		op_params->ref_enc_op->status = expected_status;
2383 	return 0;
2384 }
2385 
2386 static int
2387 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2388 		struct test_op_params *op_params)
2389 {
2390 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2391 	unsigned int i;
2392 	struct active_device *ad;
2393 	unsigned int burst_sz = get_burst_sz();
2394 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2395 	const struct rte_bbdev_op_cap *capabilities = NULL;
2396 
2397 	ad = &active_devs[dev_id];
2398 
2399 	/* Check if device supports op_type */
2400 	if (!is_avail_op(ad, test_vector.op_type))
2401 		return TEST_SUCCESS;
2402 
2403 	struct rte_bbdev_info info;
2404 	rte_bbdev_info_get(ad->dev_id, &info);
2405 	socket_id = GET_SOCKET(info.socket_id);
2406 
2407 	f_ret = create_mempools(ad, socket_id, op_type,
2408 			get_num_ops());
2409 	if (f_ret != TEST_SUCCESS) {
2410 		printf("Couldn't create mempools\n");
2411 		goto fail;
2412 	}
2413 	if (op_type == RTE_BBDEV_OP_NONE)
2414 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2415 
2416 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2417 			test_vector.expected_status,
2418 			test_vector.mask,
2419 			ad->ops_mempool,
2420 			burst_sz,
2421 			get_num_ops(),
2422 			get_num_lcores());
2423 	if (f_ret != TEST_SUCCESS) {
2424 		printf("Couldn't init test op params\n");
2425 		goto fail;
2426 	}
2427 
2428 
2429 	/* Find capabilities */
2430 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2431 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2432 		if (cap->type == test_vector.op_type) {
2433 			capabilities = cap;
2434 			break;
2435 		}
2436 		cap++;
2437 	}
2438 	TEST_ASSERT_NOT_NULL(capabilities,
2439 			"Couldn't find capabilities");
2440 
2441 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2442 		create_reference_dec_op(op_params->ref_dec_op);
2443 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2444 		create_reference_enc_op(op_params->ref_enc_op);
2445 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2446 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2447 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2448 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2449 
2450 	for (i = 0; i < ad->nb_queues; ++i) {
2451 		f_ret = fill_queue_buffers(op_params,
2452 				ad->in_mbuf_pool,
2453 				ad->hard_out_mbuf_pool,
2454 				ad->soft_out_mbuf_pool,
2455 				ad->harq_in_mbuf_pool,
2456 				ad->harq_out_mbuf_pool,
2457 				ad->queue_ids[i],
2458 				capabilities,
2459 				info.drv.min_alignment,
2460 				socket_id);
2461 		if (f_ret != TEST_SUCCESS) {
2462 			printf("Couldn't init queue buffers\n");
2463 			goto fail;
2464 		}
2465 	}
2466 
2467 	/* Run test case function */
2468 	t_ret = test_case_func(ad, op_params);
2469 
2470 	/* Free active device resources and return */
2471 	free_buffers(ad, op_params);
2472 	return t_ret;
2473 
2474 fail:
2475 	free_buffers(ad, op_params);
2476 	return TEST_FAILED;
2477 }
2478 
2479 /* Run the given test function per active device, per supported op type,
2480  * per burst size.
2481  */
2482 static int
2483 run_test_case(test_case_function *test_case_func)
2484 {
2485 	int ret = 0;
2486 	uint8_t dev;
2487 
2488 	/* Alloc op_params */
2489 	struct test_op_params *op_params = rte_zmalloc(NULL,
2490 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2491 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2492 			RTE_ALIGN(sizeof(struct test_op_params),
2493 				RTE_CACHE_LINE_SIZE));
2494 
2495 	/* For each device run test case function */
2496 	for (dev = 0; dev < nb_active_devs; ++dev)
2497 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2498 
2499 	rte_free(op_params);
2500 
2501 	return ret;
2502 }
2503 
2504 
2505 /* Push back the HARQ output from DDR to host */
2506 static void
2507 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2508 		struct rte_bbdev_dec_op **ops,
2509 		const uint16_t n)
2510 {
2511 	uint16_t j;
2512 	int save_status, ret;
2513 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2514 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2515 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2516 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2517 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2518 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2519 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2520 	for (j = 0; j < n; ++j) {
2521 		if ((loopback && mem_out) || hc_out) {
2522 			save_status = ops[j]->status;
2523 			ops[j]->ldpc_dec.op_flags =
2524 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2525 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2526 			if (h_comp)
2527 				ops[j]->ldpc_dec.op_flags +=
2528 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2529 			ops[j]->ldpc_dec.harq_combined_input.offset =
2530 					harq_offset;
2531 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2532 			harq_offset += HARQ_INCR;
2533 			if (!loopback)
2534 				ops[j]->ldpc_dec.harq_combined_input.length =
2535 				ops[j]->ldpc_dec.harq_combined_output.length;
2536 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2537 					&ops[j], 1);
2538 			ret = 0;
2539 			while (ret == 0)
2540 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2541 						dev_id, queue_id,
2542 						&ops_deq[j], 1);
2543 			ops[j]->ldpc_dec.op_flags = flags;
2544 			ops[j]->status = save_status;
2545 		}
2546 	}
2547 }
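
/*
 * The function above reuses the decoder itself to move the data: a
 * loopback operation with the HARQ "memory in" flag set makes the device
 * copy the HARQ block held in external DDR back into the host mbuf, where
 * it can be validated like any other output. The original op_flags and
 * status are restored afterwards so the real results are left untouched.
 */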
2548 
2549 /*
2550  * Preload the HARQ memory input from host to HW external DDR
2551  * and adjust the HARQ offsets accordingly
2552  */
2553 static void
2554 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2555 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2556 		bool preload)
2557 {
2558 	uint16_t j;
2559 	int deq;
2560 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2561 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2562 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2563 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2564 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2565 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2566 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2567 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2568 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2569 	if ((mem_in || hc_in) && preload) {
2570 		for (j = 0; j < n; ++j) {
2571 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2572 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2573 			ops[j]->ldpc_dec.op_flags =
2574 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2575 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2576 			if (h_comp)
2577 				ops[j]->ldpc_dec.op_flags +=
2578 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2579 			ops[j]->ldpc_dec.harq_combined_output.offset =
2580 					harq_offset;
2581 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2582 			harq_offset += HARQ_INCR;
2583 		}
2584 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2585 		deq = 0;
2586 		while (deq != n)
2587 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2588 					dev_id, queue_id, &ops_deq[deq],
2589 					n - deq);
2590 		/* Restore the operations */
2591 		for (j = 0; j < n; ++j) {
2592 			ops[j]->ldpc_dec.op_flags = flags;
2593 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2594 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2595 		}
2596 	}
2597 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2598 	for (j = 0; j < n; ++j) {
2599 		/* Adjust HARQ offset when we reach external DDR */
2600 		if (mem_in || hc_in)
2601 			ops[j]->ldpc_dec.harq_combined_input.offset
2602 				= harq_offset;
2603 		if (mem_out || hc_out)
2604 			ops[j]->ldpc_dec.harq_combined_output.offset
2605 				= harq_offset;
2606 		harq_offset += HARQ_INCR;
2607 	}
2608 }
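
/*
 * Each queue is given its own non-overlapping window in the external HARQ
 * memory: operation j of queue q ends up at offset
 * q * HARQ_INCR * MAX_OPS + j * HARQ_INCR, so consecutive operations are
 * spaced one HARQ_INCR apart and queues never alias each other's blocks.
 */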
2609 
2610 static void
2611 dequeue_event_callback(uint16_t dev_id,
2612 		enum rte_bbdev_event_type event, void *cb_arg,
2613 		void *ret_param)
2614 {
2615 	int ret;
2616 	uint16_t i;
2617 	uint64_t total_time;
2618 	uint16_t deq, burst_sz, num_ops;
2619 	uint16_t queue_id = *(uint16_t *) ret_param;
2620 	struct rte_bbdev_info info;
2621 	double tb_len_bits;
2622 	struct thread_params *tp = cb_arg;
2623 
2624 	/* Find matching thread params using queue_id */
2625 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2626 		if (tp->queue_id == queue_id)
2627 			break;
2628 
2629 	if (i == MAX_QUEUES) {
2630 		printf("%s: Queue_id from interrupt details was not found!\n",
2631 				__func__);
2632 		return;
2633 	}
2634 
2635 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2636 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2637 		printf(
2638 			"Dequeue interrupt handler called for incorrect event!\n");
2639 		return;
2640 	}
2641 
2642 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
2643 	num_ops = tp->op_params->num_to_process;
2644 
2645 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2646 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2647 				&tp->dec_ops[
2648 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2649 				burst_sz);
2650 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2651 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2652 				&tp->dec_ops[
2653 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2654 				burst_sz);
2655 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2656 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2657 				&tp->enc_ops[
2658 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2659 				burst_sz);
2660 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2661 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2662 				&tp->enc_ops[
2663 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2664 				burst_sz);
2665 
2666 	if (deq < burst_sz) {
2667 		printf(
2668 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2669 			burst_sz, deq);
2670 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2671 		return;
2672 	}
2673 
2674 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
2675 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2676 		return;
2677 	}
2678 
2679 	total_time = rte_rdtsc_precise() - tp->start_time;
2680 
2681 	rte_bbdev_info_get(dev_id, &info);
2682 
2683 	ret = TEST_SUCCESS;
2684 
2685 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2686 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2687 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2688 				tp->op_params->vector_mask);
2689 		/* get the max of iter_count for all dequeued ops */
2690 		for (i = 0; i < num_ops; ++i)
2691 			tp->iter_count = RTE_MAX(
2692 					tp->dec_ops[i]->turbo_dec.iter_count,
2693 					tp->iter_count);
2694 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2695 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2696 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2697 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2698 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2699 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2700 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2701 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2702 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2703 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2704 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2705 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2706 				tp->op_params->vector_mask);
2707 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2708 	}
2709 
2710 	if (ret) {
2711 		printf("Buffers validation failed\n");
2712 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2713 	}
2714 
2715 	switch (test_vector.op_type) {
2716 	case RTE_BBDEV_OP_TURBO_DEC:
2717 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2718 		break;
2719 	case RTE_BBDEV_OP_TURBO_ENC:
2720 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2721 		break;
2722 	case RTE_BBDEV_OP_LDPC_DEC:
2723 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2724 		break;
2725 	case RTE_BBDEV_OP_LDPC_ENC:
2726 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2727 		break;
2728 	case RTE_BBDEV_OP_NONE:
2729 		tb_len_bits = 0.0;
2730 		break;
2731 	default:
2732 		printf("Unknown op type: %d\n", test_vector.op_type);
2733 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2734 		return;
2735 	}
2736 
2737 	tp->ops_per_sec += ((double)num_ops) /
2738 			((double)total_time / (double)rte_get_tsc_hz());
2739 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2740 			((double)total_time / (double)rte_get_tsc_hz());
2741 
2742 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2743 }
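
/*
 * Throughput is derived from the TSC once the last burst is dequeued:
 * ops/s = num_ops / (total_time / rte_get_tsc_hz()) and
 * Mbps = (num_ops * tb_len_bits / 1e6) / (total_time / rte_get_tsc_hz()).
 * As an illustrative figure only, 512 ops of 8448 bits completed in 2 ms
 * correspond to about 256k ops/s and roughly 2163 Mbps. This callback is
 * presumably registered elsewhere in this file, e.g. via
 * rte_bbdev_callback_register(dev_id, RTE_BBDEV_EVENT_DEQUEUE,
 * dequeue_event_callback, tp).
 */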
2744 
2745 static int
2746 throughput_intr_lcore_ldpc_dec(void *arg)
2747 {
2748 	struct thread_params *tp = arg;
2749 	unsigned int enqueued;
2750 	const uint16_t queue_id = tp->queue_id;
2751 	const uint16_t burst_sz = tp->op_params->burst_sz;
2752 	const uint16_t num_to_process = tp->op_params->num_to_process;
2753 	struct rte_bbdev_dec_op *ops[num_to_process];
2754 	struct test_buffers *bufs = NULL;
2755 	struct rte_bbdev_info info;
2756 	int ret, i, j;
2757 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2758 	uint16_t num_to_enq, enq;
2759 
2760 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2761 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2762 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2763 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2764 
2765 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2766 			"BURST_SIZE should be <= %u", MAX_BURST);
2767 
2768 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2769 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2770 			tp->dev_id, queue_id);
2771 
2772 	rte_bbdev_info_get(tp->dev_id, &info);
2773 
2774 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2775 			"NUM_OPS cannot exceed %u for this device",
2776 			info.drv.queue_size_lim);
2777 
2778 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2779 
2780 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2781 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2782 
2783 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2784 
2785 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2786 				num_to_process);
2787 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2788 			num_to_process);
2789 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2790 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2791 				bufs->hard_outputs, bufs->soft_outputs,
2792 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2793 
2794 	/* Set counter to validate the ordering */
2795 	for (j = 0; j < num_to_process; ++j)
2796 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2797 
2798 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2799 		for (i = 0; i < num_to_process; ++i) {
2800 			if (!loopback)
2801 				rte_pktmbuf_reset(
2802 					ops[i]->ldpc_dec.hard_output.data);
2803 			if (hc_out || loopback)
2804 				mbuf_reset(
2805 				ops[i]->ldpc_dec.harq_combined_output.data);
2806 		}
2807 
2808 		tp->start_time = rte_rdtsc_precise();
2809 		for (enqueued = 0; enqueued < num_to_process;) {
2810 			num_to_enq = burst_sz;
2811 
2812 			if (unlikely(num_to_process - enqueued < num_to_enq))
2813 				num_to_enq = num_to_process - enqueued;
2814 
2815 			enq = 0;
2816 			do {
2817 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2818 						tp->dev_id,
2819 						queue_id, &ops[enqueued],
2820 						num_to_enq);
2821 			} while (unlikely(num_to_enq != enq));
2822 			enqueued += enq;
2823 
2824 			/* Write the number of descriptors just enqueued to the
2825 			 * thread's burst_sz. This ensures that the proper
2826 			 * number of descriptors is dequeued in the callback
2827 			 * function, which matters for the last batch when the
2828 			 * number of operations is not a multiple of the burst
2829 			 * size.
2830 			 */
2831 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2832 
2833 			/* Wait until processing of previous batch is
2834 			 * completed
2835 			 */
2836 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2837 		}
2838 		if (j != TEST_REPETITIONS - 1)
2839 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2840 	}
2841 
2842 	return TEST_SUCCESS;
2843 }
2844 
2845 static int
2846 throughput_intr_lcore_dec(void *arg)
2847 {
2848 	struct thread_params *tp = arg;
2849 	unsigned int enqueued;
2850 	const uint16_t queue_id = tp->queue_id;
2851 	const uint16_t burst_sz = tp->op_params->burst_sz;
2852 	const uint16_t num_to_process = tp->op_params->num_to_process;
2853 	struct rte_bbdev_dec_op *ops[num_to_process];
2854 	struct test_buffers *bufs = NULL;
2855 	struct rte_bbdev_info info;
2856 	int ret, i, j;
2857 	uint16_t num_to_enq, enq;
2858 
2859 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2860 			"BURST_SIZE should be <= %u", MAX_BURST);
2861 
2862 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2863 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2864 			tp->dev_id, queue_id);
2865 
2866 	rte_bbdev_info_get(tp->dev_id, &info);
2867 
2868 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2869 			"NUM_OPS cannot exceed %u for this device",
2870 			info.drv.queue_size_lim);
2871 
2872 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2873 
2874 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2875 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2876 
2877 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2878 
2879 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2880 				num_to_process);
2881 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2882 			num_to_process);
2883 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2884 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2885 				bufs->hard_outputs, bufs->soft_outputs,
2886 				tp->op_params->ref_dec_op);
2887 
2888 	/* Set counter to validate the ordering */
2889 	for (j = 0; j < num_to_process; ++j)
2890 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2891 
2892 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2893 		for (i = 0; i < num_to_process; ++i)
2894 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2895 
2896 		tp->start_time = rte_rdtsc_precise();
2897 		for (enqueued = 0; enqueued < num_to_process;) {
2898 			num_to_enq = burst_sz;
2899 
2900 			if (unlikely(num_to_process - enqueued < num_to_enq))
2901 				num_to_enq = num_to_process - enqueued;
2902 
2903 			enq = 0;
2904 			do {
2905 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2906 						queue_id, &ops[enqueued],
2907 						num_to_enq);
2908 			} while (unlikely(num_to_enq != enq));
2909 			enqueued += enq;
2910 
2911 			/* Write the number of descriptors just enqueued to the
2912 			 * thread's burst_sz. This ensures that the proper
2913 			 * number of descriptors is dequeued in the callback
2914 			 * function, which matters for the last batch when the
2915 			 * number of operations is not a multiple of the burst
2916 			 * size.
2917 			 */
2918 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2919 
2920 			/* Wait until processing of previous batch is
2921 			 * completed
2922 			 */
2923 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2924 		}
2925 		if (j != TEST_REPETITIONS - 1)
2926 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2927 	}
2928 
2929 	return TEST_SUCCESS;
2930 }
2931 
2932 static int
2933 throughput_intr_lcore_enc(void *arg)
2934 {
2935 	struct thread_params *tp = arg;
2936 	unsigned int enqueued;
2937 	const uint16_t queue_id = tp->queue_id;
2938 	const uint16_t burst_sz = tp->op_params->burst_sz;
2939 	const uint16_t num_to_process = tp->op_params->num_to_process;
2940 	struct rte_bbdev_enc_op *ops[num_to_process];
2941 	struct test_buffers *bufs = NULL;
2942 	struct rte_bbdev_info info;
2943 	int ret, i, j;
2944 	uint16_t num_to_enq, enq;
2945 
2946 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2947 			"BURST_SIZE should be <= %u", MAX_BURST);
2948 
2949 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2950 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2951 			tp->dev_id, queue_id);
2952 
2953 	rte_bbdev_info_get(tp->dev_id, &info);
2954 
2955 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2956 			"NUM_OPS cannot exceed %u for this device",
2957 			info.drv.queue_size_lim);
2958 
2959 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2960 
2961 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2962 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2963 
2964 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2965 
2966 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2967 			num_to_process);
2968 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2969 			num_to_process);
2970 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2971 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2972 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2973 
2974 	/* Set counter to validate the ordering */
2975 	for (j = 0; j < num_to_process; ++j)
2976 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2977 
2978 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2979 		for (i = 0; i < num_to_process; ++i)
2980 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2981 
2982 		tp->start_time = rte_rdtsc_precise();
2983 		for (enqueued = 0; enqueued < num_to_process;) {
2984 			num_to_enq = burst_sz;
2985 
2986 			if (unlikely(num_to_process - enqueued < num_to_enq))
2987 				num_to_enq = num_to_process - enqueued;
2988 
2989 			enq = 0;
2990 			do {
2991 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2992 						queue_id, &ops[enqueued],
2993 						num_to_enq);
2994 			} while (unlikely(enq != num_to_enq));
2995 			enqueued += enq;
2996 
2997 			/* Write the number of descriptors just enqueued to the
2998 			 * thread's burst_sz. This ensures that the proper
2999 			 * number of descriptors is dequeued in the callback
3000 			 * function, which matters for the last batch when the
3001 			 * number of operations is not a multiple of the burst
3002 			 * size.
3003 			 */
3004 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3005 
3006 			/* Wait until processing of previous batch is
3007 			 * completed
3008 			 */
3009 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3010 		}
3011 		if (j != TEST_REPETITIONS - 1)
3012 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3013 	}
3014 
3015 	return TEST_SUCCESS;
3016 }
3017 
3018 
3019 static int
3020 throughput_intr_lcore_ldpc_enc(void *arg)
3021 {
3022 	struct thread_params *tp = arg;
3023 	unsigned int enqueued;
3024 	const uint16_t queue_id = tp->queue_id;
3025 	const uint16_t burst_sz = tp->op_params->burst_sz;
3026 	const uint16_t num_to_process = tp->op_params->num_to_process;
3027 	struct rte_bbdev_enc_op *ops[num_to_process];
3028 	struct test_buffers *bufs = NULL;
3029 	struct rte_bbdev_info info;
3030 	int ret, i, j;
3031 	uint16_t num_to_enq, enq;
3032 
3033 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3034 			"BURST_SIZE should be <= %u", MAX_BURST);
3035 
3036 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3037 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3038 			tp->dev_id, queue_id);
3039 
3040 	rte_bbdev_info_get(tp->dev_id, &info);
3041 
3042 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3043 			"NUM_OPS cannot exceed %u for this device",
3044 			info.drv.queue_size_lim);
3045 
3046 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3047 
3048 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3049 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3050 
3051 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3052 
3053 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3054 			num_to_process);
3055 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3056 			num_to_process);
3057 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3058 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3059 				bufs->inputs, bufs->hard_outputs,
3060 				tp->op_params->ref_enc_op);
3061 
3062 	/* Set counter to validate the ordering */
3063 	for (j = 0; j < num_to_process; ++j)
3064 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3065 
3066 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3067 		for (i = 0; i < num_to_process; ++i)
3068 			rte_pktmbuf_reset(ops[i]->ldpc_enc.output.data);
3069 
3070 		tp->start_time = rte_rdtsc_precise();
3071 		for (enqueued = 0; enqueued < num_to_process;) {
3072 			num_to_enq = burst_sz;
3073 
3074 			if (unlikely(num_to_process - enqueued < num_to_enq))
3075 				num_to_enq = num_to_process - enqueued;
3076 
3077 			enq = 0;
3078 			do {
3079 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3080 						tp->dev_id,
3081 						queue_id, &ops[enqueued],
3082 						num_to_enq);
3083 			} while (unlikely(enq != num_to_enq));
3084 			enqueued += enq;
3085 
3086 			/* Write the number of descriptors just enqueued to the
3087 			 * thread's burst_sz. This ensures that the proper
3088 			 * number of descriptors is dequeued in the callback
3089 			 * function, which matters for the last batch when the
3090 			 * number of operations is not a multiple of the burst
3091 			 * size.
3092 			 */
3093 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3094 
3095 			/* Wait until processing of previous batch is
3096 			 * completed
3097 			 */
3098 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3099 		}
3100 		if (j != TEST_REPETITIONS - 1)
3101 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3102 	}
3103 
3104 	return TEST_SUCCESS;
3105 }
3106 
3107 static int
3108 throughput_pmd_lcore_dec(void *arg)
3109 {
3110 	struct thread_params *tp = arg;
3111 	uint16_t enq, deq;
3112 	uint64_t total_time = 0, start_time;
3113 	const uint16_t queue_id = tp->queue_id;
3114 	const uint16_t burst_sz = tp->op_params->burst_sz;
3115 	const uint16_t num_ops = tp->op_params->num_to_process;
3116 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3117 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3118 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3119 	struct test_buffers *bufs = NULL;
3120 	int i, j, ret;
3121 	struct rte_bbdev_info info;
3122 	uint16_t num_to_enq;
3123 
3124 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3125 			"BURST_SIZE should be <= %u", MAX_BURST);
3126 
3127 	rte_bbdev_info_get(tp->dev_id, &info);
3128 
3129 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3130 			"NUM_OPS cannot exceed %u for this device",
3131 			info.drv.queue_size_lim);
3132 
3133 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3134 
3135 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3136 
3137 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3138 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3139 
3140 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3141 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3142 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3143 
3144 	/* Set counter to validate the ordering */
3145 	for (j = 0; j < num_ops; ++j)
3146 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3147 
3148 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3149 
3150 		for (j = 0; j < num_ops; ++j)
3151 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3152 
3153 		start_time = rte_rdtsc_precise();
3154 
3155 		for (enq = 0, deq = 0; enq < num_ops;) {
3156 			num_to_enq = burst_sz;
3157 
3158 			if (unlikely(num_ops - enq < num_to_enq))
3159 				num_to_enq = num_ops - enq;
3160 
3161 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3162 					queue_id, &ops_enq[enq], num_to_enq);
3163 
3164 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3165 					queue_id, &ops_deq[deq], enq - deq);
3166 		}
3167 
3168 		/* dequeue the remaining */
3169 		while (deq < enq) {
3170 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3171 					queue_id, &ops_deq[deq], enq - deq);
3172 		}
3173 
3174 		total_time += rte_rdtsc_precise() - start_time;
3175 	}
3176 
3177 	tp->iter_count = 0;
3178 	/* get the max of iter_count for all dequeued ops */
3179 	for (i = 0; i < num_ops; ++i) {
3180 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3181 				tp->iter_count);
3182 	}
3183 
3184 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3185 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3186 				tp->op_params->vector_mask);
3187 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3188 	}
3189 
3190 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3191 
3192 	double tb_len_bits = calc_dec_TB_size(ref_op);
3193 
3194 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3195 			((double)total_time / (double)rte_get_tsc_hz());
3196 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3197 			1000000.0) / ((double)total_time /
3198 			(double)rte_get_tsc_hz());
3199 
3200 	return TEST_SUCCESS;
3201 }
3202 
3203 static int
3204 bler_pmd_lcore_ldpc_dec(void *arg)
3205 {
3206 	struct thread_params *tp = arg;
3207 	uint16_t enq, deq;
3208 	uint64_t total_time = 0, start_time;
3209 	const uint16_t queue_id = tp->queue_id;
3210 	const uint16_t burst_sz = tp->op_params->burst_sz;
3211 	const uint16_t num_ops = tp->op_params->num_to_process;
3212 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3213 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3214 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3215 	struct test_buffers *bufs = NULL;
3216 	int i, j, ret;
3217 	float parity_bler = 0;
3218 	struct rte_bbdev_info info;
3219 	uint16_t num_to_enq;
3220 	bool extDdr = check_bit(ldpc_cap_flags,
3221 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3222 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3223 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3224 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3225 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3226 
3227 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3228 			"BURST_SIZE should be <= %u", MAX_BURST);
3229 
3230 	rte_bbdev_info_get(tp->dev_id, &info);
3231 
3232 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3233 			"NUM_OPS cannot exceed %u for this device",
3234 			info.drv.queue_size_lim);
3235 
3236 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3237 
3238 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3239 
3240 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3241 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3242 
3243 	/* For BLER tests we need to enable early termination */
3244 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3245 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3246 		ref_op->ldpc_dec.op_flags +=
3247 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3248 	ref_op->ldpc_dec.iter_max = get_iter_max();
3249 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3250 
3251 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3252 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3253 				bufs->hard_outputs, bufs->soft_outputs,
3254 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3255 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3256 
3257 	/* Set counter to validate the ordering */
3258 	for (j = 0; j < num_ops; ++j)
3259 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3260 
3261 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3262 		for (j = 0; j < num_ops; ++j) {
3263 			if (!loopback)
3264 				mbuf_reset(
3265 				ops_enq[j]->ldpc_dec.hard_output.data);
3266 			if (hc_out || loopback)
3267 				mbuf_reset(
3268 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3269 		}
3270 		if (extDdr)
3271 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3272 					num_ops, true);
3273 		start_time = rte_rdtsc_precise();
3274 
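		/*
		 * Enqueue in bursts of at most burst_sz while opportunistically
		 * dequeuing completed ops, then drain whatever is still in
		 * flight; only this enqueue/dequeue window is timed.
		 */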
3275 		for (enq = 0, deq = 0; enq < num_ops;) {
3276 			num_to_enq = burst_sz;
3277 
3278 			if (unlikely(num_ops - enq < num_to_enq))
3279 				num_to_enq = num_ops - enq;
3280 
3281 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3282 					queue_id, &ops_enq[enq], num_to_enq);
3283 
3284 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3285 					queue_id, &ops_deq[deq], enq - deq);
3286 		}
3287 
3288 		/* dequeue the remaining */
3289 		while (deq < enq) {
3290 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3291 					queue_id, &ops_deq[deq], enq - deq);
3292 		}
3293 
3294 		total_time += rte_rdtsc_precise() - start_time;
3295 	}
3296 
3297 	tp->iter_count = 0;
3298 	tp->iter_average = 0;
3299 	/* get the max of iter_count for all dequeued ops */
3300 	for (i = 0; i < num_ops; ++i) {
3301 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3302 				tp->iter_count);
3303 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3304 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3305 			parity_bler += 1.0;
3306 	}
3307 
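	/*
	 * Two BLER figures are derived: parity_bler counts the ops the decoder
	 * flagged with a syndrome error, while tp->bler below is the fraction
	 * of ops whose decoded output does not match the reference data.
	 */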
3308 	parity_bler /= num_ops; /* BLER based on the decoder syndrome status */
3309 	tp->iter_average /= num_ops;
3310 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3311 
3312 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3313 			&& tp->bler == 0
3314 			&& parity_bler == 0
3315 			&& !hc_out) {
3316 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3317 				tp->op_params->vector_mask);
3318 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3319 	}
3320 
3321 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3322 
3323 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3324 	tp->ops_per_sec = ((double)num_ops * 1) /
3325 			((double)total_time / (double)rte_get_tsc_hz());
3326 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3327 			1000000.0) / ((double)total_time /
3328 			(double)rte_get_tsc_hz());
3329 
3330 	return TEST_SUCCESS;
3331 }
3332 
3333 static int
3334 throughput_pmd_lcore_ldpc_dec(void *arg)
3335 {
3336 	struct thread_params *tp = arg;
3337 	uint16_t enq, deq;
3338 	uint64_t total_time = 0, start_time;
3339 	const uint16_t queue_id = tp->queue_id;
3340 	const uint16_t burst_sz = tp->op_params->burst_sz;
3341 	const uint16_t num_ops = tp->op_params->num_to_process;
3342 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3343 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3344 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3345 	struct test_buffers *bufs = NULL;
3346 	int i, j, ret;
3347 	struct rte_bbdev_info info;
3348 	uint16_t num_to_enq;
3349 	bool extDdr = check_bit(ldpc_cap_flags,
3350 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3351 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3352 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3353 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3354 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3355 
3356 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3357 			"BURST_SIZE should be <= %u", MAX_BURST);
3358 
3359 	rte_bbdev_info_get(tp->dev_id, &info);
3360 
3361 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3362 			"NUM_OPS cannot exceed %u for this device",
3363 			info.drv.queue_size_lim);
3364 
3365 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3366 
3367 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3368 
3369 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3370 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3371 
3372 	/* For throughput tests we need to disable early termination */
3373 	if (check_bit(ref_op->ldpc_dec.op_flags,
3374 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3375 		ref_op->ldpc_dec.op_flags &=
3376 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3377 	ref_op->ldpc_dec.iter_max = get_iter_max();
3378 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3379 
3380 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3381 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3382 				bufs->hard_outputs, bufs->soft_outputs,
3383 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3384 
3385 	/* Set counter to validate the ordering */
3386 	for (j = 0; j < num_ops; ++j)
3387 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3388 
3389 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3390 		for (j = 0; j < num_ops; ++j) {
3391 			if (!loopback)
3392 				mbuf_reset(
3393 				ops_enq[j]->ldpc_dec.hard_output.data);
3394 			if (hc_out || loopback)
3395 				mbuf_reset(
3396 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3397 		}
3398 		if (extDdr)
3399 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3400 					num_ops, true);
3401 		start_time = rte_rdtsc_precise();
3402 
3403 		for (enq = 0, deq = 0; enq < num_ops;) {
3404 			num_to_enq = burst_sz;
3405 
3406 			if (unlikely(num_ops - enq < num_to_enq))
3407 				num_to_enq = num_ops - enq;
3408 
3409 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3410 					queue_id, &ops_enq[enq], num_to_enq);
3411 
3412 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3413 					queue_id, &ops_deq[deq], enq - deq);
3414 		}
3415 
3416 		/* dequeue the remaining */
3417 		while (deq < enq) {
3418 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3419 					queue_id, &ops_deq[deq], enq - deq);
3420 		}
3421 
3422 		total_time += rte_rdtsc_precise() - start_time;
3423 	}
3424 
3425 	tp->iter_count = 0;
3426 	/* get the max of iter_count for all dequeued ops */
3427 	for (i = 0; i < num_ops; ++i) {
3428 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3429 				tp->iter_count);
3430 	}
3431 	if (extDdr) {
3432 		/* Read loopback is not thread safe */
3433 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3434 	}
3435 
3436 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3437 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3438 				tp->op_params->vector_mask);
3439 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3440 	}
3441 
3442 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3443 
3444 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3445 
3446 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3447 			((double)total_time / (double)rte_get_tsc_hz());
3448 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3449 			1000000.0) / ((double)total_time /
3450 			(double)rte_get_tsc_hz());
3451 
3452 	return TEST_SUCCESS;
3453 }
3454 
3455 static int
3456 throughput_pmd_lcore_enc(void *arg)
3457 {
3458 	struct thread_params *tp = arg;
3459 	uint16_t enq, deq;
3460 	uint64_t total_time = 0, start_time;
3461 	const uint16_t queue_id = tp->queue_id;
3462 	const uint16_t burst_sz = tp->op_params->burst_sz;
3463 	const uint16_t num_ops = tp->op_params->num_to_process;
3464 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3465 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3466 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3467 	struct test_buffers *bufs = NULL;
3468 	int i, j, ret;
3469 	struct rte_bbdev_info info;
3470 	uint16_t num_to_enq;
3471 
3472 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3473 			"BURST_SIZE should be <= %u", MAX_BURST);
3474 
3475 	rte_bbdev_info_get(tp->dev_id, &info);
3476 
3477 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3478 			"NUM_OPS cannot exceed %u for this device",
3479 			info.drv.queue_size_lim);
3480 
3481 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3482 
3483 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3484 
3485 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3486 			num_ops);
3487 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3488 			num_ops);
3489 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3490 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3491 				bufs->hard_outputs, ref_op);
3492 
3493 	/* Set counter to validate the ordering */
3494 	for (j = 0; j < num_ops; ++j)
3495 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3496 
3497 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3498 
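		/*
		 * Reset the output mbuf lengths before each repetition so the
		 * encoder output of the previous run is overwritten rather
		 * than appended to.
		 */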
3499 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3500 			for (j = 0; j < num_ops; ++j)
3501 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3502 
3503 		start_time = rte_rdtsc_precise();
3504 
3505 		for (enq = 0, deq = 0; enq < num_ops;) {
3506 			num_to_enq = burst_sz;
3507 
3508 			if (unlikely(num_ops - enq < num_to_enq))
3509 				num_to_enq = num_ops - enq;
3510 
3511 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3512 					queue_id, &ops_enq[enq], num_to_enq);
3513 
3514 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3515 					queue_id, &ops_deq[deq], enq - deq);
3516 		}
3517 
3518 		/* dequeue the remaining */
3519 		while (deq < enq) {
3520 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3521 					queue_id, &ops_deq[deq], enq - deq);
3522 		}
3523 
3524 		total_time += rte_rdtsc_precise() - start_time;
3525 	}
3526 
3527 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3528 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3529 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3530 	}
3531 
3532 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3533 
3534 	double tb_len_bits = calc_enc_TB_size(ref_op);
3535 
3536 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3537 			((double)total_time / (double)rte_get_tsc_hz());
3538 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3539 			/ 1000000.0) / ((double)total_time /
3540 			(double)rte_get_tsc_hz());
3541 
3542 	return TEST_SUCCESS;
3543 }
3544 
3545 static int
3546 throughput_pmd_lcore_ldpc_enc(void *arg)
3547 {
3548 	struct thread_params *tp = arg;
3549 	uint16_t enq, deq;
3550 	uint64_t total_time = 0, start_time;
3551 	const uint16_t queue_id = tp->queue_id;
3552 	const uint16_t burst_sz = tp->op_params->burst_sz;
3553 	const uint16_t num_ops = tp->op_params->num_to_process;
3554 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3555 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3556 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3557 	struct test_buffers *bufs = NULL;
3558 	int i, j, ret;
3559 	struct rte_bbdev_info info;
3560 	uint16_t num_to_enq;
3561 
3562 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3563 			"BURST_SIZE should be <= %u", MAX_BURST);
3564 
3565 	rte_bbdev_info_get(tp->dev_id, &info);
3566 
3567 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3568 			"NUM_OPS cannot exceed %u for this device",
3569 			info.drv.queue_size_lim);
3570 
3571 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3572 
3573 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3574 
3575 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3576 			num_ops);
3577 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3578 			num_ops);
3579 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3580 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3581 				bufs->hard_outputs, ref_op);
3582 
3583 	/* Set counter to validate the ordering */
3584 	for (j = 0; j < num_ops; ++j)
3585 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3586 
3587 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3588 
3589 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3590 			for (j = 0; j < num_ops; ++j)
3591 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3592 
3593 		start_time = rte_rdtsc_precise();
3594 
3595 		for (enq = 0, deq = 0; enq < num_ops;) {
3596 			num_to_enq = burst_sz;
3597 
3598 			if (unlikely(num_ops - enq < num_to_enq))
3599 				num_to_enq = num_ops - enq;
3600 
3601 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3602 					queue_id, &ops_enq[enq], num_to_enq);
3603 
3604 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3605 					queue_id, &ops_deq[deq], enq - deq);
3606 		}
3607 
3608 		/* dequeue the remaining */
3609 		while (deq < enq) {
3610 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3611 					queue_id, &ops_deq[deq], enq - deq);
3612 		}
3613 
3614 		total_time += rte_rdtsc_precise() - start_time;
3615 	}
3616 
3617 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3618 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3619 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3620 	}
3621 
3622 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3623 
3624 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3625 
3626 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3627 			((double)total_time / (double)rte_get_tsc_hz());
3628 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3629 			/ 1000000.0) / ((double)total_time /
3630 			(double)rte_get_tsc_hz());
3631 
3632 	return TEST_SUCCESS;
3633 }
3634 
3635 static void
3636 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3637 {
3638 	unsigned int iter = 0;
3639 	double total_mops = 0, total_mbps = 0;
3640 
3641 	for (iter = 0; iter < used_cores; iter++) {
3642 		printf(
3643 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3644 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3645 			t_params[iter].mbps);
3646 		total_mops += t_params[iter].ops_per_sec;
3647 		total_mbps += t_params[iter].mbps;
3648 	}
3649 	printf(
3650 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
3651 		used_cores, total_mops, total_mbps);
3652 }
3653 
3654 /* Aggregate the performance results over the number of cores used */
3655 static void
3656 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3657 {
3658 	unsigned int core_idx = 0;
3659 	double total_mops = 0, total_mbps = 0;
3660 	uint8_t iter_count = 0;
3661 
3662 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3663 		printf(
3664 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3665 			t_params[core_idx].lcore_id,
3666 			t_params[core_idx].ops_per_sec,
3667 			t_params[core_idx].mbps,
3668 			t_params[core_idx].iter_count);
3669 		total_mops += t_params[core_idx].ops_per_sec;
3670 		total_mbps += t_params[core_idx].mbps;
3671 		iter_count = RTE_MAX(iter_count,
3672 				t_params[core_idx].iter_count);
3673 	}
3674 	printf(
3675 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3676 		used_cores, total_mops, total_mbps, iter_count);
3677 }
3678 
3679 /* Aggregate the performance results over the number of cores used */
3680 static void
3681 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3682 {
3683 	unsigned int core_idx = 0;
3684 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3685 	double snr = get_snr();
3686 
3687 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3688 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3689 				t_params[core_idx].lcore_id,
3690 				t_params[core_idx].bler * 100,
3691 				t_params[core_idx].iter_average,
3692 				t_params[core_idx].mbps,
3693 				get_vector_filename());
3694 		total_mbps += t_params[core_idx].mbps;
3695 		total_bler += t_params[core_idx].bler;
3696 		total_iter += t_params[core_idx].iter_average;
3697 	}
3698 	total_bler /= used_cores;
3699 	total_iter /= used_cores;
3700 
3701 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f (max %d) - Tp %.1f Mbps %s\n",
3702 			snr, total_bler * 100, total_iter, get_iter_max(),
3703 			total_mbps, get_vector_filename());
3704 }
3705 
3706 /*
3707  * Test function that determines BLER wireless performance
3708  */
3709 static int
3710 bler_test(struct active_device *ad,
3711 		struct test_op_params *op_params)
3712 {
3713 	int ret;
3714 	unsigned int lcore_id, used_cores = 0;
3715 	struct thread_params *t_params;
3716 	struct rte_bbdev_info info;
3717 	lcore_function_t *bler_function;
3718 	uint16_t num_lcores;
3719 	const char *op_type_str;
3720 
3721 	rte_bbdev_info_get(ad->dev_id, &info);
3722 
3723 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3724 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3725 			test_vector.op_type);
3726 
3727 	printf("+ ------------------------------------------------------- +\n");
3728 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3729 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3730 			op_params->num_to_process, op_params->num_lcores,
3731 			op_type_str,
3732 			intr_enabled ? "Interrupt mode" : "PMD mode",
3733 			(double)rte_get_tsc_hz() / 1000000000.0);
3734 
3735 	/* Set number of lcores */
3736 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3737 			? ad->nb_queues
3738 			: op_params->num_lcores;
3739 
3740 	/* Allocate memory for thread parameters structure */
3741 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3742 			RTE_CACHE_LINE_SIZE);
3743 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3744 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3745 				RTE_CACHE_LINE_SIZE));
3746 
3747 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3748 			!check_bit(test_vector.ldpc_dec.op_flags,
3749 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3750 			&& !check_bit(test_vector.ldpc_dec.op_flags,
3751 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
3752 		bler_function = bler_pmd_lcore_ldpc_dec;
3753 	else
3754 		return TEST_SKIPPED;
3755 
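	/*
	 * Launch the workers in a held state: each lcore function spins on the
	 * sync flag (SYNC_WAIT) and only starts processing once the main core
	 * flips it to SYNC_START below, so all queues start at the same time.
	 */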
3756 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3757 
3758 	/* Main core is set at first entry */
3759 	t_params[0].dev_id = ad->dev_id;
3760 	t_params[0].lcore_id = rte_lcore_id();
3761 	t_params[0].op_params = op_params;
3762 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3763 	t_params[0].iter_count = 0;
3764 
3765 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3766 		if (used_cores >= num_lcores)
3767 			break;
3768 
3769 		t_params[used_cores].dev_id = ad->dev_id;
3770 		t_params[used_cores].lcore_id = lcore_id;
3771 		t_params[used_cores].op_params = op_params;
3772 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3773 		t_params[used_cores].iter_count = 0;
3774 
3775 		rte_eal_remote_launch(bler_function,
3776 				&t_params[used_cores++], lcore_id);
3777 	}
3778 
3779 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3780 	ret = bler_function(&t_params[0]);
3781 
3782 	/* Main core is always used */
3783 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3784 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3785 
3786 	print_dec_bler(t_params, num_lcores);
3787 
3788 	/* Return if test failed */
3789 	if (ret) {
3790 		rte_free(t_params);
3791 		return ret;
3792 	}
3793 
3794 	/* Additional reporting could be added here. */
3795 	rte_free(t_params);
3796 	return ret;
3797 }
3798 
3799 /*
3800  * Test function that determines how long an enqueue + dequeue of a burst
3801  * takes on available lcores.
3802  */
3803 static int
3804 throughput_test(struct active_device *ad,
3805 		struct test_op_params *op_params)
3806 {
3807 	int ret;
3808 	unsigned int lcore_id, used_cores = 0;
3809 	struct thread_params *t_params, *tp;
3810 	struct rte_bbdev_info info;
3811 	lcore_function_t *throughput_function;
3812 	uint16_t num_lcores;
3813 	const char *op_type_str;
3814 
3815 	rte_bbdev_info_get(ad->dev_id, &info);
3816 
3817 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3818 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3819 			test_vector.op_type);
3820 
3821 	printf("+ ------------------------------------------------------- +\n");
3822 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3823 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3824 			op_params->num_to_process, op_params->num_lcores,
3825 			op_type_str,
3826 			intr_enabled ? "Interrupt mode" : "PMD mode",
3827 			(double)rte_get_tsc_hz() / 1000000000.0);
3828 
3829 	/* Set number of lcores */
3830 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3831 			? ad->nb_queues
3832 			: op_params->num_lcores;
3833 
3834 	/* Allocate memory for thread parameters structure */
3835 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3836 			RTE_CACHE_LINE_SIZE);
3837 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3838 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3839 				RTE_CACHE_LINE_SIZE));
3840 
3841 	if (intr_enabled) {
3842 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3843 			throughput_function = throughput_intr_lcore_dec;
3844 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3845 			throughput_function = throughput_intr_lcore_ldpc_dec;
3846 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3847 			throughput_function = throughput_intr_lcore_enc;
3848 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3849 			throughput_function = throughput_intr_lcore_ldpc_enc;
3850 		else
3851 			throughput_function = throughput_intr_lcore_enc;
3852 
3853 		/* Dequeue interrupt callback registration */
3854 		ret = rte_bbdev_callback_register(ad->dev_id,
3855 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3856 				t_params);
3857 		if (ret < 0) {
3858 			rte_free(t_params);
3859 			return ret;
3860 		}
3861 	} else {
3862 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3863 			throughput_function = throughput_pmd_lcore_dec;
3864 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3865 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3866 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3867 			throughput_function = throughput_pmd_lcore_enc;
3868 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3869 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3870 		else
3871 			throughput_function = throughput_pmd_lcore_enc;
3872 	}
3873 
3874 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3875 
3876 	/* Main core is set at first entry */
3877 	t_params[0].dev_id = ad->dev_id;
3878 	t_params[0].lcore_id = rte_lcore_id();
3879 	t_params[0].op_params = op_params;
3880 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3881 	t_params[0].iter_count = 0;
3882 
3883 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3884 		if (used_cores >= num_lcores)
3885 			break;
3886 
3887 		t_params[used_cores].dev_id = ad->dev_id;
3888 		t_params[used_cores].lcore_id = lcore_id;
3889 		t_params[used_cores].op_params = op_params;
3890 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3891 		t_params[used_cores].iter_count = 0;
3892 
3893 		rte_eal_remote_launch(throughput_function,
3894 				&t_params[used_cores++], lcore_id);
3895 	}
3896 
3897 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3898 	ret = throughput_function(&t_params[0]);
3899 
3900 	/* Main core is always used */
3901 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3902 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3903 
3904 	/* Return if test failed */
3905 	if (ret) {
3906 		rte_free(t_params);
3907 		return ret;
3908 	}
3909 
3910 	/* Print throughput if interrupts are disabled and test passed */
3911 	if (!intr_enabled) {
3912 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3913 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3914 			print_dec_throughput(t_params, num_lcores);
3915 		else
3916 			print_enc_throughput(t_params, num_lcores);
3917 		rte_free(t_params);
3918 		return ret;
3919 	}
3920 
3921 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3922 	 * all pending operations. Skip waiting for queues which reported an
3923 	 * error using processing_status variable.
3924 	 * Wait for main lcore operations.
3925 	 */
3926 	tp = &t_params[0];
3927 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3928 		op_params->num_to_process) &&
3929 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3930 		TEST_FAILED))
3931 		rte_pause();
3932 
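	/*
	 * In interrupt mode the dequeue callback accumulates one rate sample
	 * per repetition, so divide by TEST_REPETITIONS to report the average.
	 */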
3933 	tp->ops_per_sec /= TEST_REPETITIONS;
3934 	tp->mbps /= TEST_REPETITIONS;
3935 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3936 
3937 	/* Wait for worker lcores operations */
3938 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3939 		tp = &t_params[used_cores];
3940 
3941 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3942 			op_params->num_to_process) &&
3943 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3944 			TEST_FAILED))
3945 			rte_pause();
3946 
3947 		tp->ops_per_sec /= TEST_REPETITIONS;
3948 		tp->mbps /= TEST_REPETITIONS;
3949 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3950 	}
3951 
3952 	/* Print throughput if test passed */
3953 	if (!ret) {
3954 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3955 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3956 			print_dec_throughput(t_params, num_lcores);
3957 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3958 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3959 			print_enc_throughput(t_params, num_lcores);
3960 	}
3961 
3962 	rte_free(t_params);
3963 	return ret;
3964 }
3965 
3966 static int
3967 latency_test_dec(struct rte_mempool *mempool,
3968 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3969 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3970 		const uint16_t num_to_process, uint16_t burst_sz,
3971 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3972 {
3973 	int ret = TEST_SUCCESS;
3974 	uint16_t i, j, dequeued;
3975 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3976 	uint64_t start_time = 0, last_time = 0;
3977 
3978 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3979 		uint16_t enq = 0, deq = 0;
3980 		bool first_time = true;
3981 		last_time = 0;
3982 
3983 		if (unlikely(num_to_process - dequeued < burst_sz))
3984 			burst_sz = num_to_process - dequeued;
3985 
3986 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3987 		TEST_ASSERT_SUCCESS(ret,
3988 				"rte_bbdev_dec_op_alloc_bulk() failed");
3989 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3990 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3991 					bufs->inputs,
3992 					bufs->hard_outputs,
3993 					bufs->soft_outputs,
3994 					ref_op);
3995 
3996 		/* Set counter to validate the ordering */
3997 		for (j = 0; j < burst_sz; ++j)
3998 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3999 
4000 		start_time = rte_rdtsc_precise();
4001 
4002 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4003 				burst_sz);
4004 		TEST_ASSERT(enq == burst_sz,
4005 				"Error enqueueing burst, expected %u, got %u",
4006 				burst_sz, enq);
4007 
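		/*
		 * Latency is measured from just before the enqueue call until
		 * the first dequeue call that returns at least one op; later
		 * polls for the remainder of the burst are not added to
		 * last_time.
		 */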
4008 		/* Dequeue */
4009 		do {
4010 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4011 					&ops_deq[deq], burst_sz - deq);
4012 			if (likely(first_time && (deq > 0))) {
4013 				last_time = rte_rdtsc_precise() - start_time;
4014 				first_time = false;
4015 			}
4016 		} while (unlikely(burst_sz != deq));
4017 
4018 		*max_time = RTE_MAX(*max_time, last_time);
4019 		*min_time = RTE_MIN(*min_time, last_time);
4020 		*total_time += last_time;
4021 
4022 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4023 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4024 					vector_mask);
4025 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4026 		}
4027 
4028 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4029 		dequeued += deq;
4030 	}
4031 
4032 	return i;
4033 }
4034 
4035 /* Test case for latency/validation for LDPC Decoder */
4036 static int
4037 latency_test_ldpc_dec(struct rte_mempool *mempool,
4038 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4039 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4040 		const uint16_t num_to_process, uint16_t burst_sz,
4041 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4042 		bool disable_et)
4043 {
4044 	int ret = TEST_SUCCESS;
4045 	uint16_t i, j, dequeued;
4046 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4047 	uint64_t start_time = 0, last_time = 0;
4048 	bool extDdr = ldpc_cap_flags &
4049 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4050 
4051 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4052 		uint16_t enq = 0, deq = 0;
4053 		bool first_time = true;
4054 		last_time = 0;
4055 
4056 		if (unlikely(num_to_process - dequeued < burst_sz))
4057 			burst_sz = num_to_process - dequeued;
4058 
4059 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4060 		TEST_ASSERT_SUCCESS(ret,
4061 				"rte_bbdev_dec_op_alloc_bulk() failed");
4062 
4063 		/* For latency tests we need to disable early termination */
4064 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4065 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4066 			ref_op->ldpc_dec.op_flags &=
4067 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4068 		ref_op->ldpc_dec.iter_max = get_iter_max();
4069 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4070 
4071 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4072 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4073 					bufs->inputs,
4074 					bufs->hard_outputs,
4075 					bufs->soft_outputs,
4076 					bufs->harq_inputs,
4077 					bufs->harq_outputs,
4078 					ref_op);
4079 
4080 		if (extDdr)
4081 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4082 					burst_sz, true);
4083 
4084 		/* Set counter to validate the ordering */
4085 		for (j = 0; j < burst_sz; ++j)
4086 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4087 
4088 		start_time = rte_rdtsc_precise();
4089 
4090 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4091 				&ops_enq[enq], burst_sz);
4092 		TEST_ASSERT(enq == burst_sz,
4093 				"Error enqueueing burst, expected %u, got %u",
4094 				burst_sz, enq);
4095 
4096 		/* Dequeue */
4097 		do {
4098 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4099 					&ops_deq[deq], burst_sz - deq);
4100 			if (likely(first_time && (deq > 0))) {
4101 				last_time = rte_rdtsc_precise() - start_time;
4102 				first_time = false;
4103 			}
4104 		} while (unlikely(burst_sz != deq));
4105 
4106 		*max_time = RTE_MAX(*max_time, last_time);
4107 		*min_time = RTE_MIN(*min_time, last_time);
4108 		*total_time += last_time;
4109 
4110 		if (extDdr)
4111 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4112 
4113 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4114 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4115 					vector_mask);
4116 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4117 		}
4118 
4119 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4120 		dequeued += deq;
4121 	}
4122 	return i;
4123 }
4124 
4125 static int
4126 latency_test_enc(struct rte_mempool *mempool,
4127 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4128 		uint16_t dev_id, uint16_t queue_id,
4129 		const uint16_t num_to_process, uint16_t burst_sz,
4130 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4131 {
4132 	int ret = TEST_SUCCESS;
4133 	uint16_t i, j, dequeued;
4134 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4135 	uint64_t start_time = 0, last_time = 0;
4136 
4137 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4138 		uint16_t enq = 0, deq = 0;
4139 		bool first_time = true;
4140 		last_time = 0;
4141 
4142 		if (unlikely(num_to_process - dequeued < burst_sz))
4143 			burst_sz = num_to_process - dequeued;
4144 
4145 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4146 		TEST_ASSERT_SUCCESS(ret,
4147 				"rte_bbdev_enc_op_alloc_bulk() failed");
4148 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4149 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4150 					bufs->inputs,
4151 					bufs->hard_outputs,
4152 					ref_op);
4153 
4154 		/* Set counter to validate the ordering */
4155 		for (j = 0; j < burst_sz; ++j)
4156 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4157 
4158 		start_time = rte_rdtsc_precise();
4159 
4160 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4161 				burst_sz);
4162 		TEST_ASSERT(enq == burst_sz,
4163 				"Error enqueueing burst, expected %u, got %u",
4164 				burst_sz, enq);
4165 
4166 		/* Dequeue */
4167 		do {
4168 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4169 					&ops_deq[deq], burst_sz - deq);
4170 			if (likely(first_time && (deq > 0))) {
4171 				last_time += rte_rdtsc_precise() - start_time;
4172 				first_time = false;
4173 			}
4174 		} while (unlikely(burst_sz != deq));
4175 
4176 		*max_time = RTE_MAX(*max_time, last_time);
4177 		*min_time = RTE_MIN(*min_time, last_time);
4178 		*total_time += last_time;
4179 
4180 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4181 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4182 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4183 		}
4184 
4185 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4186 		dequeued += deq;
4187 	}
4188 
4189 	return i;
4190 }
4191 
4192 static int
4193 latency_test_ldpc_enc(struct rte_mempool *mempool,
4194 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4195 		uint16_t dev_id, uint16_t queue_id,
4196 		const uint16_t num_to_process, uint16_t burst_sz,
4197 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4198 {
4199 	int ret = TEST_SUCCESS;
4200 	uint16_t i, j, dequeued;
4201 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4202 	uint64_t start_time = 0, last_time = 0;
4203 
4204 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4205 		uint16_t enq = 0, deq = 0;
4206 		bool first_time = true;
4207 		last_time = 0;
4208 
4209 		if (unlikely(num_to_process - dequeued < burst_sz))
4210 			burst_sz = num_to_process - dequeued;
4211 
4212 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4213 		TEST_ASSERT_SUCCESS(ret,
4214 				"rte_bbdev_enc_op_alloc_bulk() failed");
4215 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4216 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4217 					bufs->inputs,
4218 					bufs->hard_outputs,
4219 					ref_op);
4220 
4221 		/* Set counter to validate the ordering */
4222 		for (j = 0; j < burst_sz; ++j)
4223 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4224 
4225 		start_time = rte_rdtsc_precise();
4226 
4227 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4228 				&ops_enq[enq], burst_sz);
4229 		TEST_ASSERT(enq == burst_sz,
4230 				"Error enqueueing burst, expected %u, got %u",
4231 				burst_sz, enq);
4232 
4233 		/* Dequeue */
4234 		do {
4235 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4236 					&ops_deq[deq], burst_sz - deq);
4237 			if (likely(first_time && (deq > 0))) {
4238 				last_time += rte_rdtsc_precise() - start_time;
4239 				first_time = false;
4240 			}
4241 		} while (unlikely(burst_sz != deq));
4242 
4243 		*max_time = RTE_MAX(*max_time, last_time);
4244 		*min_time = RTE_MIN(*min_time, last_time);
4245 		*total_time += last_time;
4246 
4247 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4248 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4249 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4250 		}
4251 
4252 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4253 		dequeued += deq;
4254 	}
4255 
4256 	return i;
4257 }
4258 
4259 /* Common function for running validation and latency test cases */
4260 static int
4261 validation_latency_test(struct active_device *ad,
4262 		struct test_op_params *op_params, bool latency_flag)
4263 {
4264 	int iter;
4265 	uint16_t burst_sz = op_params->burst_sz;
4266 	const uint16_t num_to_process = op_params->num_to_process;
4267 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4268 	const uint16_t queue_id = ad->queue_ids[0];
4269 	struct test_buffers *bufs = NULL;
4270 	struct rte_bbdev_info info;
4271 	uint64_t total_time, min_time, max_time;
4272 	const char *op_type_str;
4273 
4274 	total_time = max_time = 0;
4275 	min_time = UINT64_MAX;
4276 
4277 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4278 			"BURST_SIZE should be <= %u", MAX_BURST);
4279 
4280 	rte_bbdev_info_get(ad->dev_id, &info);
4281 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4282 
4283 	op_type_str = rte_bbdev_op_type_str(op_type);
4284 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4285 
4286 	printf("+ ------------------------------------------------------- +\n");
4287 	if (latency_flag)
4288 		printf("== test: latency\ndev:");
4289 	else
4290 		printf("== test: validation\ndev:");
4291 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4292 			info.dev_name, burst_sz, num_to_process, op_type_str);
4293 
4294 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4295 		iter = latency_test_dec(op_params->mp, bufs,
4296 				op_params->ref_dec_op, op_params->vector_mask,
4297 				ad->dev_id, queue_id, num_to_process,
4298 				burst_sz, &total_time, &min_time, &max_time);
4299 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4300 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4301 				op_params->ref_enc_op, ad->dev_id, queue_id,
4302 				num_to_process, burst_sz, &total_time,
4303 				&min_time, &max_time);
4304 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4305 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4306 				op_params->ref_dec_op, op_params->vector_mask,
4307 				ad->dev_id, queue_id, num_to_process,
4308 				burst_sz, &total_time, &min_time, &max_time,
4309 				latency_flag);
4310 	else /* RTE_BBDEV_OP_TURBO_ENC */
4311 		iter = latency_test_enc(op_params->mp, bufs,
4312 				op_params->ref_enc_op,
4313 				ad->dev_id, queue_id,
4314 				num_to_process, burst_sz, &total_time,
4315 				&min_time, &max_time);
4316 
4317 	if (iter <= 0)
4318 		return TEST_FAILED;
4319 
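	/*
	 * Report per-burst latency: cycle counts are converted to
	 * microseconds as cycles * 1e6 / rte_get_tsc_hz(), and the average is
	 * taken over the number of measured bursts (iter).
	 */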
4320 	printf("Operation latency:\n"
4321 			"\tavg: %lg cycles, %lg us\n"
4322 			"\tmin: %lg cycles, %lg us\n"
4323 			"\tmax: %lg cycles, %lg us\n",
4324 			(double)total_time / (double)iter,
4325 			(double)(total_time * 1000000) / (double)iter /
4326 			(double)rte_get_tsc_hz(), (double)min_time,
4327 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4328 			(double)max_time, (double)(max_time * 1000000) /
4329 			(double)rte_get_tsc_hz());
4330 
4331 	return TEST_SUCCESS;
4332 }
4333 
4334 static int
4335 latency_test(struct active_device *ad, struct test_op_params *op_params)
4336 {
4337 	return validation_latency_test(ad, op_params, true);
4338 }
4339 
4340 static int
4341 validation_test(struct active_device *ad, struct test_op_params *op_params)
4342 {
4343 	return validation_latency_test(ad, op_params, false);
4344 }
4345 
4346 #ifdef RTE_BBDEV_OFFLOAD_COST
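/*
 * Snapshot the statistics of a single queue directly from the device's
 * internal queue data, including acc_offload_cycles, i.e. the cycles the
 * driver attributes to the accelerator itself.
 */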
4347 static int
4348 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4349 		struct rte_bbdev_stats *stats)
4350 {
4351 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4352 	struct rte_bbdev_stats *q_stats;
4353 
4354 	if (queue_id >= dev->data->num_queues)
4355 		return -1;
4356 
4357 	q_stats = &dev->data->queues[queue_id].queue_stats;
4358 
4359 	stats->enqueued_count = q_stats->enqueued_count;
4360 	stats->dequeued_count = q_stats->dequeued_count;
4361 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4362 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4363 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4364 
4365 	return 0;
4366 }
4367 
4368 static int
4369 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4370 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4371 		uint16_t queue_id, const uint16_t num_to_process,
4372 		uint16_t burst_sz, struct test_time_stats *time_st)
4373 {
4374 	int i, dequeued, ret;
4375 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4376 	uint64_t enq_start_time, deq_start_time;
4377 	uint64_t enq_sw_last_time, deq_last_time;
4378 	struct rte_bbdev_stats stats;
4379 
4380 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4381 		uint16_t enq = 0, deq = 0;
4382 
4383 		if (unlikely(num_to_process - dequeued < burst_sz))
4384 			burst_sz = num_to_process - dequeued;
4385 
4386 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4387 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4388 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4389 					bufs->inputs,
4390 					bufs->hard_outputs,
4391 					bufs->soft_outputs,
4392 					ref_op);
4393 
4394 		/* Start time meas for enqueue function offload latency */
4395 		enq_start_time = rte_rdtsc_precise();
4396 		do {
4397 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4398 					&ops_enq[enq], burst_sz - enq);
4399 		} while (unlikely(burst_sz != enq));
4400 
4401 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4402 		TEST_ASSERT_SUCCESS(ret,
4403 				"Failed to get stats for queue (%u) of device (%u)",
4404 				queue_id, dev_id);
4405 
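		/*
		 * Split the measured enqueue time: subtracting the cycles the
		 * driver attributes to the accelerator (acc_offload_cycles)
		 * leaves the software overhead spent in the enqueue path.
		 */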
4406 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4407 				stats.acc_offload_cycles;
4408 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4409 				enq_sw_last_time);
4410 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4411 				enq_sw_last_time);
4412 		time_st->enq_sw_total_time += enq_sw_last_time;
4413 
4414 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4415 				stats.acc_offload_cycles);
4416 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4417 				stats.acc_offload_cycles);
4418 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4419 
4420 		/* give time for device to process ops */
4421 		rte_delay_us(WAIT_OFFLOAD_US);
4422 
4423 		/* Start time meas for dequeue function offload latency */
4424 		deq_start_time = rte_rdtsc_precise();
4425 		/* Dequeue one operation */
4426 		do {
4427 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4428 					&ops_deq[deq], enq);
4429 		} while (unlikely(deq == 0));
4430 
4431 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4432 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4433 				deq_last_time);
4434 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4435 				deq_last_time);
4436 		time_st->deq_total_time += deq_last_time;
4437 
4438 		/* Dequeue remaining operations if needed */
4439 		while (burst_sz != deq)
4440 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4441 					&ops_deq[deq], burst_sz - deq);
4442 
4443 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4444 		dequeued += deq;
4445 	}
4446 
4447 	return i;
4448 }
4449 
4450 static int
4451 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4452 		struct test_buffers *bufs,
4453 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4454 		uint16_t queue_id, const uint16_t num_to_process,
4455 		uint16_t burst_sz, struct test_time_stats *time_st)
4456 {
4457 	int i, dequeued, ret;
4458 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4459 	uint64_t enq_start_time, deq_start_time;
4460 	uint64_t enq_sw_last_time, deq_last_time;
4461 	struct rte_bbdev_stats stats;
4462 	bool extDdr = ldpc_cap_flags &
4463 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4464 
4465 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4466 		uint16_t enq = 0, deq = 0;
4467 
4468 		if (unlikely(num_to_process - dequeued < burst_sz))
4469 			burst_sz = num_to_process - dequeued;
4470 
4471 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4472 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4473 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4474 					bufs->inputs,
4475 					bufs->hard_outputs,
4476 					bufs->soft_outputs,
4477 					bufs->harq_inputs,
4478 					bufs->harq_outputs,
4479 					ref_op);
4480 
4481 		if (extDdr)
4482 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4483 					burst_sz, true);
4484 
4485 		/* Start time meas for enqueue function offload latency */
4486 		enq_start_time = rte_rdtsc_precise();
4487 		do {
4488 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4489 					&ops_enq[enq], burst_sz - enq);
4490 		} while (unlikely(burst_sz != enq));
4491 
4492 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4493 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4494 		TEST_ASSERT_SUCCESS(ret,
4495 				"Failed to get stats for queue (%u) of device (%u)",
4496 				queue_id, dev_id);
4497 
4498 		enq_sw_last_time -= stats.acc_offload_cycles;
4499 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4500 				enq_sw_last_time);
4501 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4502 				enq_sw_last_time);
4503 		time_st->enq_sw_total_time += enq_sw_last_time;
4504 
4505 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4506 				stats.acc_offload_cycles);
4507 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4508 				stats.acc_offload_cycles);
4509 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4510 
4511 		/* give time for device to process ops */
4512 		rte_delay_us(WAIT_OFFLOAD_US);
4513 
4514 		/* Start time meas for dequeue function offload latency */
4515 		deq_start_time = rte_rdtsc_precise();
4516 		/* Dequeue one operation */
4517 		do {
4518 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4519 					&ops_deq[deq], enq);
4520 		} while (unlikely(deq == 0));
4521 
4522 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4523 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4524 				deq_last_time);
4525 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4526 				deq_last_time);
4527 		time_st->deq_total_time += deq_last_time;
4528 
4529 		/* Dequeue remaining operations if needed */
4530 		while (burst_sz != deq)
4531 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4532 					&ops_deq[deq], burst_sz - deq);
4533 
4534 		if (extDdr) {
4535 			/* Read loopback is not thread safe */
4536 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4537 		}
4538 
4539 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4540 		dequeued += deq;
4541 	}
4542 
4543 	return i;
4544 }
4545 
4546 static int
4547 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4548 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4549 		uint16_t queue_id, const uint16_t num_to_process,
4550 		uint16_t burst_sz, struct test_time_stats *time_st)
4551 {
4552 	int i, dequeued, ret;
4553 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4554 	uint64_t enq_start_time, deq_start_time;
4555 	uint64_t enq_sw_last_time, deq_last_time;
4556 	struct rte_bbdev_stats stats;
4557 
4558 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4559 		uint16_t enq = 0, deq = 0;
4560 
4561 		if (unlikely(num_to_process - dequeued < burst_sz))
4562 			burst_sz = num_to_process - dequeued;
4563 
4564 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4565 		TEST_ASSERT_SUCCESS(ret,
4566 				"rte_bbdev_enc_op_alloc_bulk() failed");
4567 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4568 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4569 					bufs->inputs,
4570 					bufs->hard_outputs,
4571 					ref_op);
4572 
4573 		/* Start time meas for enqueue function offload latency */
4574 		enq_start_time = rte_rdtsc_precise();
4575 		do {
4576 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4577 					&ops_enq[enq], burst_sz - enq);
4578 		} while (unlikely(burst_sz != enq));
4579 
4580 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4581 
4582 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4583 		TEST_ASSERT_SUCCESS(ret,
4584 				"Failed to get stats for queue (%u) of device (%u)",
4585 				queue_id, dev_id);
4586 		enq_sw_last_time -= stats.acc_offload_cycles;
4587 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4588 				enq_sw_last_time);
4589 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4590 				enq_sw_last_time);
4591 		time_st->enq_sw_total_time += enq_sw_last_time;
4592 
4593 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4594 				stats.acc_offload_cycles);
4595 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4596 				stats.acc_offload_cycles);
4597 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4598 
4599 		/* give time for device to process ops */
4600 		rte_delay_us(WAIT_OFFLOAD_US);
4601 
4602 		/* Start time meas for dequeue function offload latency */
4603 		deq_start_time = rte_rdtsc_precise();
4604 		/* Dequeue one operation */
4605 		do {
4606 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4607 					&ops_deq[deq], enq);
4608 		} while (unlikely(deq == 0));
4609 
4610 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4611 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4612 				deq_last_time);
4613 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4614 				deq_last_time);
4615 		time_st->deq_total_time += deq_last_time;
4616 
4617 		while (burst_sz != deq)
4618 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4619 					&ops_deq[deq], burst_sz - deq);
4620 
4621 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4622 		dequeued += deq;
4623 	}
4624 
4625 	return i;
4626 }
4627 
4628 static int
4629 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4630 		struct test_buffers *bufs,
4631 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4632 		uint16_t queue_id, const uint16_t num_to_process,
4633 		uint16_t burst_sz, struct test_time_stats *time_st)
4634 {
4635 	int i, dequeued, ret;
4636 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4637 	uint64_t enq_start_time, deq_start_time;
4638 	uint64_t enq_sw_last_time, deq_last_time;
4639 	struct rte_bbdev_stats stats;
4640 
4641 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4642 		uint16_t enq = 0, deq = 0;
4643 
4644 		if (unlikely(num_to_process - dequeued < burst_sz))
4645 			burst_sz = num_to_process - dequeued;
4646 
4647 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4648 		TEST_ASSERT_SUCCESS(ret,
4649 				"rte_bbdev_enc_op_alloc_bulk() failed");
4650 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4651 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4652 					bufs->inputs,
4653 					bufs->hard_outputs,
4654 					ref_op);
4655 
4656 		/* Start time meas for enqueue function offload latency */
4657 		enq_start_time = rte_rdtsc_precise();
4658 		do {
4659 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4660 					&ops_enq[enq], burst_sz - enq);
4661 		} while (unlikely(burst_sz != enq));
4662 
4663 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4664 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4665 		TEST_ASSERT_SUCCESS(ret,
4666 				"Failed to get stats for queue (%u) of device (%u)",
4667 				queue_id, dev_id);
4668 
4669 		enq_sw_last_time -= stats.acc_offload_cycles;
4670 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4671 				enq_sw_last_time);
4672 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4673 				enq_sw_last_time);
4674 		time_st->enq_sw_total_time += enq_sw_last_time;
4675 
4676 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4677 				stats.acc_offload_cycles);
4678 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4679 				stats.acc_offload_cycles);
4680 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4681 
4682 		/* give time for device to process ops */
4683 		rte_delay_us(WAIT_OFFLOAD_US);
4684 
4685 		/* Start time meas for dequeue function offload latency */
4686 		deq_start_time = rte_rdtsc_precise();
4687 		/* Dequeue one operation */
4688 		do {
4689 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4690 					&ops_deq[deq], enq);
4691 		} while (unlikely(deq == 0));
4692 
4693 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4694 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4695 				deq_last_time);
4696 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4697 				deq_last_time);
4698 		time_st->deq_total_time += deq_last_time;
4699 
4700 		while (burst_sz != deq)
4701 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4702 					&ops_deq[deq], burst_sz - deq);
4703 
4704 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4705 		dequeued += deq;
4706 	}
4707 
4708 	return i;
4709 }
4710 #endif
4711 
4712 static int
4713 offload_cost_test(struct active_device *ad,
4714 		struct test_op_params *op_params)
4715 {
4716 #ifndef RTE_BBDEV_OFFLOAD_COST
4717 	RTE_SET_USED(ad);
4718 	RTE_SET_USED(op_params);
4719 	printf("Offload latency test is disabled.\n");
4720 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4721 	return TEST_SKIPPED;
4722 #else
4723 	int iter;
4724 	uint16_t burst_sz = op_params->burst_sz;
4725 	const uint16_t num_to_process = op_params->num_to_process;
4726 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4727 	const uint16_t queue_id = ad->queue_ids[0];
4728 	struct test_buffers *bufs = NULL;
4729 	struct rte_bbdev_info info;
4730 	const char *op_type_str;
4731 	struct test_time_stats time_st;
4732 
4733 	memset(&time_st, 0, sizeof(struct test_time_stats));
4734 	time_st.enq_sw_min_time = UINT64_MAX;
4735 	time_st.enq_acc_min_time = UINT64_MAX;
4736 	time_st.deq_min_time = UINT64_MAX;
4737 
4738 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4739 			"BURST_SIZE should be <= %u", MAX_BURST);
4740 
4741 	rte_bbdev_info_get(ad->dev_id, &info);
4742 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4743 
4744 	op_type_str = rte_bbdev_op_type_str(op_type);
4745 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4746 
4747 	printf("+ ------------------------------------------------------- +\n");
4748 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4749 			info.dev_name, burst_sz, num_to_process, op_type_str);
4750 
4751 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4752 		iter = offload_latency_test_dec(op_params->mp, bufs,
4753 				op_params->ref_dec_op, ad->dev_id, queue_id,
4754 				num_to_process, burst_sz, &time_st);
4755 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4756 		iter = offload_latency_test_enc(op_params->mp, bufs,
4757 				op_params->ref_enc_op, ad->dev_id, queue_id,
4758 				num_to_process, burst_sz, &time_st);
4759 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4760 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4761 				op_params->ref_enc_op, ad->dev_id, queue_id,
4762 				num_to_process, burst_sz, &time_st);
4763 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4764 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4765 			op_params->ref_dec_op, ad->dev_id, queue_id,
4766 			num_to_process, burst_sz, &time_st);
4767 	else
4768 		iter = offload_latency_test_enc(op_params->mp, bufs,
4769 				op_params->ref_enc_op, ad->dev_id, queue_id,
4770 				num_to_process, burst_sz, &time_st);
4771 
4772 	if (iter <= 0)
4773 		return TEST_FAILED;
4774 
4775 	printf("Enqueue driver offload cost latency:\n"
4776 			"\tavg: %lg cycles, %lg us\n"
4777 			"\tmin: %lg cycles, %lg us\n"
4778 			"\tmax: %lg cycles, %lg us\n"
4779 			"Enqueue accelerator offload cost latency:\n"
4780 			"\tavg: %lg cycles, %lg us\n"
4781 			"\tmin: %lg cycles, %lg us\n"
4782 			"\tmax: %lg cycles, %lg us\n",
4783 			(double)time_st.enq_sw_total_time / (double)iter,
4784 			(double)(time_st.enq_sw_total_time * 1000000) /
4785 			(double)iter / (double)rte_get_tsc_hz(),
4786 			(double)time_st.enq_sw_min_time,
4787 			(double)(time_st.enq_sw_min_time * 1000000) /
4788 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4789 			(double)(time_st.enq_sw_max_time * 1000000) /
4790 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4791 			(double)iter,
4792 			(double)(time_st.enq_acc_total_time * 1000000) /
4793 			(double)iter / (double)rte_get_tsc_hz(),
4794 			(double)time_st.enq_acc_min_time,
4795 			(double)(time_st.enq_acc_min_time * 1000000) /
4796 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4797 			(double)(time_st.enq_acc_max_time * 1000000) /
4798 			rte_get_tsc_hz());
4799 
4800 	printf("Dequeue offload cost latency - one op:\n"
4801 			"\tavg: %lg cycles, %lg us\n"
4802 			"\tmin: %lg cycles, %lg us\n"
4803 			"\tmax: %lg cycles, %lg us\n",
4804 			(double)time_st.deq_total_time / (double)iter,
4805 			(double)(time_st.deq_total_time * 1000000) /
4806 			(double)iter / (double)rte_get_tsc_hz(),
4807 			(double)time_st.deq_min_time,
4808 			(double)(time_st.deq_min_time * 1000000) /
4809 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4810 			(double)(time_st.deq_max_time * 1000000) /
4811 			rte_get_tsc_hz());
4812 
4813 	struct rte_bbdev_stats stats = {0};
4814 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
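	/*
	 * Queue counters are only cross-checked for non LDPC-decode cases;
	 * for LDPC decode the HARQ preload/retrieve helpers may issue extra
	 * enqueues/dequeues on the same queue, so an exact match with
	 * num_to_process is not expected.
	 */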
4815 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4816 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4817 				"Mismatch in enqueue count %10"PRIu64" %d",
4818 				stats.enqueued_count, num_to_process);
4819 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4820 				"Mismatch in dequeue count %10"PRIu64" %d",
4821 				stats.dequeued_count, num_to_process);
4822 	}
4823 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4824 			"Enqueue error count %10"PRIu64"",
4825 			stats.enqueue_err_count);
4826 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4827 			"Dequeue error count %10"PRIu64"",
4828 			stats.dequeue_err_count);
4829 
4830 	return TEST_SUCCESS;
4831 #endif
4832 }
4833 
4834 #ifdef RTE_BBDEV_OFFLOAD_COST
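/*
 * Measure the cost of decoder dequeue calls issued against an empty queue,
 * i.e. the pure polling overhead when no operations are ready.
 */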
4835 static int
4836 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4837 		const uint16_t num_to_process, uint16_t burst_sz,
4838 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4839 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4840 {
4841 	int i, deq_total;
4842 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4843 	uint64_t deq_start_time, deq_last_time;
4844 
4845 	/* Test deq offload latency from an empty queue */
4846 
4847 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4848 			++i, deq_total += burst_sz) {
4849 		deq_start_time = rte_rdtsc_precise();
4850 
4851 		if (unlikely(num_to_process - deq_total < burst_sz))
4852 			burst_sz = num_to_process - deq_total;
4853 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4854 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4855 					burst_sz);
4856 		else
4857 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4858 					burst_sz);
4859 
4860 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4861 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4862 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4863 		*deq_total_time += deq_last_time;
4864 	}
4865 
4866 	return i;
4867 }
4868 
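/* Same empty-queue dequeue measurement for the encoder operation types. */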
4869 static int
4870 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4871 		const uint16_t num_to_process, uint16_t burst_sz,
4872 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4873 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4874 {
4875 	int i, deq_total;
4876 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4877 	uint64_t deq_start_time, deq_last_time;
4878 
4879 	/* Test deq offload latency from an empty queue */
4880 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4881 			++i, deq_total += burst_sz) {
4882 		deq_start_time = rte_rdtsc_precise();
4883 
4884 		if (unlikely(num_to_process - deq_total < burst_sz))
4885 			burst_sz = num_to_process - deq_total;
4886 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4887 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4888 					burst_sz);
4889 		else
4890 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4891 					burst_sz);
4892 
4893 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4894 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4895 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4896 		*deq_total_time += deq_last_time;
4897 	}
4898 
4899 	return i;
4900 }
4901 
4902 #endif
4903 
4904 static int
4905 offload_latency_empty_q_test(struct active_device *ad,
4906 		struct test_op_params *op_params)
4907 {
4908 #ifndef RTE_BBDEV_OFFLOAD_COST
4909 	RTE_SET_USED(ad);
4910 	RTE_SET_USED(op_params);
4911 	printf("Offload latency empty dequeue test is disabled.\n");
4912 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4913 	return TEST_SKIPPED;
4914 #else
4915 	int iter;
4916 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4917 	uint16_t burst_sz = op_params->burst_sz;
4918 	const uint16_t num_to_process = op_params->num_to_process;
4919 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4920 	const uint16_t queue_id = ad->queue_ids[0];
4921 	struct rte_bbdev_info info;
4922 	const char *op_type_str;
4923 
4924 	deq_total_time = deq_max_time = 0;
4925 	deq_min_time = UINT64_MAX;
4926 
4927 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4928 			"BURST_SIZE should be <= %u", MAX_BURST);
4929 
4930 	rte_bbdev_info_get(ad->dev_id, &info);
4931 
4932 	op_type_str = rte_bbdev_op_type_str(op_type);
4933 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4934 
4935 	printf("+ ------------------------------------------------------- +\n");
4936 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4937 			info.dev_name, burst_sz, num_to_process, op_type_str);
4938 
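	/* Decoder op types poll the decode dequeue API, all others the encode one. */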
4939 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4940 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4941 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4942 				num_to_process, burst_sz, &deq_total_time,
4943 				&deq_min_time, &deq_max_time, op_type);
4944 	else
4945 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4946 				num_to_process, burst_sz, &deq_total_time,
4947 				&deq_min_time, &deq_max_time, op_type);
4948 
4949 	if (iter <= 0)
4950 		return TEST_FAILED;
4951 
4952 	printf("Empty dequeue offload:\n"
4953 			"\tavg: %lg cycles, %lg us\n"
4954 			"\tmin: %lg cycles, %lg us\n"
4955 			"\tmax: %lg cycles, %lg us\n",
4956 			(double)deq_total_time / (double)iter,
4957 			(double)(deq_total_time * 1000000) / (double)iter /
4958 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4959 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4960 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4961 			rte_get_tsc_hz());
4962 
4963 	return TEST_SUCCESS;
4964 #endif
4965 }
4966 
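/* Thin wrappers passing each test body to run_test_case(). */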
4967 static int
4968 bler_tc(void)
4969 {
4970 	return run_test_case(bler_test);
4971 }
4972 
4973 static int
4974 throughput_tc(void)
4975 {
4976 	return run_test_case(throughput_test);
4977 }
4978 
4979 static int
4980 offload_cost_tc(void)
4981 {
4982 	return run_test_case(offload_cost_test);
4983 }
4984 
4985 static int
4986 offload_latency_empty_q_tc(void)
4987 {
4988 	return run_test_case(offload_latency_empty_q_test);
4989 }
4990 
4991 static int
4992 latency_tc(void)
4993 {
4994 	return run_test_case(latency_test);
4995 }
4996 
4997 static int
4998 validation_tc(void)
4999 {
5000 	return run_test_case(validation_test);
5001 }
5002 
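/*
 * The interrupt test case reuses the throughput test body; the
 * interrupt-specific configuration is done by the suite setup
 * (see bbdev_interrupt_testsuite below).
 */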
5003 static int
5004 interrupt_tc(void)
5005 {
5006 	return run_test_case(throughput_test);
5007 }
5008 
5009 static struct unit_test_suite bbdev_bler_testsuite = {
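/*
 * Unit test suites exposed through the test-bbdev application, one per
 * supported test command (bler, throughput, validation, latency, offload
 * cost, interrupt).
 */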
5010 	.suite_name = "BBdev BLER Tests",
5011 	.setup = testsuite_setup,
5012 	.teardown = testsuite_teardown,
5013 	.unit_test_cases = {
5014 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5015 		TEST_CASES_END() /**< NULL terminate unit test array */
5016 	}
5017 };
5018 
5019 static struct unit_test_suite bbdev_throughput_testsuite = {
5020 	.suite_name = "BBdev Throughput Tests",
5021 	.setup = testsuite_setup,
5022 	.teardown = testsuite_teardown,
5023 	.unit_test_cases = {
5024 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5025 		TEST_CASES_END() /**< NULL terminate unit test array */
5026 	}
5027 };
5028 
5029 static struct unit_test_suite bbdev_validation_testsuite = {
5030 	.suite_name = "BBdev Validation Tests",
5031 	.setup = testsuite_setup,
5032 	.teardown = testsuite_teardown,
5033 	.unit_test_cases = {
5034 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5035 		TEST_CASES_END() /**< NULL terminate unit test array */
5036 	}
5037 };
5038 
5039 static struct unit_test_suite bbdev_latency_testsuite = {
5040 	.suite_name = "BBdev Latency Tests",
5041 	.setup = testsuite_setup,
5042 	.teardown = testsuite_teardown,
5043 	.unit_test_cases = {
5044 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5045 		TEST_CASES_END() /**< NULL terminate unit test array */
5046 	}
5047 };
5048 
5049 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5050 	.suite_name = "BBdev Offload Cost Tests",
5051 	.setup = testsuite_setup,
5052 	.teardown = testsuite_teardown,
5053 	.unit_test_cases = {
5054 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5055 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5056 		TEST_CASES_END() /**< NULL terminate unit test array */
5057 	}
5058 };
5059 
5060 static struct unit_test_suite bbdev_interrupt_testsuite = {
5061 	.suite_name = "BBdev Interrupt Tests",
5062 	.setup = interrupt_testsuite_setup,
5063 	.teardown = testsuite_teardown,
5064 	.unit_test_cases = {
5065 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5066 		TEST_CASES_END() /**< NULL terminate unit test array */
5067 	}
5068 };
5069 
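/* Register each suite under its test-bbdev command-line test name. */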
5070 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5071 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5072 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5073 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5074 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5075 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5076