xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision bbbe38a6d59ccdda25917712701e629d0b10af6f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 100
28 #define WAIT_OFFLOAD_US 1000
29 
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42 
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #define FLR_5G_TIMEOUT 610
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67 
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70 
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74 
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for insertion of filler LLRs in the HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants for K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
89 
90 static struct test_bbdev_vector test_vector;
91 
92 /* Switch between polling (PMD) and interrupt mode for the throughput test case */
93 static bool intr_enabled;
94 
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100 
101 /* Represents tested active devices */
102 static struct active_device {
103 	const char *driver_name;
104 	uint8_t dev_id;
105 	uint16_t supported_ops;
106 	uint16_t queue_ids[MAX_QUEUES];
107 	uint16_t nb_queues;
108 	struct rte_mempool *ops_mempool;
109 	struct rte_mempool *in_mbuf_pool;
110 	struct rte_mempool *hard_out_mbuf_pool;
111 	struct rte_mempool *soft_out_mbuf_pool;
112 	struct rte_mempool *harq_in_mbuf_pool;
113 	struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115 
116 static uint8_t nb_active_devs;
117 
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120 	struct rte_bbdev_op_data *inputs;
121 	struct rte_bbdev_op_data *hard_outputs;
122 	struct rte_bbdev_op_data *soft_outputs;
123 	struct rte_bbdev_op_data *harq_inputs;
124 	struct rte_bbdev_op_data *harq_outputs;
125 };
126 
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129 	struct rte_mempool *mp;
130 	struct rte_bbdev_dec_op *ref_dec_op;
131 	struct rte_bbdev_enc_op *ref_enc_op;
132 	uint16_t burst_sz;
133 	uint16_t num_to_process;
134 	uint16_t num_lcores;
135 	int vector_mask;
136 	rte_atomic16_t sync;
137 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139 
140 /* Contains per lcore params */
141 struct thread_params {
142 	uint8_t dev_id;
143 	uint16_t queue_id;
144 	uint32_t lcore_id;
145 	uint64_t start_time;
146 	double ops_per_sec;
147 	double mbps;
148 	uint8_t iter_count;
149 	double iter_average;
150 	double bler;
151 	rte_atomic16_t nb_dequeued;
152 	rte_atomic16_t processing_status;
153 	rte_atomic16_t burst_sz;
154 	struct test_op_params *op_params;
155 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158 
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162 	/* Stores software enqueue total working time */
163 	uint64_t enq_sw_total_time;
164 	/* Stores minimum value of software enqueue working time */
165 	uint64_t enq_sw_min_time;
166 	/* Stores maximum value of software enqueue working time */
167 	uint64_t enq_sw_max_time;
168 	/* Stores turbo enqueue total working time */
169 	uint64_t enq_acc_total_time;
170 	/* Stores minimum value of accelerator enqueue working time */
171 	uint64_t enq_acc_min_time;
172 	/* Stores maximum value of accelerator enqueue working time */
173 	uint64_t enq_acc_max_time;
174 	/* Stores dequeue total working time */
175 	uint64_t deq_total_time;
176 	/* Stores minimum value of dequeue working time */
177 	uint64_t deq_min_time;
178 	/* Stores maximum value of dequeue working time */
179 	uint64_t deq_max_time;
180 };
181 #endif
182 
183 typedef int (test_case_function)(struct active_device *ad,
184 		struct test_op_params *op_params);
185 
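/* Reset the packet length and the data length of every segment in a (possibly chained) mbuf */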
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189 	m->pkt_len = 0;
190 
191 	do {
192 		m->data_len = 0;
193 		m = m->next;
194 	} while (m != NULL);
195 }
196 
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201 	return bitmap & bitmask;
202 }
203 
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207 	ad->supported_ops |= (1 << op_type);
208 }
209 
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213 	return ad->supported_ops & (1 << op_type);
214 }
215 
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219 	return (flags_req & flags_present) == flags_req;
220 }
221 
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229 
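/* Check that the device capabilities cover the operation type, flags and buffer counts required by the test vector */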
230 static int
231 check_dev_cap(const struct rte_bbdev_info *dev_info)
232 {
233 	unsigned int i;
234 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
235 		nb_harq_inputs, nb_harq_outputs;
236 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
237 
238 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
239 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
240 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
241 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
242 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
243 
244 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
245 		if (op_cap->type != test_vector.op_type)
246 			continue;
247 
248 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
249 			const struct rte_bbdev_op_cap_turbo_dec *cap =
250 					&op_cap->cap.turbo_dec;
251 			/* Ignore lack of soft output capability, just skip
252 			 * checking if soft output is valid.
253 			 */
254 			if ((test_vector.turbo_dec.op_flags &
255 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
256 					!(cap->capability_flags &
257 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
258 				printf(
259 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
260 					dev_info->dev_name);
261 				clear_soft_out_cap(
262 					&test_vector.turbo_dec.op_flags);
263 			}
264 
265 			if (!flags_match(test_vector.turbo_dec.op_flags,
266 					cap->capability_flags))
267 				return TEST_FAILED;
268 			if (nb_inputs > cap->num_buffers_src) {
269 				printf("Too many inputs defined: %u, max: %u\n",
270 					nb_inputs, cap->num_buffers_src);
271 				return TEST_FAILED;
272 			}
273 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
274 					(test_vector.turbo_dec.op_flags &
275 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
276 				printf(
277 					"Too many soft outputs defined: %u, max: %u\n",
278 						nb_soft_outputs,
279 						cap->num_buffers_soft_out);
280 				return TEST_FAILED;
281 			}
282 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
283 				printf(
284 					"Too many hard outputs defined: %u, max: %u\n",
285 						nb_hard_outputs,
286 						cap->num_buffers_hard_out);
287 				return TEST_FAILED;
288 			}
289 			if (intr_enabled && !(cap->capability_flags &
290 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
291 				printf(
292 					"Dequeue interrupts are not supported!\n");
293 				return TEST_FAILED;
294 			}
295 
296 			return TEST_SUCCESS;
297 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
298 			const struct rte_bbdev_op_cap_turbo_enc *cap =
299 					&op_cap->cap.turbo_enc;
300 
301 			if (!flags_match(test_vector.turbo_enc.op_flags,
302 					cap->capability_flags))
303 				return TEST_FAILED;
304 			if (nb_inputs > cap->num_buffers_src) {
305 				printf("Too many inputs defined: %u, max: %u\n",
306 					nb_inputs, cap->num_buffers_src);
307 				return TEST_FAILED;
308 			}
309 			if (nb_hard_outputs > cap->num_buffers_dst) {
310 				printf(
311 					"Too many hard outputs defined: %u, max: %u\n",
312 					nb_hard_outputs, cap->num_buffers_dst);
313 				return TEST_FAILED;
314 			}
315 			if (intr_enabled && !(cap->capability_flags &
316 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
317 				printf(
318 					"Dequeue interrupts are not supported!\n");
319 				return TEST_FAILED;
320 			}
321 
322 			return TEST_SUCCESS;
323 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
324 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
325 					&op_cap->cap.ldpc_enc;
326 
327 			if (!flags_match(test_vector.ldpc_enc.op_flags,
328 					cap->capability_flags)){
329 				printf("Flag Mismatch\n");
330 				return TEST_FAILED;
331 			}
332 			if (nb_inputs > cap->num_buffers_src) {
333 				printf("Too many inputs defined: %u, max: %u\n",
334 					nb_inputs, cap->num_buffers_src);
335 				return TEST_FAILED;
336 			}
337 			if (nb_hard_outputs > cap->num_buffers_dst) {
338 				printf(
339 					"Too many hard outputs defined: %u, max: %u\n",
340 					nb_hard_outputs, cap->num_buffers_dst);
341 				return TEST_FAILED;
342 			}
343 			if (intr_enabled && !(cap->capability_flags &
344 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
345 				printf(
346 					"Dequeue interrupts are not supported!\n");
347 				return TEST_FAILED;
348 			}
349 
350 			return TEST_SUCCESS;
351 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
352 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
353 					&op_cap->cap.ldpc_dec;
354 
355 			if (!flags_match(test_vector.ldpc_dec.op_flags,
356 					cap->capability_flags)){
357 				printf("Flag Mismatch\n");
358 				return TEST_FAILED;
359 			}
360 			if (nb_inputs > cap->num_buffers_src) {
361 				printf("Too many inputs defined: %u, max: %u\n",
362 					nb_inputs, cap->num_buffers_src);
363 				return TEST_FAILED;
364 			}
365 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
366 				printf(
367 					"Too many hard outputs defined: %u, max: %u\n",
368 					nb_hard_outputs,
369 					cap->num_buffers_hard_out);
370 				return TEST_FAILED;
371 			}
372 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
373 				printf(
374 					"Too many HARQ inputs defined: %u, max: %u\n",
375 					nb_harq_inputs,
376 					cap->num_buffers_hard_out);
377 				return TEST_FAILED;
378 			}
379 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
380 				printf(
381 					"Too many HARQ outputs defined: %u, max: %u\n",
382 					nb_harq_outputs,
383 					cap->num_buffers_hard_out);
384 				return TEST_FAILED;
385 			}
386 			if (intr_enabled && !(cap->capability_flags &
387 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
388 				printf(
389 					"Dequeue interrupts are not supported!\n");
390 				return TEST_FAILED;
391 			}
392 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
393 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
394 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
395 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
396 					))) {
397 				printf("Skip loop-back with interrupt\n");
398 				return TEST_FAILED;
399 			}
400 			return TEST_SUCCESS;
401 		}
402 	}
403 
404 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
405 		return TEST_SUCCESS; /* Special case for NULL device */
406 
407 	return TEST_FAILED;
408 }
409 
410 /* Calculates the optimal mempool size (2^n - 1) that is not smaller than val */
411 static unsigned int
412 optimal_mempool_size(unsigned int val)
413 {
414 	return rte_align32pow2(val + 1) - 1;
415 }
416 
417 /* allocates mbuf mempool for inputs and outputs */
418 static struct rte_mempool *
419 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
420 		int socket_id, unsigned int mbuf_pool_size,
421 		const char *op_type_str)
422 {
423 	unsigned int i;
424 	uint32_t max_seg_sz = 0;
425 	char pool_name[RTE_MEMPOOL_NAMESIZE];
426 
427 	/* find max input segment size */
428 	for (i = 0; i < entries->nb_segments; ++i)
429 		if (entries->segments[i].length > max_seg_sz)
430 			max_seg_sz = entries->segments[i].length;
431 
432 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
433 			dev_id);
434 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
435 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
436 					+ FILLER_HEADROOM,
437 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
438 }
439 
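/* Create the bbdev ops mempool and the input/output mbuf pools sized from the test vector */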
440 static int
441 create_mempools(struct active_device *ad, int socket_id,
442 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
443 {
444 	struct rte_mempool *mp;
445 	unsigned int ops_pool_size, mbuf_pool_size = 0;
446 	char pool_name[RTE_MEMPOOL_NAMESIZE];
447 	const char *op_type_str;
448 	enum rte_bbdev_op_type op_type = org_op_type;
449 
450 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
451 	struct op_data_entries *hard_out =
452 			&test_vector.entries[DATA_HARD_OUTPUT];
453 	struct op_data_entries *soft_out =
454 			&test_vector.entries[DATA_SOFT_OUTPUT];
455 	struct op_data_entries *harq_in =
456 			&test_vector.entries[DATA_HARQ_INPUT];
457 	struct op_data_entries *harq_out =
458 			&test_vector.entries[DATA_HARQ_OUTPUT];
459 
460 	/* allocate ops mempool */
461 	ops_pool_size = optimal_mempool_size(RTE_MAX(
462 			/* Ops used plus 1 reference op */
463 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
464 			/* Minimal cache size plus 1 reference op */
465 			(unsigned int)(1.5 * rte_lcore_count() *
466 					OPS_CACHE_SIZE + 1)),
467 			OPS_POOL_SIZE_MIN));
468 
469 	if (org_op_type == RTE_BBDEV_OP_NONE)
470 		op_type = RTE_BBDEV_OP_TURBO_ENC;
471 
472 	op_type_str = rte_bbdev_op_type_str(op_type);
473 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
474 
475 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476 			ad->dev_id);
477 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
478 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
479 	TEST_ASSERT_NOT_NULL(mp,
480 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
481 			ops_pool_size,
482 			ad->dev_id,
483 			socket_id);
484 	ad->ops_mempool = mp;
485 
486 	/* Do not create input and output mbufs for the BaseBand Null Device */
487 	if (org_op_type == RTE_BBDEV_OP_NONE)
488 		return TEST_SUCCESS;
489 
490 	/* Inputs */
491 	if (in->nb_segments > 0) {
492 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
493 				in->nb_segments);
494 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
495 				mbuf_pool_size, "in");
496 		TEST_ASSERT_NOT_NULL(mp,
497 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
498 				mbuf_pool_size,
499 				ad->dev_id,
500 				socket_id);
501 		ad->in_mbuf_pool = mp;
502 	}
503 
504 	/* Hard outputs */
505 	if (hard_out->nb_segments > 0) {
506 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
507 				hard_out->nb_segments);
508 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
509 				mbuf_pool_size,
510 				"hard_out");
511 		TEST_ASSERT_NOT_NULL(mp,
512 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
513 				mbuf_pool_size,
514 				ad->dev_id,
515 				socket_id);
516 		ad->hard_out_mbuf_pool = mp;
517 	}
518 
519 	/* Soft outputs */
520 	if (soft_out->nb_segments > 0) {
521 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
522 				soft_out->nb_segments);
523 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
524 				mbuf_pool_size,
525 				"soft_out");
526 		TEST_ASSERT_NOT_NULL(mp,
527 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
528 				mbuf_pool_size,
529 				ad->dev_id,
530 				socket_id);
531 		ad->soft_out_mbuf_pool = mp;
532 	}
533 
534 	/* HARQ inputs */
535 	if (harq_in->nb_segments > 0) {
536 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
537 				harq_in->nb_segments);
538 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
539 				mbuf_pool_size,
540 				"harq_in");
541 		TEST_ASSERT_NOT_NULL(mp,
542 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
543 				mbuf_pool_size,
544 				ad->dev_id,
545 				socket_id);
546 		ad->harq_in_mbuf_pool = mp;
547 	}
548 
549 	/* HARQ outputs */
550 	if (harq_out->nb_segments > 0) {
551 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
552 				harq_out->nb_segments);
553 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
554 				mbuf_pool_size,
555 				"harq_out");
556 		TEST_ASSERT_NOT_NULL(mp,
557 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
558 				mbuf_pool_size,
559 				ad->dev_id,
560 				socket_id);
561 		ad->harq_out_mbuf_pool = mp;
562 	}
563 
564 	return TEST_SUCCESS;
565 }
566 
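/* Apply the default PF configuration when '-i' is set, then set up the device queues and optional interrupts */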
567 static int
568 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
569 		struct test_bbdev_vector *vector)
570 {
571 	int ret;
572 	unsigned int queue_id;
573 	struct rte_bbdev_queue_conf qconf;
574 	struct active_device *ad = &active_devs[nb_active_devs];
575 	unsigned int nb_queues;
576 	enum rte_bbdev_op_type op_type = vector->op_type;
577 
578 /* Configure FPGA LTE FEC with PF & VF values
579  * if the '-i' flag is set and an FPGA device is used
580  */
581 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
582 	if ((get_init_device() == true) &&
583 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
584 		struct rte_fpga_lte_fec_conf conf;
585 		unsigned int i;
586 
587 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
588 				info->drv.driver_name);
589 
590 		/* clear default configuration before initialization */
591 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
592 
593 		/* Set PF mode:
594 		 * true if the PF is used for the data plane,
595 		 * false if VFs are used
596 		 */
597 		conf.pf_mode_en = true;
598 
599 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
600 			/* Number of UL queues per VF (fpga supports 8 VFs) */
601 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
602 			/* Number of DL queues per VF (fpga supports 8 VFs) */
603 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
604 		}
605 
606 		/* UL bandwidth. Needed for the scheduling algorithm */
607 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
608 		/* DL bandwidth */
609 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
610 
611 		/* UL & DL load balance factor */
612 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
613 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
614 
615 		/* FLR timeout value */
616 		conf.flr_time_out = FLR_4G_TIMEOUT;
617 
618 		/* setup FPGA PF with configuration information */
619 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
620 		TEST_ASSERT_SUCCESS(ret,
621 				"Failed to configure 4G FPGA PF for bbdev %s",
622 				info->dev_name);
623 	}
624 #endif
625 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
626 	if ((get_init_device() == true) &&
627 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
628 		struct rte_fpga_5gnr_fec_conf conf;
629 		unsigned int i;
630 
631 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
632 				info->drv.driver_name);
633 
634 		/* clear default configuration before initialization */
635 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
636 
637 		/* Set PF mode:
638 		 * true if the PF is used for the data plane,
639 		 * false if VFs are used
640 		 */
641 		conf.pf_mode_en = true;
642 
643 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
644 			/* Number of UL queues per VF (fpga supports 8 VFs) */
645 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
646 			/* Number of DL queues per VF (fpga supports 8 VFs) */
647 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
648 		}
649 
650 		/* UL bandwidth. Needed for the scheduling algorithm */
651 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
652 		/* DL bandwidth */
653 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
654 
655 		/* UL & DL load balance factor */
656 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
657 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
658 
659 		/* FLR timeout value */
660 		conf.flr_time_out = FLR_5G_TIMEOUT;
661 
662 		/* setup FPGA PF with configuration information */
663 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
664 		TEST_ASSERT_SUCCESS(ret,
665 				"Failed to configure 5G FPGA PF for bbdev %s",
666 				info->dev_name);
667 	}
668 #endif
669 #ifdef RTE_BASEBAND_ACC100
670 	if ((get_init_device() == true) &&
671 		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
672 		struct rte_acc100_conf conf;
673 		unsigned int i;
674 
675 		printf("Configure ACC100 FEC Driver %s with default values\n",
676 				info->drv.driver_name);
677 
678 		/* clear default configuration before initialization */
679 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
680 
681 		/* Always set in PF mode for built-in configuration */
682 		conf.pf_mode_en = true;
683 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
684 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
685 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
686 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
687 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
688 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
689 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
690 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
691 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
692 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
693 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
694 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
695 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
696 		}
697 
698 		conf.input_pos_llr_1_bit = true;
699 		conf.output_pos_llr_1_bit = true;
700 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
701 
702 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
703 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
704 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
705 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
706 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
707 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
708 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
709 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
710 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
711 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
712 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
713 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
714 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
715 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
716 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
717 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
718 
719 		/* setup PF with configuration information */
720 		ret = rte_acc100_configure(info->dev_name, &conf);
721 		TEST_ASSERT_SUCCESS(ret,
722 				"Failed to configure ACC100 PF for bbdev %s",
723 				info->dev_name);
724 	}
725 #endif
726 	/* Refresh the device info now that the device is configured */
727 	rte_bbdev_info_get(dev_id, info);
728 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
729 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
730 
731 	/* setup device */
732 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
733 	if (ret < 0) {
734 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
735 				dev_id, nb_queues, info->socket_id, ret);
736 		return TEST_FAILED;
737 	}
738 
739 	/* configure interrupts if needed */
740 	if (intr_enabled) {
741 		ret = rte_bbdev_intr_enable(dev_id);
742 		if (ret < 0) {
743 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
744 					ret);
745 			return TEST_FAILED;
746 		}
747 	}
748 
749 	/* setup device queues */
750 	qconf.socket = info->socket_id;
751 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
752 	qconf.priority = 0;
753 	qconf.deferred_start = 0;
754 	qconf.op_type = op_type;
755 
756 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
757 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
758 		if (ret != 0) {
759 			printf(
760 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
761 					queue_id, qconf.priority, dev_id);
762 			qconf.priority++;
763 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
764 					&qconf);
765 		}
766 		if (ret != 0) {
767 			printf("All queues on dev %u allocated: %u\n",
768 					dev_id, queue_id);
769 			break;
770 		}
771 		ad->queue_ids[queue_id] = queue_id;
772 	}
773 	TEST_ASSERT(queue_id != 0,
774 			"ERROR Failed to configure any queues on dev %u",
775 			dev_id);
776 	ad->nb_queues = queue_id;
777 
778 	set_avail_op(ad, op_type);
779 
780 	return TEST_SUCCESS;
781 }
782 
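/* Register the device in the active_devs table once its queues have been configured */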
783 static int
784 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
785 		struct test_bbdev_vector *vector)
786 {
787 	int ret;
788 
789 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
790 	active_devs[nb_active_devs].dev_id = dev_id;
791 
792 	ret = add_bbdev_dev(dev_id, info, vector);
793 	if (ret == TEST_SUCCESS)
794 		++nb_active_devs;
795 	return ret;
796 }
797 
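/* Add every detected bbdev matching the test vector capabilities to the list of active devices */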
798 static uint8_t
799 populate_active_devices(void)
800 {
801 	int ret;
802 	uint8_t dev_id;
803 	uint8_t nb_devs_added = 0;
804 	struct rte_bbdev_info info;
805 
806 	RTE_BBDEV_FOREACH(dev_id) {
807 		rte_bbdev_info_get(dev_id, &info);
808 
809 		if (check_dev_cap(&info)) {
810 			printf(
811 				"Device %d (%s) does not support specified capabilities\n",
812 					dev_id, info.dev_name);
813 			continue;
814 		}
815 
816 		ret = add_active_device(dev_id, &info, &test_vector);
817 		if (ret != 0) {
818 			printf("Adding active bbdev %s skipped\n",
819 					info.dev_name);
820 			continue;
821 		}
822 		nb_devs_added++;
823 	}
824 
825 	return nb_devs_added;
826 }
827 
828 static int
829 read_test_vector(void)
830 {
831 	int ret;
832 
833 	memset(&test_vector, 0, sizeof(test_vector));
834 	printf("Test vector file = %s\n", get_vector_filename());
835 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
836 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
837 			get_vector_filename());
838 
839 	return TEST_SUCCESS;
840 }
841 
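/* Default test suite setup: parse the test vector and discover capable devices */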
842 static int
843 testsuite_setup(void)
844 {
845 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
846 
847 	if (populate_active_devices() == 0) {
848 		printf("No suitable devices found!\n");
849 		return TEST_SKIPPED;
850 	}
851 
852 	return TEST_SUCCESS;
853 }
854 
855 static int
856 interrupt_testsuite_setup(void)
857 {
858 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
859 
860 	/* Enable interrupts */
861 	intr_enabled = true;
862 
863 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
864 	if (populate_active_devices() == 0 ||
865 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
866 		intr_enabled = false;
867 		printf("No suitable devices found!\n");
868 		return TEST_SKIPPED;
869 	}
870 
871 	return TEST_SUCCESS;
872 }
873 
874 static void
875 testsuite_teardown(void)
876 {
877 	uint8_t dev_id;
878 
879 	/* Unconfigure devices */
880 	RTE_BBDEV_FOREACH(dev_id)
881 		rte_bbdev_close(dev_id);
882 
883 	/* Clear active devices structs. */
884 	memset(active_devs, 0, sizeof(active_devs));
885 	nb_active_devs = 0;
886 
887 	/* Disable interrupts */
888 	intr_enabled = false;
889 }
890 
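/* Per-test setup: reset the statistics and start each active device */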
891 static int
892 ut_setup(void)
893 {
894 	uint8_t i, dev_id;
895 
896 	for (i = 0; i < nb_active_devs; i++) {
897 		dev_id = active_devs[i].dev_id;
898 		/* reset bbdev stats */
899 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
900 				"Failed to reset stats of bbdev %u", dev_id);
901 		/* start the device */
902 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
903 				"Failed to start bbdev %u", dev_id);
904 	}
905 
906 	return TEST_SUCCESS;
907 }
908 
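/* Per-test teardown: read the statistics and stop each active device */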
909 static void
910 ut_teardown(void)
911 {
912 	uint8_t i, dev_id;
913 	struct rte_bbdev_stats stats;
914 
915 	for (i = 0; i < nb_active_devs; i++) {
916 		dev_id = active_devs[i].dev_id;
917 		/* read stats and print */
918 		rte_bbdev_stats_get(dev_id, &stats);
919 		/* Stop the device */
920 		rte_bbdev_stop(dev_id);
921 	}
922 }
923 
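/* Build the op_data entries for one data type: copy the reference segments into mbuf chains for inputs, allocate empty chains for outputs */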
924 static int
925 init_op_data_objs(struct rte_bbdev_op_data *bufs,
926 		struct op_data_entries *ref_entries,
927 		struct rte_mempool *mbuf_pool, const uint16_t n,
928 		enum op_data_type op_type, uint16_t min_alignment)
929 {
930 	int ret;
931 	unsigned int i, j;
932 	bool large_input = false;
933 
934 	for (i = 0; i < n; ++i) {
935 		char *data;
936 		struct op_data_buf *seg = &ref_entries->segments[0];
937 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
938 		TEST_ASSERT_NOT_NULL(m_head,
939 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
940 				op_type, n * ref_entries->nb_segments,
941 				mbuf_pool->size);
942 
943 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
944 			/*
945 			 * Special case when DPDK mbuf cannot handle
946 			 * the required input size
947 			 */
948 			printf("Warning: Larger input size than DPDK mbuf %d\n",
949 					seg->length);
950 			large_input = true;
951 		}
952 		bufs[i].data = m_head;
953 		bufs[i].offset = 0;
954 		bufs[i].length = 0;
955 
956 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
957 			if ((op_type == DATA_INPUT) && large_input) {
958 				/* Back the mbuf with an oversized buffer allocated outside the mbuf pool */
959 				data = rte_malloc(NULL, seg->length, 0);
960 				TEST_ASSERT_NOT_NULL(data,
961 					"rte malloc failed with %u bytes",
962 					seg->length);
963 				memcpy(data, seg->addr, seg->length);
964 				m_head->buf_addr = data;
965 				m_head->buf_iova = rte_malloc_virt2iova(data);
966 				m_head->data_off = 0;
967 				m_head->data_len = seg->length;
968 			} else {
969 				data = rte_pktmbuf_append(m_head, seg->length);
970 				TEST_ASSERT_NOT_NULL(data,
971 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
972 					seg->length, op_type);
973 
974 				TEST_ASSERT(data == RTE_PTR_ALIGN(
975 						data, min_alignment),
976 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
977 					data, min_alignment);
978 				rte_memcpy(data, seg->addr, seg->length);
979 			}
980 
981 			bufs[i].length += seg->length;
982 
983 			for (j = 1; j < ref_entries->nb_segments; ++j) {
984 				struct rte_mbuf *m_tail =
985 						rte_pktmbuf_alloc(mbuf_pool);
986 				TEST_ASSERT_NOT_NULL(m_tail,
987 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
988 						op_type,
989 						n * ref_entries->nb_segments,
990 						mbuf_pool->size);
991 				seg += 1;
992 
993 				data = rte_pktmbuf_append(m_tail, seg->length);
994 				TEST_ASSERT_NOT_NULL(data,
995 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
996 						seg->length, op_type);
997 
998 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
999 						min_alignment),
1000 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1001 						data, min_alignment);
1002 				rte_memcpy(data, seg->addr, seg->length);
1003 				bufs[i].length += seg->length;
1004 
1005 				ret = rte_pktmbuf_chain(m_head, m_tail);
1006 				TEST_ASSERT_SUCCESS(ret,
1007 						"Couldn't chain mbufs from %d data type mbuf pool",
1008 						op_type);
1009 			}
1010 		} else {
1011 
1012 			/* allocate chained-mbuf for output buffer */
1013 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1014 				struct rte_mbuf *m_tail =
1015 						rte_pktmbuf_alloc(mbuf_pool);
1016 				TEST_ASSERT_NOT_NULL(m_tail,
1017 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1018 						op_type,
1019 						n * ref_entries->nb_segments,
1020 						mbuf_pool->size);
1021 
1022 				ret = rte_pktmbuf_chain(m_head, m_tail);
1023 				TEST_ASSERT_SUCCESS(ret,
1024 						"Couldn't chain mbufs from %d data type mbuf pool",
1025 						op_type);
1026 			}
1027 		}
1028 	}
1029 
1030 	return 0;
1031 }
1032 
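/* Allocate a zeroed op_data array on the given socket, falling back to other sockets on failure */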
1033 static int
1034 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1035 		const int socket)
1036 {
1037 	int i;
1038 
1039 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1040 	if (*buffers == NULL) {
1041 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1042 				socket);
1043 		/* try to allocate memory on other detected sockets */
1044 		for (i = 0; i < socket; i++) {
1045 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1046 			if (*buffers != NULL)
1047 				break;
1048 		}
1049 	}
1050 
1051 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1052 }
1053 
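/* Rescale the input LLRs to the maximum LLR modulus supported by the turbo decoder */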
1054 static void
1055 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1056 		const uint16_t n, const int8_t max_llr_modulus)
1057 {
1058 	uint16_t i, byte_idx;
1059 
1060 	for (i = 0; i < n; ++i) {
1061 		struct rte_mbuf *m = input_ops[i].data;
1062 		while (m != NULL) {
1063 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1064 					input_ops[i].offset);
1065 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1066 					++byte_idx)
1067 				llr[byte_idx] = round((double)max_llr_modulus *
1068 						llr[byte_idx] / INT8_MAX);
1069 
1070 			m = m->next;
1071 		}
1072 	}
1073 }
1074 
1075 /*
1076  * We may have to insert filler bits
1077  * when they are required by the HARQ assumption
1078  */
1079 static void
1080 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1081 		const uint16_t n, struct test_op_params *op_params)
1082 {
1083 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1084 
1085 	if (input_ops == NULL)
1086 		return;
1087 	/* No need to add filler if not required by device */
1088 	if (!(ldpc_cap_flags &
1089 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1090 		return;
1091 	/* No need to add filler for loopback operation */
1092 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1093 		return;
1094 
1095 	uint16_t i, j, parity_offset;
1096 	for (i = 0; i < n; ++i) {
1097 		struct rte_mbuf *m = input_ops[i].data;
1098 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1099 				input_ops[i].offset);
1100 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1101 				* dec.z_c - dec.n_filler;
1102 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1103 		m->data_len = new_hin_size;
1104 		input_ops[i].length = new_hin_size;
1105 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1106 				j--)
1107 			llr[j] = llr[j - dec.n_filler];
1108 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1109 		for (j = 0; j < dec.n_filler; j++)
1110 			llr[parity_offset + j] = llr_max_pre_scaling;
1111 	}
1112 }
1113 
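/* Scale and saturate the input LLRs to the LLR size and number of decimals used by the LDPC decoder */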
1114 static void
1115 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1116 		const uint16_t n, const int8_t llr_size,
1117 		const int8_t llr_decimals)
1118 {
1119 	if (input_ops == NULL)
1120 		return;
1121 
1122 	uint16_t i, byte_idx;
1123 
1124 	int16_t llr_max, llr_min, llr_tmp;
1125 	llr_max = (1 << (llr_size - 1)) - 1;
1126 	llr_min = -llr_max;
1127 	for (i = 0; i < n; ++i) {
1128 		struct rte_mbuf *m = input_ops[i].data;
1129 		while (m != NULL) {
1130 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1131 					input_ops[i].offset);
1132 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1133 					++byte_idx) {
1134 
1135 				llr_tmp = llr[byte_idx];
1136 				if (llr_decimals == 4)
1137 					llr_tmp *= 8;
1138 				else if (llr_decimals == 2)
1139 					llr_tmp *= 2;
1140 				else if (llr_decimals == 0)
1141 					llr_tmp /= 2;
1142 				llr_tmp = RTE_MIN(llr_max,
1143 						RTE_MAX(llr_min, llr_tmp));
1144 				llr[byte_idx] = (int8_t) llr_tmp;
1145 			}
1146 
1147 			m = m->next;
1148 		}
1149 	}
1150 }
1151 
1152 
1153 
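/* Allocate and initialise the per-queue data buffers, then apply any LLR scaling or filler insertion the device requires */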
1154 static int
1155 fill_queue_buffers(struct test_op_params *op_params,
1156 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1157 		struct rte_mempool *soft_out_mp,
1158 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1159 		uint16_t queue_id,
1160 		const struct rte_bbdev_op_cap *capabilities,
1161 		uint16_t min_alignment, const int socket_id)
1162 {
1163 	int ret;
1164 	enum op_data_type type;
1165 	const uint16_t n = op_params->num_to_process;
1166 
1167 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1168 		in_mp,
1169 		soft_out_mp,
1170 		hard_out_mp,
1171 		harq_in_mp,
1172 		harq_out_mp,
1173 	};
1174 
1175 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1176 		&op_params->q_bufs[socket_id][queue_id].inputs,
1177 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1178 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1179 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1180 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1181 	};
1182 
1183 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1184 		struct op_data_entries *ref_entries =
1185 				&test_vector.entries[type];
1186 		if (ref_entries->nb_segments == 0)
1187 			continue;
1188 
1189 		ret = allocate_buffers_on_socket(queue_ops[type],
1190 				n * sizeof(struct rte_bbdev_op_data),
1191 				socket_id);
1192 		TEST_ASSERT_SUCCESS(ret,
1193 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1194 
1195 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1196 				mbuf_pools[type], n, type, min_alignment);
1197 		TEST_ASSERT_SUCCESS(ret,
1198 				"Couldn't init rte_bbdev_op_data structs");
1199 	}
1200 
1201 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1202 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1203 			capabilities->cap.turbo_dec.max_llr_modulus);
1204 
1205 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1206 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1207 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1208 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1209 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1210 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1211 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1212 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1213 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1214 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1215 		if (!loopback && !llr_comp)
1216 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1217 					ldpc_llr_size, ldpc_llr_decimals);
1218 		if (!loopback && !harq_comp)
1219 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1220 					ldpc_llr_size, ldpc_llr_decimals);
1221 		if (!loopback)
1222 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1223 					op_params);
1224 	}
1225 
1226 	return 0;
1227 }
1228 
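/* Free the device mempools and the per-queue op_data buffers */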
1229 static void
1230 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1231 {
1232 	unsigned int i, j;
1233 
1234 	rte_mempool_free(ad->ops_mempool);
1235 	rte_mempool_free(ad->in_mbuf_pool);
1236 	rte_mempool_free(ad->hard_out_mbuf_pool);
1237 	rte_mempool_free(ad->soft_out_mbuf_pool);
1238 	rte_mempool_free(ad->harq_in_mbuf_pool);
1239 	rte_mempool_free(ad->harq_out_mbuf_pool);
1240 
1241 	for (i = 0; i < rte_lcore_count(); ++i) {
1242 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1243 			rte_free(op_params->q_bufs[j][i].inputs);
1244 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1245 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1246 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1247 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1248 		}
1249 	}
1250 }
1251 
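/* Copy the reference turbo decode parameters into each op and attach its per-op data buffers */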
1252 static void
1253 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1254 		unsigned int start_idx,
1255 		struct rte_bbdev_op_data *inputs,
1256 		struct rte_bbdev_op_data *hard_outputs,
1257 		struct rte_bbdev_op_data *soft_outputs,
1258 		struct rte_bbdev_dec_op *ref_op)
1259 {
1260 	unsigned int i;
1261 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1262 
1263 	for (i = 0; i < n; ++i) {
1264 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1265 			ops[i]->turbo_dec.tb_params.ea =
1266 					turbo_dec->tb_params.ea;
1267 			ops[i]->turbo_dec.tb_params.eb =
1268 					turbo_dec->tb_params.eb;
1269 			ops[i]->turbo_dec.tb_params.k_pos =
1270 					turbo_dec->tb_params.k_pos;
1271 			ops[i]->turbo_dec.tb_params.k_neg =
1272 					turbo_dec->tb_params.k_neg;
1273 			ops[i]->turbo_dec.tb_params.c =
1274 					turbo_dec->tb_params.c;
1275 			ops[i]->turbo_dec.tb_params.c_neg =
1276 					turbo_dec->tb_params.c_neg;
1277 			ops[i]->turbo_dec.tb_params.cab =
1278 					turbo_dec->tb_params.cab;
1279 			ops[i]->turbo_dec.tb_params.r =
1280 					turbo_dec->tb_params.r;
1281 		} else {
1282 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1283 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1284 		}
1285 
1286 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1287 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1288 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1289 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1290 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1291 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1292 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1293 
1294 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1295 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1296 		if (soft_outputs != NULL)
1297 			ops[i]->turbo_dec.soft_output =
1298 				soft_outputs[start_idx + i];
1299 	}
1300 }
1301 
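/* Copy the reference turbo encode parameters into each op and attach its per-op data buffers */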
1302 static void
1303 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1304 		unsigned int start_idx,
1305 		struct rte_bbdev_op_data *inputs,
1306 		struct rte_bbdev_op_data *outputs,
1307 		struct rte_bbdev_enc_op *ref_op)
1308 {
1309 	unsigned int i;
1310 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1311 	for (i = 0; i < n; ++i) {
1312 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1313 			ops[i]->turbo_enc.tb_params.ea =
1314 					turbo_enc->tb_params.ea;
1315 			ops[i]->turbo_enc.tb_params.eb =
1316 					turbo_enc->tb_params.eb;
1317 			ops[i]->turbo_enc.tb_params.k_pos =
1318 					turbo_enc->tb_params.k_pos;
1319 			ops[i]->turbo_enc.tb_params.k_neg =
1320 					turbo_enc->tb_params.k_neg;
1321 			ops[i]->turbo_enc.tb_params.c =
1322 					turbo_enc->tb_params.c;
1323 			ops[i]->turbo_enc.tb_params.c_neg =
1324 					turbo_enc->tb_params.c_neg;
1325 			ops[i]->turbo_enc.tb_params.cab =
1326 					turbo_enc->tb_params.cab;
1327 			ops[i]->turbo_enc.tb_params.ncb_pos =
1328 					turbo_enc->tb_params.ncb_pos;
1329 			ops[i]->turbo_enc.tb_params.ncb_neg =
1330 					turbo_enc->tb_params.ncb_neg;
1331 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1332 		} else {
1333 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1334 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1335 			ops[i]->turbo_enc.cb_params.ncb =
1336 					turbo_enc->cb_params.ncb;
1337 		}
1338 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1339 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1340 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1341 
1342 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1343 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1344 	}
1345 }
1346 
1347 
1348 /* Returns a random number drawn from a normal distribution
1349  * with mean 0 and variance 1,
1350  * using the Marsaglia polar method
1351  */
1352 static double
1353 randn(int n)
1354 {
1355 	double S, Z, U1, U2, u, v, fac;
1356 
1357 	do {
1358 		U1 = (double)rand() / RAND_MAX;
1359 		U2 = (double)rand() / RAND_MAX;
1360 		u = 2. * U1 - 1.;
1361 		v = 2. * U2 - 1.;
1362 		S = u * u + v * v;
1363 	} while (S >= 1 || S == 0);
1364 	fac = sqrt(-2. * log(S) / S);
1365 	Z = (n % 2) ? u * fac : v * fac;
1366 	return Z;
1367 }
1368 
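/* max* operator: max(A, B) plus a logarithmic correction term */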
1369 static inline double
1370 maxstar(double A, double B)
1371 {
1372 	if (fabs(A - B) > 5)
1373 		return RTE_MAX(A, B);
1374 	else
1375 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1376 }
1377 
1378 /*
1379  * Generate Qm LLRs for Qm==8
1380  * Modulation, AWGN and LLR estimation based on the max-log approximation
1381  */
1382 static void
1383 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1384 {
1385 	int qm = 8;
1386 	int qam = 256;
1387 	int m, k;
1388 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1389 	/* 5.1.4 of TS38.211 */
1390 	const double symbols_I[256] = {
1391 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1392 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1393 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1394 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1395 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1396 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1397 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1398 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1399 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1400 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1401 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1402 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1403 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1404 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1405 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1406 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1407 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1408 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1409 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1410 			-13, -13, -15, -15, -13, -13, -15, -15};
1411 	const double symbols_Q[256] = {
1412 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1413 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1414 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1415 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1416 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1417 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1418 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1419 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1420 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1421 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1422 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1423 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1424 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1425 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1426 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1427 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1428 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1429 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1430 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1431 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1432 	/* Average constellation point energy */
1433 	N0 *= 170.0;
1434 	for (k = 0; k < qm; k++)
1435 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1436 	/* 5.1.4 of TS38.211 */
1437 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1438 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1439 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1440 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1441 	/* AWGN channel */
1442 	I += sqrt(N0 / 2) * randn(0);
1443 	Q += sqrt(N0 / 2) * randn(1);
1444 	/*
1445 	 * Calculate the log of the probability that each of
1446 	 * the constellation points was transmitted
1447 	 */
1448 	for (m = 0; m < qam; m++)
1449 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1450 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1451 	/* Calculate an LLR for each of the 256QAM bits in the set */
1452 	for (k = 0; k < qm; k++) {
1453 		p0 = -999999;
1454 		p1 = -999999;
1455 		/* For each constellation point */
1456 		for (m = 0; m < qam; m++) {
1457 			if ((m >> (qm - k - 1)) & 1)
1458 				p1 = maxstar(p1, log_syml_prob[m]);
1459 			else
1460 				p0 = maxstar(p0, log_syml_prob[m]);
1461 		}
1462 		/* Calculate the LLR */
1463 		llr_ = p0 - p1;
1464 		llr_ *= (1 << ldpc_llr_decimals);
1465 		llr_ = round(llr_);
1466 		if (llr_ > llr_max)
1467 			llr_ = llr_max;
1468 		if (llr_ < -llr_max)
1469 			llr_ = -llr_max;
1470 		llrs[qm * i + k] = (int8_t) llr_;
1471 	}
1472 }
1473 
1474 
1475 /*
1476  * Generate Qm LLRs for Qm==6
1477  * Modulation, AWGN and LLR estimation based on the max-log approximation
1478  */
1479 static void
1480 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1481 {
1482 	int qm = 6;
1483 	int qam = 64;
1484 	int m, k;
1485 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1486 	/* 5.1.4 of TS38.211 */
1487 	const double symbols_I[64] = {
1488 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1489 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1490 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1491 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1492 			-5, -5, -7, -7, -5, -5, -7, -7};
1493 	const double symbols_Q[64] = {
1494 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1495 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1496 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1497 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1498 			-3, -1, -3, -1, -5, -7, -5, -7};
1499 	/* Average constellation point energy */
1500 	N0 *= 42.0;
1501 	for (k = 0; k < qm; k++)
1502 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1503 	/* 5.1.4 of TS38.211 */
1504 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1505 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1506 	/* AWGN channel */
1507 	I += sqrt(N0 / 2) * randn(0);
1508 	Q += sqrt(N0 / 2) * randn(1);
1509 	/*
1510 	 * Calculate the log of the probability that each of
1511 	 * the constellation points was transmitted
1512 	 */
1513 	for (m = 0; m < qam; m++)
1514 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1515 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1516 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1517 	for (k = 0; k < qm; k++) {
1518 		p0 = -999999;
1519 		p1 = -999999;
1520 		/* For each constellation point */
1521 		for (m = 0; m < qam; m++) {
1522 			if ((m >> (qm - k - 1)) & 1)
1523 				p1 = maxstar(p1, log_syml_prob[m]);
1524 			else
1525 				p0 = maxstar(p0, log_syml_prob[m]);
1526 		}
1527 		/* Calculate the LLR */
1528 		llr_ = p0 - p1;
1529 		llr_ *= (1 << ldpc_llr_decimals);
1530 		llr_ = round(llr_);
1531 		if (llr_ > llr_max)
1532 			llr_ = llr_max;
1533 		if (llr_ < -llr_max)
1534 			llr_ = -llr_max;
1535 		llrs[qm * i + k] = (int8_t) llr_;
1536 	}
1537 }
1538 
1539 /*
1540  * Generate Qm LLRs for Qm==4
1541  * Modulation, AWGN and LLR estimation based on the max-log approximation
1542  */
1543 static void
1544 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1545 {
1546 	int qm = 4;
1547 	int qam = 16;
1548 	int m, k;
1549 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1550 	/* 5.1.4 of TS38.211 */
1551 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1552 			-1, -1, -3, -3, -1, -1, -3, -3};
1553 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1554 			1, 3, 1, 3, -1, -3, -1, -3};
1555 	/* Average constellation point energy */
1556 	N0 *= 10.0;
1557 	for (k = 0; k < qm; k++)
1558 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1559 	/* 5.1.4 of TS38.211 */
1560 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1561 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1562 	/* AWGN channel */
1563 	I += sqrt(N0 / 2) * randn(0);
1564 	Q += sqrt(N0 / 2) * randn(1);
1565 	/*
1566 	 * Calculate the log of the probability that each of
1567 	 * the constellation points was transmitted
1568 	 */
1569 	for (m = 0; m < qam; m++)
1570 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1571 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1572 	/* Calculate an LLR for each of the 16QAM bits in the set */
1573 	for (k = 0; k < qm; k++) {
1574 		p0 = -999999;
1575 		p1 = -999999;
1576 		/* For each constellation point */
1577 		for (m = 0; m < qam; m++) {
1578 			if ((m >> (qm - k - 1)) & 1)
1579 				p1 = maxstar(p1, log_syml_prob[m]);
1580 			else
1581 				p0 = maxstar(p0, log_syml_prob[m]);
1582 		}
1583 		/* Calculate the LLR */
1584 		llr_ = p0 - p1;
1585 		llr_ *= (1 << ldpc_llr_decimals);
1586 		llr_ = round(llr_);
1587 		if (llr_ > llr_max)
1588 			llr_ = llr_max;
1589 		if (llr_ < -llr_max)
1590 			llr_ = -llr_max;
1591 		llrs[qm * i + k] = (int8_t) llr_;
1592 	}
1593 }
1594 
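/* Generate an LLR for Qm==2: add AWGN noise to the bit LLR, then scale and saturate it */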
1595 static void
1596 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1597 {
1598 	double b, b1, n;
1599 	double coeff = 2.0 * sqrt(N0);
1600 
1601 	/* Ignore the rare quasi-null LLRs in vectors so that they are not saturated */
1602 	if (llrs[j] < 8 && llrs[j] > -8)
1603 		return;
1604 
1605 	/* Note don't change sign here */
1606 	n = randn(j % 2);
1607 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1608 			+ coeff * n) / N0;
1609 	b = b1 * (1 << ldpc_llr_decimals);
1610 	b = round(b);
1611 	if (b > llr_max)
1612 		b = llr_max;
1613 	if (b < -llr_max)
1614 		b = -llr_max;
1615 	llrs[j] = (int8_t) b;
1616 }
1617 
1618 /* Generate LLR for a given SNR */
1619 static void
1620 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1621 		struct rte_bbdev_dec_op *ref_op)
1622 {
1623 	struct rte_mbuf *m;
1624 	uint16_t qm;
1625 	uint32_t i, j, e, range;
1626 	double N0, llr_max;
1627 
1628 	e = ref_op->ldpc_dec.cb_params.e;
1629 	qm = ref_op->ldpc_dec.q_m;
1630 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1631 	range = e / qm;
1632 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1633 
1634 	for (i = 0; i < n; ++i) {
1635 		m = inputs[i].data;
1636 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1637 		if (qm == 8) {
1638 			for (j = 0; j < range; ++j)
1639 				gen_qm8_llr(llrs, j, N0, llr_max);
1640 		} else if (qm == 6) {
1641 			for (j = 0; j < range; ++j)
1642 				gen_qm6_llr(llrs, j, N0, llr_max);
1643 		} else if (qm == 4) {
1644 			for (j = 0; j < range; ++j)
1645 				gen_qm4_llr(llrs, j, N0, llr_max);
1646 		} else {
1647 			for (j = 0; j < e; ++j)
1648 				gen_qm2_llr(llrs, j, N0, llr_max);
1649 		}
1650 	}
1651 }
1652 
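/* Copy the reference LDPC decode parameters into each op and attach its per-op data buffers */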
1653 static void
1654 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1655 		unsigned int start_idx,
1656 		struct rte_bbdev_op_data *inputs,
1657 		struct rte_bbdev_op_data *hard_outputs,
1658 		struct rte_bbdev_op_data *soft_outputs,
1659 		struct rte_bbdev_op_data *harq_inputs,
1660 		struct rte_bbdev_op_data *harq_outputs,
1661 		struct rte_bbdev_dec_op *ref_op)
1662 {
1663 	unsigned int i;
1664 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1665 
1666 	for (i = 0; i < n; ++i) {
1667 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1668 			ops[i]->ldpc_dec.tb_params.ea =
1669 					ldpc_dec->tb_params.ea;
1670 			ops[i]->ldpc_dec.tb_params.eb =
1671 					ldpc_dec->tb_params.eb;
1672 			ops[i]->ldpc_dec.tb_params.c =
1673 					ldpc_dec->tb_params.c;
1674 			ops[i]->ldpc_dec.tb_params.cab =
1675 					ldpc_dec->tb_params.cab;
1676 			ops[i]->ldpc_dec.tb_params.r =
1677 					ldpc_dec->tb_params.r;
1678 		} else {
1679 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1680 		}
1681 
1682 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1683 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1684 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1685 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1686 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1687 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1688 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1689 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1690 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1691 
1692 		if (hard_outputs != NULL)
1693 			ops[i]->ldpc_dec.hard_output =
1694 					hard_outputs[start_idx + i];
1695 		if (inputs != NULL)
1696 			ops[i]->ldpc_dec.input =
1697 					inputs[start_idx + i];
1698 		if (soft_outputs != NULL)
1699 			ops[i]->ldpc_dec.soft_output =
1700 					soft_outputs[start_idx + i];
1701 		if (harq_inputs != NULL)
1702 			ops[i]->ldpc_dec.harq_combined_input =
1703 					harq_inputs[start_idx + i];
1704 		if (harq_outputs != NULL)
1705 			ops[i]->ldpc_dec.harq_combined_output =
1706 					harq_outputs[start_idx + i];
1707 	}
1708 }
1709 
1710 
1711 static void
1712 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1713 		unsigned int start_idx,
1714 		struct rte_bbdev_op_data *inputs,
1715 		struct rte_bbdev_op_data *outputs,
1716 		struct rte_bbdev_enc_op *ref_op)
1717 {
1718 	unsigned int i;
1719 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1720 	for (i = 0; i < n; ++i) {
1721 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1722 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1723 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1724 			ops[i]->ldpc_enc.tb_params.cab =
1725 					ldpc_enc->tb_params.cab;
1726 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1727 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1728 		} else {
1729 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1730 		}
1731 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1732 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1733 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1734 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1735 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1736 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1737 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1738 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1739 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1740 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1741 	}
1742 }
1743 
1744 static int
1745 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1746 		unsigned int order_idx, const int expected_status)
1747 {
1748 	int status = op->status;
1749 	/* Ignore parity mismatch false alarms when a large iteration count is allowed (iter_max >= 10) */
1750 	if (get_iter_max() >= 10) {
1751 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1752 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1753 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1754 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1755 		}
1756 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1757 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1758 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1759 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1760 		}
1761 	}
1762 
1763 	TEST_ASSERT(status == expected_status,
1764 			"op_status (%d) != expected_status (%d)",
1765 			op->status, expected_status);
1766 
1767 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1768 			"Ordering error, expected %p, got %p",
1769 			(void *)(uintptr_t)order_idx, op->opaque_data);
1770 
1771 	return TEST_SUCCESS;
1772 }
1773 
1774 static int
1775 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1776 		unsigned int order_idx, const int expected_status)
1777 {
1778 	TEST_ASSERT(op->status == expected_status,
1779 			"op_status (%d) != expected_status (%d)",
1780 			op->status, expected_status);
1781 
1782 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1783 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1784 				"Ordering error, expected %p, got %p",
1785 				(void *)(uintptr_t)order_idx, op->opaque_data);
1786 
1787 	return TEST_SUCCESS;
1788 }
1789 
1790 static inline int
1791 validate_op_chain(struct rte_bbdev_op_data *op,
1792 		struct op_data_entries *orig_op)
1793 {
1794 	uint8_t i;
1795 	struct rte_mbuf *m = op->data;
1796 	uint8_t nb_dst_segments = orig_op->nb_segments;
1797 	uint32_t total_data_size = 0;
1798 
1799 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1800 			"Number of segments differ in original (%u) and filled (%u) op",
1801 			nb_dst_segments, m->nb_segs);
1802 
1803 	/* Validate each mbuf segment length */
1804 	for (i = 0; i < nb_dst_segments; ++i) {
1805 		/* Apply offset to the first mbuf segment */
1806 		uint16_t offset = (i == 0) ? op->offset : 0;
1807 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1808 		total_data_size += orig_op->segments[i].length;
1809 
1810 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1811 				"Length of segment differ in original (%u) and filled (%u) op",
1812 				orig_op->segments[i].length, data_len);
1813 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1814 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1815 				data_len,
1816 				"Output buffers (CB=%u) are not equal", i);
1817 		m = m->next;
1818 	}
1819 
1820 	/* Validate total mbuf pkt length */
1821 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1822 	TEST_ASSERT(total_data_size == pkt_len,
1823 			"Length of data differ in original (%u) and filled (%u) op",
1824 			total_data_size, pkt_len);
1825 
1826 	return TEST_SUCCESS;
1827 }
1828 
1829 /*
1830  * Compute K0 for a given configuration, used for the HARQ output length
1831  * computation, as per the definition in 3GPP 38.212 Table 5.4.2.1-2
1832  */
1833 static inline uint16_t
1834 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1835 {
1836 	if (rv_index == 0)
1837 		return 0;
1838 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1839 	if (n_cb == n) {
1840 		if (rv_index == 1)
1841 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1842 		else if (rv_index == 2)
1843 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1844 		else
1845 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1846 	}
1847 	/* LBRM case - includes a division by N */
1848 	if (rv_index == 1)
1849 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1850 				/ n) * z_c;
1851 	else if (rv_index == 2)
1852 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1853 				/ n) * z_c;
1854 	else
1855 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1856 				/ n) * z_c;
1857 }
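
/*
 * Worked example (illustrative only): for BG 1, Zc = 384 and a full
 * circular buffer (n_cb == N = 66 * 384 = 25344), rv_index = 2 yields
 * k0 = 33 * 384 = 12672. In the LBRM case with the same BG/Zc but
 * n_cb = 12672, rv_index = 2 yields
 * k0 = ((33 * 12672) / 25344) * 384 = 16 * 384 = 6144 (integer division).
 */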
1858 
1859 /* HARQ output length including the Filler bits */
1860 static inline uint16_t
1861 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1862 {
1863 	uint16_t k0 = 0;
1864 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1865 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1866 	/* Compute RM out size and number of rows */
1867 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1868 			* ops_ld->z_c - ops_ld->n_filler;
1869 	uint16_t deRmOutSize = RTE_MIN(
1870 			k0 + ops_ld->cb_params.e +
1871 			((k0 > parity_offset) ?
1872 					0 : ops_ld->n_filler),
1873 					ops_ld->n_cb);
1874 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1875 			/ ops_ld->z_c);
1876 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1877 	return harq_output_len;
1878 }
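
/*
 * Worked example (illustrative only): BG 1, Zc = 128, n_filler = 0,
 * n_cb = 66 * 128 = 8448, rv_index = 0 (hence k0 = 0) and e = 3000 give
 * deRmOutSize = min(0 + 3000 + 0, 8448) = 3000,
 * numRows = ceil(3000 / 128) = 24 and a HARQ output length of
 * 24 * 128 = 3072 LLRs.
 */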
1879 
1880 static inline int
1881 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1882 		struct op_data_entries *orig_op,
1883 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1884 {
1885 	uint8_t i;
1886 	uint32_t j, jj, k;
1887 	struct rte_mbuf *m = op->data;
1888 	uint8_t nb_dst_segments = orig_op->nb_segments;
1889 	uint32_t total_data_size = 0;
1890 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1891 	uint32_t byte_error = 0, cum_error = 0, error;
1892 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1893 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1894 	uint16_t parity_offset;
1895 
1896 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1897 			"Number of segments differ in original (%u) and filled (%u) op",
1898 			nb_dst_segments, m->nb_segs);
1899 
1900 	/* Validate each mbuf segment length */
1901 	for (i = 0; i < nb_dst_segments; ++i) {
1902 		/* Apply offset to the first mbuf segment */
1903 		uint16_t offset = (i == 0) ? op->offset : 0;
1904 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1905 		total_data_size += orig_op->segments[i].length;
1906 
1907 		TEST_ASSERT(orig_op->segments[i].length <
1908 				(uint32_t)(data_len + 64),
1909 				"Length of segment differ in original (%u) and filled (%u) op",
1910 				orig_op->segments[i].length, data_len);
1911 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1912 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1913 
1914 		if (!(ldpc_cap_flags &
1915 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1916 				) || (ops_ld->op_flags &
1917 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1918 			data_len -= ops_ld->z_c;
1919 			parity_offset = data_len;
1920 		} else {
1921 			/* Compute RM out size and number of rows */
1922 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1923 					* ops_ld->z_c - ops_ld->n_filler;
1924 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1925 					ops_ld->n_filler;
1926 			if (data_len > deRmOutSize)
1927 				data_len = deRmOutSize;
1928 			if (data_len > orig_op->segments[i].length)
1929 				data_len = orig_op->segments[i].length;
1930 		}
1931 		/*
1932 		 * HARQ output can have minor differences
1933 		 * due to integer representation and related scaling
1934 		 */
1935 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1936 			if (j == parity_offset) {
1937 				/* Special Handling of the filler bits */
1938 				for (k = 0; k < ops_ld->n_filler; k++) {
1939 					if (harq_out[jj] !=
1940 							llr_max_pre_scaling) {
1941 						printf("HARQ Filler issue %d: %d %d\n",
1942 							jj, harq_out[jj],
1943 							llr_max_pre_scaling);
1944 						byte_error++;
1945 					}
1946 					jj++;
1947 				}
1948 			}
1949 			if (!(ops_ld->op_flags &
1950 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1951 				if (ldpc_llr_decimals > 1)
1952 					harq_out[jj] = (harq_out[jj] + 1)
1953 						>> (ldpc_llr_decimals - 1);
1954 				/* Saturated to S7 */
1955 				if (harq_orig[j] > llr_max)
1956 					harq_orig[j] = llr_max;
1957 				if (harq_orig[j] < -llr_max)
1958 					harq_orig[j] = -llr_max;
1959 			}
1960 			if (harq_orig[j] != harq_out[jj]) {
1961 				error = (harq_orig[j] > harq_out[jj]) ?
1962 						harq_orig[j] - harq_out[jj] :
1963 						harq_out[jj] - harq_orig[j];
1964 				abs_harq_origin = harq_orig[j] > 0 ?
1965 							harq_orig[j] :
1966 							-harq_orig[j];
1967 				/* Residual quantization error */
1968 				if ((error > 8 && (abs_harq_origin <
1969 						(llr_max - 16))) ||
1970 						(error > 16)) {
1971 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1972 							j, harq_orig[j],
1973 							harq_out[jj], error);
1974 					byte_error++;
1975 					cum_error += error;
1976 				}
1977 			}
1978 		}
1979 		m = m->next;
1980 	}
1981 
1982 	if (byte_error)
1983 		TEST_ASSERT(byte_error <= 1,
1984 				"HARQ output mismatch (%d) %d",
1985 				byte_error, cum_error);
1986 
1987 	/* Validate total mbuf pkt length */
1988 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1989 	TEST_ASSERT(total_data_size < pkt_len + 64,
1990 			"Length of data differ in original (%u) and filled (%u) op",
1991 			total_data_size, pkt_len);
1992 
1993 	return TEST_SUCCESS;
1994 }
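
/*
 * Illustrative note (values below are assumptions): the comparison above
 * tolerates small quantization differences. With, say, ldpc_llr_size = 8
 * and ldpc_llr_decimals = 2, llr_max = (1 << 6) - 1 = 63, so a byte only
 * counts as an error when it differs by more than 8 while the reference
 * magnitude is below 63 - 16 = 47, or when it differs by more than 16,
 * and at most one such byte error is accepted per HARQ output buffer.
 */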
1995 
1996 static int
1997 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1998 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1999 {
2000 	unsigned int i;
2001 	int ret;
2002 	struct op_data_entries *hard_data_orig =
2003 			&test_vector.entries[DATA_HARD_OUTPUT];
2004 	struct op_data_entries *soft_data_orig =
2005 			&test_vector.entries[DATA_SOFT_OUTPUT];
2006 	struct rte_bbdev_op_turbo_dec *ops_td;
2007 	struct rte_bbdev_op_data *hard_output;
2008 	struct rte_bbdev_op_data *soft_output;
2009 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2010 
2011 	for (i = 0; i < n; ++i) {
2012 		ops_td = &ops[i]->turbo_dec;
2013 		hard_output = &ops_td->hard_output;
2014 		soft_output = &ops_td->soft_output;
2015 
2016 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2017 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2018 					"Returned iter_count (%d) > expected iter_count (%d)",
2019 					ops_td->iter_count, ref_td->iter_count);
2020 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2021 		TEST_ASSERT_SUCCESS(ret,
2022 				"Checking status and ordering for decoder failed");
2023 
2024 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2025 				hard_data_orig),
2026 				"Hard output buffers (CB=%u) are not equal",
2027 				i);
2028 
2029 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2030 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2031 					soft_data_orig),
2032 					"Soft output buffers (CB=%u) are not equal",
2033 					i);
2034 	}
2035 
2036 	return TEST_SUCCESS;
2037 }
2038 
2039 /* Count the number of code blocks in error */
2040 static int
2041 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2042 {
2043 	unsigned int i;
2044 	struct op_data_entries *hard_data_orig =
2045 			&test_vector.entries[DATA_HARD_OUTPUT];
2046 	struct rte_bbdev_op_ldpc_dec *ops_td;
2047 	struct rte_bbdev_op_data *hard_output;
2048 	int errors = 0;
2049 	struct rte_mbuf *m;
2050 
2051 	for (i = 0; i < n; ++i) {
2052 		ops_td = &ops[i]->ldpc_dec;
2053 		hard_output = &ops_td->hard_output;
2054 		m = hard_output->data;
2055 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2056 				hard_data_orig->segments[0].addr,
2057 				hard_data_orig->segments[0].length))
2058 			errors++;
2059 	}
2060 	return errors;
2061 }
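
/*
 * Illustrative note: the return value is the raw count of code blocks whose
 * hard output differs from the reference; callers divide it by the number
 * of operations to obtain a block error rate, e.g. 3 mismatching blocks out
 * of 512 operations correspond to a BLER of roughly 0.6%.
 */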
2062 
2063 static int
2064 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2065 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2066 {
2067 	unsigned int i;
2068 	int ret;
2069 	struct op_data_entries *hard_data_orig =
2070 			&test_vector.entries[DATA_HARD_OUTPUT];
2071 	struct op_data_entries *soft_data_orig =
2072 			&test_vector.entries[DATA_SOFT_OUTPUT];
2073 	struct op_data_entries *harq_data_orig =
2074 				&test_vector.entries[DATA_HARQ_OUTPUT];
2075 	struct rte_bbdev_op_ldpc_dec *ops_td;
2076 	struct rte_bbdev_op_data *hard_output;
2077 	struct rte_bbdev_op_data *harq_output;
2078 	struct rte_bbdev_op_data *soft_output;
2079 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2080 
2081 	for (i = 0; i < n; ++i) {
2082 		ops_td = &ops[i]->ldpc_dec;
2083 		hard_output = &ops_td->hard_output;
2084 		harq_output = &ops_td->harq_combined_output;
2085 		soft_output = &ops_td->soft_output;
2086 
2087 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2088 		TEST_ASSERT_SUCCESS(ret,
2089 				"Checking status and ordering for decoder failed");
2090 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2091 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2092 					"Returned iter_count (%d) > expected iter_count (%d)",
2093 					ops_td->iter_count, ref_td->iter_count);
2094 		/*
2095 		 * We can ignore output data when the decoding failed to
2096 		 * converge or for loop-back cases
2097 		 */
2098 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2099 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2100 				) && (
2101 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2102 						)) == 0)
2103 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2104 					hard_data_orig),
2105 					"Hard output buffers (CB=%u) are not equal",
2106 					i);
2107 
2108 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2109 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2110 					soft_data_orig),
2111 					"Soft output buffers (CB=%u) are not equal",
2112 					i);
2113 		if (ref_op->ldpc_dec.op_flags &
2114 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2115 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2116 					harq_data_orig, ops_td),
2117 					"HARQ output buffers (CB=%u) are not equal",
2118 					i);
2119 		}
2120 		if (ref_op->ldpc_dec.op_flags &
2121 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2122 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2123 					harq_data_orig, ops_td),
2124 					"HARQ output buffers (CB=%u) are not equal",
2125 					i);
2126 
2127 	}
2128 
2129 	return TEST_SUCCESS;
2130 }
2131 
2132 
2133 static int
2134 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2135 		struct rte_bbdev_enc_op *ref_op)
2136 {
2137 	unsigned int i;
2138 	int ret;
2139 	struct op_data_entries *hard_data_orig =
2140 			&test_vector.entries[DATA_HARD_OUTPUT];
2141 
2142 	for (i = 0; i < n; ++i) {
2143 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2144 		TEST_ASSERT_SUCCESS(ret,
2145 				"Checking status and ordering for encoder failed");
2146 		TEST_ASSERT_SUCCESS(validate_op_chain(
2147 				&ops[i]->turbo_enc.output,
2148 				hard_data_orig),
2149 				"Output buffers (CB=%u) are not equal",
2150 				i);
2151 	}
2152 
2153 	return TEST_SUCCESS;
2154 }
2155 
2156 static int
2157 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2158 		struct rte_bbdev_enc_op *ref_op)
2159 {
2160 	unsigned int i;
2161 	int ret;
2162 	struct op_data_entries *hard_data_orig =
2163 			&test_vector.entries[DATA_HARD_OUTPUT];
2164 
2165 	for (i = 0; i < n; ++i) {
2166 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2167 		TEST_ASSERT_SUCCESS(ret,
2168 				"Checking status and ordering for encoder failed");
2169 		TEST_ASSERT_SUCCESS(validate_op_chain(
2170 				&ops[i]->ldpc_enc.output,
2171 				hard_data_orig),
2172 				"Output buffers (CB=%u) are not equal",
2173 				i);
2174 	}
2175 
2176 	return TEST_SUCCESS;
2177 }
2178 
2179 static void
2180 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2181 {
2182 	unsigned int i;
2183 	struct op_data_entries *entry;
2184 
2185 	op->turbo_dec = test_vector.turbo_dec;
2186 	entry = &test_vector.entries[DATA_INPUT];
2187 	for (i = 0; i < entry->nb_segments; ++i)
2188 		op->turbo_dec.input.length +=
2189 				entry->segments[i].length;
2190 }
2191 
2192 static void
2193 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2194 {
2195 	unsigned int i;
2196 	struct op_data_entries *entry;
2197 
2198 	op->ldpc_dec = test_vector.ldpc_dec;
2199 	entry = &test_vector.entries[DATA_INPUT];
2200 	for (i = 0; i < entry->nb_segments; ++i)
2201 		op->ldpc_dec.input.length +=
2202 				entry->segments[i].length;
2203 	if (test_vector.ldpc_dec.op_flags &
2204 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2205 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2206 		for (i = 0; i < entry->nb_segments; ++i)
2207 			op->ldpc_dec.harq_combined_input.length +=
2208 				entry->segments[i].length;
2209 	}
2210 }
2211 
2212 
2213 static void
2214 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2215 {
2216 	unsigned int i;
2217 	struct op_data_entries *entry;
2218 
2219 	op->turbo_enc = test_vector.turbo_enc;
2220 	entry = &test_vector.entries[DATA_INPUT];
2221 	for (i = 0; i < entry->nb_segments; ++i)
2222 		op->turbo_enc.input.length +=
2223 				entry->segments[i].length;
2224 }
2225 
2226 static void
2227 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2228 {
2229 	unsigned int i;
2230 	struct op_data_entries *entry;
2231 
2232 	op->ldpc_enc = test_vector.ldpc_enc;
2233 	entry = &test_vector.entries[DATA_INPUT];
2234 	for (i = 0; i < entry->nb_segments; ++i)
2235 		op->ldpc_enc.input.length +=
2236 				entry->segments[i].length;
2237 }
2238 
2239 static uint32_t
2240 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2241 {
2242 	uint8_t i;
2243 	uint32_t c, r, tb_size = 0;
2244 
2245 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2246 		tb_size = op->turbo_dec.tb_params.k_neg;
2247 	} else {
2248 		c = op->turbo_dec.tb_params.c;
2249 		r = op->turbo_dec.tb_params.r;
2250 		for (i = 0; i < c-r; i++)
2251 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2252 				op->turbo_dec.tb_params.k_neg :
2253 				op->turbo_dec.tb_params.k_pos;
2254 	}
2255 	return tb_size;
2256 }
2257 
2258 static uint32_t
2259 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2260 {
2261 	uint8_t i;
2262 	uint32_t c, r, tb_size = 0;
2263 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2264 
2265 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2266 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2267 	} else {
2268 		c = op->ldpc_dec.tb_params.c;
2269 		r = op->ldpc_dec.tb_params.r;
2270 		for (i = 0; i < c-r; i++)
2271 			tb_size += sys_cols * op->ldpc_dec.z_c
2272 					- op->ldpc_dec.n_filler;
2273 	}
2274 	return tb_size;
2275 }
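
/*
 * Worked example (illustrative only): in code-block mode with BG 1,
 * Zc = 384 and n_filler = 144 the information size per block is
 * 22 * 384 - 144 = 8304 bits; in transport-block mode the same per-block
 * size is summed over the c - r code blocks.
 */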
2276 
2277 static uint32_t
2278 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2279 {
2280 	uint8_t i;
2281 	uint32_t c, r, tb_size = 0;
2282 
2283 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2284 		tb_size = op->turbo_enc.tb_params.k_neg;
2285 	} else {
2286 		c = op->turbo_enc.tb_params.c;
2287 		r = op->turbo_enc.tb_params.r;
2288 		for (i = 0; i < c-r; i++)
2289 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2290 				op->turbo_enc.tb_params.k_neg :
2291 				op->turbo_enc.tb_params.k_pos;
2292 	}
2293 	return tb_size;
2294 }
2295 
2296 static uint32_t
2297 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2298 {
2299 	uint8_t i;
2300 	uint32_t c, r, tb_size = 0;
2301 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2302 
2303 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2304 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2305 	} else {
2306 		c = op->ldpc_enc.tb_params.c;
2307 		r = op->ldpc_enc.tb_params.r;
2308 		for (i = 0; i < c-r; i++)
2309 			tb_size += sys_cols * op->ldpc_enc.z_c
2310 					- op->ldpc_enc.n_filler;
2311 	}
2312 	return tb_size;
2313 }
2314 
2315 
2316 static int
2317 init_test_op_params(struct test_op_params *op_params,
2318 		enum rte_bbdev_op_type op_type, const int expected_status,
2319 		const int vector_mask, struct rte_mempool *ops_mp,
2320 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2321 {
2322 	int ret = 0;
2323 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2324 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2325 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2326 				&op_params->ref_dec_op, 1);
2327 	else
2328 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2329 				&op_params->ref_enc_op, 1);
2330 
2331 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2332 
2333 	op_params->mp = ops_mp;
2334 	op_params->burst_sz = burst_sz;
2335 	op_params->num_to_process = num_to_process;
2336 	op_params->num_lcores = num_lcores;
2337 	op_params->vector_mask = vector_mask;
2338 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2339 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2340 		op_params->ref_dec_op->status = expected_status;
2341 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2342 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2343 		op_params->ref_enc_op->status = expected_status;
2344 	return 0;
2345 }
2346 
2347 static int
2348 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2349 		struct test_op_params *op_params)
2350 {
2351 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2352 	unsigned int i;
2353 	struct active_device *ad;
2354 	unsigned int burst_sz = get_burst_sz();
2355 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2356 	const struct rte_bbdev_op_cap *capabilities = NULL;
2357 
2358 	ad = &active_devs[dev_id];
2359 
2360 	/* Check if device supports op_type */
2361 	if (!is_avail_op(ad, test_vector.op_type))
2362 		return TEST_SUCCESS;
2363 
2364 	struct rte_bbdev_info info;
2365 	rte_bbdev_info_get(ad->dev_id, &info);
2366 	socket_id = GET_SOCKET(info.socket_id);
2367 
2368 	f_ret = create_mempools(ad, socket_id, op_type,
2369 			get_num_ops());
2370 	if (f_ret != TEST_SUCCESS) {
2371 		printf("Couldn't create mempools");
2372 		goto fail;
2373 	}
2374 	if (op_type == RTE_BBDEV_OP_NONE)
2375 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2376 
2377 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2378 			test_vector.expected_status,
2379 			test_vector.mask,
2380 			ad->ops_mempool,
2381 			burst_sz,
2382 			get_num_ops(),
2383 			get_num_lcores());
2384 	if (f_ret != TEST_SUCCESS) {
2385 		printf("Couldn't init test op params");
2386 		goto fail;
2387 	}
2388 
2389 
2390 	/* Find capabilities */
2391 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2392 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2393 		if (cap->type == test_vector.op_type) {
2394 			capabilities = cap;
2395 			break;
2396 		}
2397 		cap++;
2398 	}
2399 	TEST_ASSERT_NOT_NULL(capabilities,
2400 			"Couldn't find capabilities");
2401 
2402 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2403 		create_reference_dec_op(op_params->ref_dec_op);
2404 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2405 		create_reference_enc_op(op_params->ref_enc_op);
2406 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2407 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2408 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2409 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2410 
2411 	for (i = 0; i < ad->nb_queues; ++i) {
2412 		f_ret = fill_queue_buffers(op_params,
2413 				ad->in_mbuf_pool,
2414 				ad->hard_out_mbuf_pool,
2415 				ad->soft_out_mbuf_pool,
2416 				ad->harq_in_mbuf_pool,
2417 				ad->harq_out_mbuf_pool,
2418 				ad->queue_ids[i],
2419 				capabilities,
2420 				info.drv.min_alignment,
2421 				socket_id);
2422 		if (f_ret != TEST_SUCCESS) {
2423 			printf("Couldn't init queue buffers");
2424 			goto fail;
2425 		}
2426 	}
2427 
2428 	/* Run test case function */
2429 	t_ret = test_case_func(ad, op_params);
2430 
2431 	/* Free active device resources and return */
2432 	free_buffers(ad, op_params);
2433 	return t_ret;
2434 
2435 fail:
2436 	free_buffers(ad, op_params);
2437 	return TEST_FAILED;
2438 }
2439 
2440 /* Run given test function per active device per supported op type
2441  * per burst size.
2442  */
2443 static int
2444 run_test_case(test_case_function *test_case_func)
2445 {
2446 	int ret = 0;
2447 	uint8_t dev;
2448 
2449 	/* Alloc op_params */
2450 	struct test_op_params *op_params = rte_zmalloc(NULL,
2451 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2452 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2453 			RTE_ALIGN(sizeof(struct test_op_params),
2454 				RTE_CACHE_LINE_SIZE));
2455 
2456 	/* For each device run test case function */
2457 	for (dev = 0; dev < nb_active_devs; ++dev)
2458 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2459 
2460 	rte_free(op_params);
2461 
2462 	return ret;
2463 }
2464 
2465 
2466 /* Push back the HARQ output from DDR to host */
2467 static void
2468 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2469 		struct rte_bbdev_dec_op **ops,
2470 		const uint16_t n)
2471 {
2472 	uint16_t j;
2473 	int save_status, ret;
2474 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2475 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2476 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2477 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2478 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2479 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2480 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2481 	for (j = 0; j < n; ++j) {
2482 		if ((loopback && mem_out) || hc_out) {
2483 			save_status = ops[j]->status;
2484 			ops[j]->ldpc_dec.op_flags =
2485 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2486 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2487 			if (h_comp)
2488 				ops[j]->ldpc_dec.op_flags +=
2489 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2490 			ops[j]->ldpc_dec.harq_combined_input.offset =
2491 					harq_offset;
2492 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2493 			harq_offset += HARQ_INCR;
2494 			if (!loopback)
2495 				ops[j]->ldpc_dec.harq_combined_input.length =
2496 				ops[j]->ldpc_dec.harq_combined_output.length;
2497 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2498 					&ops[j], 1);
2499 			ret = 0;
2500 			while (ret == 0)
2501 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2502 						dev_id, queue_id,
2503 						&ops_deq[j], 1);
2504 			ops[j]->ldpc_dec.op_flags = flags;
2505 			ops[j]->status = save_status;
2506 		}
2507 	}
2508 }
2509 
2510 /*
2511  * Preload the HARQ memory input into the HW external DDR
2512  * and adjust the HARQ offsets accordingly
2513  */
2514 static void
2515 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2516 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2517 		bool preload)
2518 {
2519 	uint16_t j;
2520 	int deq;
2521 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2522 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2523 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2524 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2525 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2526 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2527 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2528 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2529 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2530 	if ((mem_in || hc_in) && preload) {
2531 		for (j = 0; j < n; ++j) {
2532 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2533 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2534 			ops[j]->ldpc_dec.op_flags =
2535 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2536 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2537 			if (h_comp)
2538 				ops[j]->ldpc_dec.op_flags +=
2539 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2540 			ops[j]->ldpc_dec.harq_combined_output.offset =
2541 					harq_offset;
2542 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2543 			harq_offset += HARQ_INCR;
2544 		}
2545 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2546 		deq = 0;
2547 		while (deq != n)
2548 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2549 					dev_id, queue_id, &ops_deq[deq],
2550 					n - deq);
2551 		/* Restore the operations */
2552 		for (j = 0; j < n; ++j) {
2553 			ops[j]->ldpc_dec.op_flags = flags;
2554 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2555 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2556 		}
2557 	}
2558 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2559 	for (j = 0; j < n; ++j) {
2560 		/* Adjust HARQ offset when we reach external DDR */
2561 		if (mem_in || hc_in)
2562 			ops[j]->ldpc_dec.harq_combined_input.offset
2563 				= harq_offset;
2564 		if (mem_out || hc_out)
2565 			ops[j]->ldpc_dec.harq_combined_output.offset
2566 				= harq_offset;
2567 		harq_offset += HARQ_INCR;
2568 	}
2569 }
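
/*
 * Illustrative note (assumption about the memory map used by this test):
 * each operation is given its own HARQ_INCR-sized region in device DDR,
 * laid out per queue, so operation j on queue q uses
 * offset (q * MAX_OPS + j) * HARQ_INCR. The preload step above writes the
 * host-side HARQ input into that region through a loopback enqueue before
 * the timed run starts, and retrieve_harq_ddr() reads it back the same way.
 */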
2570 
2571 static void
2572 dequeue_event_callback(uint16_t dev_id,
2573 		enum rte_bbdev_event_type event, void *cb_arg,
2574 		void *ret_param)
2575 {
2576 	int ret;
2577 	uint16_t i;
2578 	uint64_t total_time;
2579 	uint16_t deq, burst_sz, num_ops;
2580 	uint16_t queue_id = *(uint16_t *) ret_param;
2581 	struct rte_bbdev_info info;
2582 	double tb_len_bits;
2583 	struct thread_params *tp = cb_arg;
2584 
2585 	/* Find matching thread params using queue_id */
2586 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2587 		if (tp->queue_id == queue_id)
2588 			break;
2589 
2590 	if (i == MAX_QUEUES) {
2591 		printf("%s: Queue_id from interrupt details was not found!\n",
2592 				__func__);
2593 		return;
2594 	}
2595 
2596 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2597 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2598 		printf(
2599 			"Dequeue interrupt handler called for incorrect event!\n");
2600 		return;
2601 	}
2602 
2603 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2604 	num_ops = tp->op_params->num_to_process;
2605 
2606 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2607 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2608 				&tp->dec_ops[
2609 					rte_atomic16_read(&tp->nb_dequeued)],
2610 				burst_sz);
2611 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2612 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2613 				&tp->dec_ops[
2614 					rte_atomic16_read(&tp->nb_dequeued)],
2615 				burst_sz);
2616 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2617 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2618 				&tp->enc_ops[
2619 					rte_atomic16_read(&tp->nb_dequeued)],
2620 				burst_sz);
2621 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2622 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2623 				&tp->enc_ops[
2624 					rte_atomic16_read(&tp->nb_dequeued)],
2625 				burst_sz);
2626 
2627 	if (deq < burst_sz) {
2628 		printf(
2629 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2630 			burst_sz, deq);
2631 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2632 		return;
2633 	}
2634 
2635 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2636 		rte_atomic16_add(&tp->nb_dequeued, deq);
2637 		return;
2638 	}
2639 
2640 	total_time = rte_rdtsc_precise() - tp->start_time;
2641 
2642 	rte_bbdev_info_get(dev_id, &info);
2643 
2644 	ret = TEST_SUCCESS;
2645 
2646 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2647 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2648 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2649 				tp->op_params->vector_mask);
2650 		/* get the max of iter_count for all dequeued ops */
2651 		for (i = 0; i < num_ops; ++i)
2652 			tp->iter_count = RTE_MAX(
2653 					tp->dec_ops[i]->turbo_dec.iter_count,
2654 					tp->iter_count);
2655 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2656 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2657 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2658 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2659 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2660 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2661 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2662 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2663 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2664 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2665 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2666 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2667 				tp->op_params->vector_mask);
2668 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2669 	}
2670 
2671 	if (ret) {
2672 		printf("Buffers validation failed\n");
2673 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2674 	}
2675 
2676 	switch (test_vector.op_type) {
2677 	case RTE_BBDEV_OP_TURBO_DEC:
2678 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2679 		break;
2680 	case RTE_BBDEV_OP_TURBO_ENC:
2681 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2682 		break;
2683 	case RTE_BBDEV_OP_LDPC_DEC:
2684 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2685 		break;
2686 	case RTE_BBDEV_OP_LDPC_ENC:
2687 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2688 		break;
2689 	case RTE_BBDEV_OP_NONE:
2690 		tb_len_bits = 0.0;
2691 		break;
2692 	default:
2693 		printf("Unknown op type: %d\n", test_vector.op_type);
2694 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2695 		return;
2696 	}
2697 
2698 	tp->ops_per_sec += ((double)num_ops) /
2699 			((double)total_time / (double)rte_get_tsc_hz());
2700 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2701 			((double)total_time / (double)rte_get_tsc_hz());
2702 
2703 	rte_atomic16_add(&tp->nb_dequeued, deq);
2704 }
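
/*
 * Illustrative note (example numbers only): the accumulated figures are
 * ops/s = num_ops / (total_time / tsc_hz) and
 * Mbps = num_ops * tb_len_bits / 1e6 / (total_time / tsc_hz).
 * For instance, 512 operations of 8304 bits completing in 1 ms amount to
 * 512 * 8304 / 1e6 / 0.001 ~= 4252 Mbps.
 */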
2705 
2706 static int
2707 throughput_intr_lcore_ldpc_dec(void *arg)
2708 {
2709 	struct thread_params *tp = arg;
2710 	unsigned int enqueued;
2711 	const uint16_t queue_id = tp->queue_id;
2712 	const uint16_t burst_sz = tp->op_params->burst_sz;
2713 	const uint16_t num_to_process = tp->op_params->num_to_process;
2714 	struct rte_bbdev_dec_op *ops[num_to_process];
2715 	struct test_buffers *bufs = NULL;
2716 	struct rte_bbdev_info info;
2717 	int ret, i, j;
2718 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2719 	uint16_t num_to_enq, enq;
2720 
2721 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2722 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2723 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2724 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2725 
2726 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2727 			"BURST_SIZE should be <= %u", MAX_BURST);
2728 
2729 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2730 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2731 			tp->dev_id, queue_id);
2732 
2733 	rte_bbdev_info_get(tp->dev_id, &info);
2734 
2735 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2736 			"NUM_OPS cannot exceed %u for this device",
2737 			info.drv.queue_size_lim);
2738 
2739 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2740 
2741 	rte_atomic16_clear(&tp->processing_status);
2742 	rte_atomic16_clear(&tp->nb_dequeued);
2743 
2744 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2745 		rte_pause();
2746 
2747 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2748 				num_to_process);
2749 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2750 			num_to_process);
2751 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2752 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2753 				bufs->hard_outputs, bufs->soft_outputs,
2754 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2755 
2756 	/* Set counter to validate the ordering */
2757 	for (j = 0; j < num_to_process; ++j)
2758 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2759 
2760 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2761 		for (i = 0; i < num_to_process; ++i) {
2762 			if (!loopback)
2763 				rte_pktmbuf_reset(
2764 					ops[i]->ldpc_dec.hard_output.data);
2765 			if (hc_out || loopback)
2766 				mbuf_reset(
2767 				ops[i]->ldpc_dec.harq_combined_output.data);
2768 		}
2769 
2770 		tp->start_time = rte_rdtsc_precise();
2771 		for (enqueued = 0; enqueued < num_to_process;) {
2772 			num_to_enq = burst_sz;
2773 
2774 			if (unlikely(num_to_process - enqueued < num_to_enq))
2775 				num_to_enq = num_to_process - enqueued;
2776 
2777 			enq = 0;
2778 			do {
2779 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2780 						tp->dev_id,
2781 						queue_id, &ops[enqueued],
2782 						num_to_enq);
2783 			} while (unlikely(num_to_enq != enq));
2784 			enqueued += enq;
2785 
1786 			/* Write the current number of enqueued descriptors to
1787 			 * the thread's burst_sz. This ensures that the proper
1788 			 * number of descriptors will be dequeued in the callback
1789 			 * function - needed for the last batch in case the
1790 			 * number of operations is not a multiple of the
1791 			 * burst size.
1792 			 */
2793 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2794 
2795 			/* Wait until processing of previous batch is
2796 			 * completed
2797 			 */
2798 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2799 					(int16_t) enqueued)
2800 				rte_pause();
2801 		}
2802 		if (j != TEST_REPETITIONS - 1)
2803 			rte_atomic16_clear(&tp->nb_dequeued);
2804 	}
2805 
2806 	return TEST_SUCCESS;
2807 }
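
/*
 * Illustrative note: in the interrupt-driven variants the lcore only
 * enqueues; dequeuing happens in dequeue_event_callback(). The enqueue loop
 * publishes the size of the batch it just submitted through tp->burst_sz
 * and then spins until tp->nb_dequeued has caught up, so at most one batch
 * is in flight per queue at any time.
 */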
2808 
2809 static int
2810 throughput_intr_lcore_dec(void *arg)
2811 {
2812 	struct thread_params *tp = arg;
2813 	unsigned int enqueued;
2814 	const uint16_t queue_id = tp->queue_id;
2815 	const uint16_t burst_sz = tp->op_params->burst_sz;
2816 	const uint16_t num_to_process = tp->op_params->num_to_process;
2817 	struct rte_bbdev_dec_op *ops[num_to_process];
2818 	struct test_buffers *bufs = NULL;
2819 	struct rte_bbdev_info info;
2820 	int ret, i, j;
2821 	uint16_t num_to_enq, enq;
2822 
2823 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2824 			"BURST_SIZE should be <= %u", MAX_BURST);
2825 
2826 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2827 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2828 			tp->dev_id, queue_id);
2829 
2830 	rte_bbdev_info_get(tp->dev_id, &info);
2831 
2832 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2833 			"NUM_OPS cannot exceed %u for this device",
2834 			info.drv.queue_size_lim);
2835 
2836 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2837 
2838 	rte_atomic16_clear(&tp->processing_status);
2839 	rte_atomic16_clear(&tp->nb_dequeued);
2840 
2841 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2842 		rte_pause();
2843 
2844 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2845 				num_to_process);
2846 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2847 			num_to_process);
2848 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2849 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2850 				bufs->hard_outputs, bufs->soft_outputs,
2851 				tp->op_params->ref_dec_op);
2852 
2853 	/* Set counter to validate the ordering */
2854 	for (j = 0; j < num_to_process; ++j)
2855 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2856 
2857 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2858 		for (i = 0; i < num_to_process; ++i)
2859 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2860 
2861 		tp->start_time = rte_rdtsc_precise();
2862 		for (enqueued = 0; enqueued < num_to_process;) {
2863 			num_to_enq = burst_sz;
2864 
2865 			if (unlikely(num_to_process - enqueued < num_to_enq))
2866 				num_to_enq = num_to_process - enqueued;
2867 
2868 			enq = 0;
2869 			do {
2870 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2871 						queue_id, &ops[enqueued],
2872 						num_to_enq);
2873 			} while (unlikely(num_to_enq != enq));
2874 			enqueued += enq;
2875 
2876 			/* Write the current number of enqueued descriptors to
2877 			 * the thread's burst_sz. This ensures that the proper
2878 			 * number of descriptors will be dequeued in the callback
2879 			 * function - needed for the last batch in case the
2880 			 * number of operations is not a multiple of the
2881 			 * burst size.
2882 			 */
2883 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2884 
2885 			/* Wait until processing of previous batch is
2886 			 * completed
2887 			 */
2888 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2889 					(int16_t) enqueued)
2890 				rte_pause();
2891 		}
2892 		if (j != TEST_REPETITIONS - 1)
2893 			rte_atomic16_clear(&tp->nb_dequeued);
2894 	}
2895 
2896 	return TEST_SUCCESS;
2897 }
2898 
2899 static int
2900 throughput_intr_lcore_enc(void *arg)
2901 {
2902 	struct thread_params *tp = arg;
2903 	unsigned int enqueued;
2904 	const uint16_t queue_id = tp->queue_id;
2905 	const uint16_t burst_sz = tp->op_params->burst_sz;
2906 	const uint16_t num_to_process = tp->op_params->num_to_process;
2907 	struct rte_bbdev_enc_op *ops[num_to_process];
2908 	struct test_buffers *bufs = NULL;
2909 	struct rte_bbdev_info info;
2910 	int ret, i, j;
2911 	uint16_t num_to_enq, enq;
2912 
2913 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2914 			"BURST_SIZE should be <= %u", MAX_BURST);
2915 
2916 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2917 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2918 			tp->dev_id, queue_id);
2919 
2920 	rte_bbdev_info_get(tp->dev_id, &info);
2921 
2922 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2923 			"NUM_OPS cannot exceed %u for this device",
2924 			info.drv.queue_size_lim);
2925 
2926 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2927 
2928 	rte_atomic16_clear(&tp->processing_status);
2929 	rte_atomic16_clear(&tp->nb_dequeued);
2930 
2931 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2932 		rte_pause();
2933 
2934 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2935 			num_to_process);
2936 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2937 			num_to_process);
2938 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2939 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2940 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2941 
2942 	/* Set counter to validate the ordering */
2943 	for (j = 0; j < num_to_process; ++j)
2944 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2945 
2946 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2947 		for (i = 0; i < num_to_process; ++i)
2948 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2949 
2950 		tp->start_time = rte_rdtsc_precise();
2951 		for (enqueued = 0; enqueued < num_to_process;) {
2952 			num_to_enq = burst_sz;
2953 
2954 			if (unlikely(num_to_process - enqueued < num_to_enq))
2955 				num_to_enq = num_to_process - enqueued;
2956 
2957 			enq = 0;
2958 			do {
2959 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2960 						queue_id, &ops[enqueued],
2961 						num_to_enq);
2962 			} while (unlikely(enq != num_to_enq));
2963 			enqueued += enq;
2964 
2965 			/* Write the current number of enqueued descriptors to
2966 			 * the thread's burst_sz. This ensures that the proper
2967 			 * number of descriptors will be dequeued in the callback
2968 			 * function - needed for the last batch in case the
2969 			 * number of operations is not a multiple of the
2970 			 * burst size.
2971 			 */
2972 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2973 
2974 			/* Wait until processing of previous batch is
2975 			 * completed
2976 			 */
2977 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2978 					(int16_t) enqueued)
2979 				rte_pause();
2980 		}
2981 		if (j != TEST_REPETITIONS - 1)
2982 			rte_atomic16_clear(&tp->nb_dequeued);
2983 	}
2984 
2985 	return TEST_SUCCESS;
2986 }
2987 
2988 
2989 static int
2990 throughput_intr_lcore_ldpc_enc(void *arg)
2991 {
2992 	struct thread_params *tp = arg;
2993 	unsigned int enqueued;
2994 	const uint16_t queue_id = tp->queue_id;
2995 	const uint16_t burst_sz = tp->op_params->burst_sz;
2996 	const uint16_t num_to_process = tp->op_params->num_to_process;
2997 	struct rte_bbdev_enc_op *ops[num_to_process];
2998 	struct test_buffers *bufs = NULL;
2999 	struct rte_bbdev_info info;
3000 	int ret, i, j;
3001 	uint16_t num_to_enq, enq;
3002 
3003 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3004 			"BURST_SIZE should be <= %u", MAX_BURST);
3005 
3006 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3007 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3008 			tp->dev_id, queue_id);
3009 
3010 	rte_bbdev_info_get(tp->dev_id, &info);
3011 
3012 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3013 			"NUM_OPS cannot exceed %u for this device",
3014 			info.drv.queue_size_lim);
3015 
3016 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3017 
3018 	rte_atomic16_clear(&tp->processing_status);
3019 	rte_atomic16_clear(&tp->nb_dequeued);
3020 
3021 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3022 		rte_pause();
3023 
3024 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3025 			num_to_process);
3026 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3027 			num_to_process);
3028 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3029 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3030 				bufs->inputs, bufs->hard_outputs,
3031 				tp->op_params->ref_enc_op);
3032 
3033 	/* Set counter to validate the ordering */
3034 	for (j = 0; j < num_to_process; ++j)
3035 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3036 
3037 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3038 		for (i = 0; i < num_to_process; ++i)
3039 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3040 
3041 		tp->start_time = rte_rdtsc_precise();
3042 		for (enqueued = 0; enqueued < num_to_process;) {
3043 			num_to_enq = burst_sz;
3044 
3045 			if (unlikely(num_to_process - enqueued < num_to_enq))
3046 				num_to_enq = num_to_process - enqueued;
3047 
3048 			enq = 0;
3049 			do {
3050 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3051 						tp->dev_id,
3052 						queue_id, &ops[enqueued],
3053 						num_to_enq);
3054 			} while (unlikely(enq != num_to_enq));
3055 			enqueued += enq;
3056 
3057 			/* Write the current number of enqueued descriptors to
3058 			 * the thread's burst_sz. This ensures that the proper
3059 			 * number of descriptors will be dequeued in the callback
3060 			 * function - needed for the last batch in case the
3061 			 * number of operations is not a multiple of the
3062 			 * burst size.
3063 			 */
3064 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
3065 
3066 			/* Wait until processing of previous batch is
3067 			 * completed
3068 			 */
3069 			while (rte_atomic16_read(&tp->nb_dequeued) !=
3070 					(int16_t) enqueued)
3071 				rte_pause();
3072 		}
3073 		if (j != TEST_REPETITIONS - 1)
3074 			rte_atomic16_clear(&tp->nb_dequeued);
3075 	}
3076 
3077 	return TEST_SUCCESS;
3078 }
3079 
3080 static int
3081 throughput_pmd_lcore_dec(void *arg)
3082 {
3083 	struct thread_params *tp = arg;
3084 	uint16_t enq, deq;
3085 	uint64_t total_time = 0, start_time;
3086 	const uint16_t queue_id = tp->queue_id;
3087 	const uint16_t burst_sz = tp->op_params->burst_sz;
3088 	const uint16_t num_ops = tp->op_params->num_to_process;
3089 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3090 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3091 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3092 	struct test_buffers *bufs = NULL;
3093 	int i, j, ret;
3094 	struct rte_bbdev_info info;
3095 	uint16_t num_to_enq;
3096 
3097 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3098 			"BURST_SIZE should be <= %u", MAX_BURST);
3099 
3100 	rte_bbdev_info_get(tp->dev_id, &info);
3101 
3102 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3103 			"NUM_OPS cannot exceed %u for this device",
3104 			info.drv.queue_size_lim);
3105 
3106 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3107 
3108 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3109 		rte_pause();
3110 
3111 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3112 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3113 
3114 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3115 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3116 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3117 
3118 	/* Set counter to validate the ordering */
3119 	for (j = 0; j < num_ops; ++j)
3120 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3121 
3122 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3123 
3124 		for (j = 0; j < num_ops; ++j)
3125 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3126 
3127 		start_time = rte_rdtsc_precise();
3128 
3129 		for (enq = 0, deq = 0; enq < num_ops;) {
3130 			num_to_enq = burst_sz;
3131 
3132 			if (unlikely(num_ops - enq < num_to_enq))
3133 				num_to_enq = num_ops - enq;
3134 
3135 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3136 					queue_id, &ops_enq[enq], num_to_enq);
3137 
3138 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3139 					queue_id, &ops_deq[deq], enq - deq);
3140 		}
3141 
3142 		/* dequeue the remaining */
3143 		while (deq < enq) {
3144 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3145 					queue_id, &ops_deq[deq], enq - deq);
3146 		}
3147 
3148 		total_time += rte_rdtsc_precise() - start_time;
3149 	}
3150 
3151 	tp->iter_count = 0;
3152 	/* get the max of iter_count for all dequeued ops */
3153 	for (i = 0; i < num_ops; ++i) {
3154 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3155 				tp->iter_count);
3156 	}
3157 
3158 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3159 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3160 				tp->op_params->vector_mask);
3161 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3162 	}
3163 
3164 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3165 
3166 	double tb_len_bits = calc_dec_TB_size(ref_op);
3167 
3168 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3169 			((double)total_time / (double)rte_get_tsc_hz());
3170 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3171 			1000000.0) / ((double)total_time /
3172 			(double)rte_get_tsc_hz());
3173 
3174 	return TEST_SUCCESS;
3175 }
3176 
3177 static int
3178 bler_pmd_lcore_ldpc_dec(void *arg)
3179 {
3180 	struct thread_params *tp = arg;
3181 	uint16_t enq, deq;
3182 	uint64_t total_time = 0, start_time;
3183 	const uint16_t queue_id = tp->queue_id;
3184 	const uint16_t burst_sz = tp->op_params->burst_sz;
3185 	const uint16_t num_ops = tp->op_params->num_to_process;
3186 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3187 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3188 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3189 	struct test_buffers *bufs = NULL;
3190 	int i, j, ret;
3191 	float parity_bler = 0;
3192 	struct rte_bbdev_info info;
3193 	uint16_t num_to_enq;
3194 	bool extDdr = check_bit(ldpc_cap_flags,
3195 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3196 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3197 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3198 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3199 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3200 
3201 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3202 			"BURST_SIZE should be <= %u", MAX_BURST);
3203 
3204 	rte_bbdev_info_get(tp->dev_id, &info);
3205 
3206 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3207 			"NUM_OPS cannot exceed %u for this device",
3208 			info.drv.queue_size_lim);
3209 
3210 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3211 
3212 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3213 		rte_pause();
3214 
3215 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3216 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3217 
3218 	/* For BLER tests we need to enable early termination */
3219 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3220 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3221 		ref_op->ldpc_dec.op_flags +=
3222 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3223 	ref_op->ldpc_dec.iter_max = get_iter_max();
3224 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3225 
3226 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3227 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3228 				bufs->hard_outputs, bufs->soft_outputs,
3229 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3230 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3231 
3232 	/* Set counter to validate the ordering */
3233 	for (j = 0; j < num_ops; ++j)
3234 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3235 
3236 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3237 		for (j = 0; j < num_ops; ++j) {
3238 			if (!loopback)
3239 				mbuf_reset(
3240 				ops_enq[j]->ldpc_dec.hard_output.data);
3241 			if (hc_out || loopback)
3242 				mbuf_reset(
3243 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3244 		}
3245 		if (extDdr)
3246 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3247 					num_ops, true);
3248 		start_time = rte_rdtsc_precise();
3249 
3250 		for (enq = 0, deq = 0; enq < num_ops;) {
3251 			num_to_enq = burst_sz;
3252 
3253 			if (unlikely(num_ops - enq < num_to_enq))
3254 				num_to_enq = num_ops - enq;
3255 
3256 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3257 					queue_id, &ops_enq[enq], num_to_enq);
3258 
3259 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3260 					queue_id, &ops_deq[deq], enq - deq);
3261 		}
3262 
3263 		/* dequeue the remaining */
3264 		while (deq < enq) {
3265 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3266 					queue_id, &ops_deq[deq], enq - deq);
3267 		}
3268 
3269 		total_time += rte_rdtsc_precise() - start_time;
3270 	}
3271 
3272 	tp->iter_count = 0;
3273 	tp->iter_average = 0;
3274 	/* get the max of iter_count for all dequeued ops */
3275 	for (i = 0; i < num_ops; ++i) {
3276 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3277 				tp->iter_count);
3278 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3279 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3280 			parity_bler += 1.0;
3281 	}
3282 
3283 	parity_bler /= num_ops; /* This one is based on SYND */
3284 	tp->iter_average /= num_ops;
3285 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3286 
3287 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3288 			&& tp->bler == 0
3289 			&& parity_bler == 0
3290 			&& !hc_out) {
3291 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3292 				tp->op_params->vector_mask);
3293 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3294 	}
3295 
3296 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3297 
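	/* Derive per-core rates from the measured TSC window: ops/s, and Mbps based
	 * on the transport block size of the reference op (single pass, hence * 1).
	 */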
3298 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3299 	tp->ops_per_sec = ((double)num_ops * 1) /
3300 			((double)total_time / (double)rte_get_tsc_hz());
3301 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3302 			1000000.0) / ((double)total_time /
3303 			(double)rte_get_tsc_hz());
3304 
3305 	return TEST_SUCCESS;
3306 }
3307 
3308 static int
3309 throughput_pmd_lcore_ldpc_dec(void *arg)
3310 {
3311 	struct thread_params *tp = arg;
3312 	uint16_t enq, deq;
3313 	uint64_t total_time = 0, start_time;
3314 	const uint16_t queue_id = tp->queue_id;
3315 	const uint16_t burst_sz = tp->op_params->burst_sz;
3316 	const uint16_t num_ops = tp->op_params->num_to_process;
3317 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3318 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3319 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3320 	struct test_buffers *bufs = NULL;
3321 	int i, j, ret;
3322 	struct rte_bbdev_info info;
3323 	uint16_t num_to_enq;
3324 	bool extDdr = check_bit(ldpc_cap_flags,
3325 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3326 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3327 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3328 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3329 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3330 
3331 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3332 			"BURST_SIZE should be <= %u", MAX_BURST);
3333 
3334 	rte_bbdev_info_get(tp->dev_id, &info);
3335 
3336 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3337 			"NUM_OPS cannot exceed %u for this device",
3338 			info.drv.queue_size_lim);
3339 
3340 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3341 
3342 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3343 		rte_pause();
3344 
3345 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3346 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3347 
3348 	/* For throughput tests we need to disable early termination */
3349 	if (check_bit(ref_op->ldpc_dec.op_flags,
3350 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3351 		ref_op->ldpc_dec.op_flags &=
3352 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3353 	ref_op->ldpc_dec.iter_max = get_iter_max();
3354 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3355 
3356 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3357 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3358 				bufs->hard_outputs, bufs->soft_outputs,
3359 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3360 
3361 	/* Set counter to validate the ordering */
3362 	for (j = 0; j < num_ops; ++j)
3363 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3364 
3365 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3366 		for (j = 0; j < num_ops; ++j) {
3367 			if (!loopback)
3368 				mbuf_reset(
3369 				ops_enq[j]->ldpc_dec.hard_output.data);
3370 			if (hc_out || loopback)
3371 				mbuf_reset(
3372 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3373 		}
3374 		if (extDdr)
3375 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3376 					num_ops, true);
3377 		start_time = rte_rdtsc_precise();
3378 
3379 		for (enq = 0, deq = 0; enq < num_ops;) {
3380 			num_to_enq = burst_sz;
3381 
3382 			if (unlikely(num_ops - enq < num_to_enq))
3383 				num_to_enq = num_ops - enq;
3384 
3385 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3386 					queue_id, &ops_enq[enq], num_to_enq);
3387 
3388 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3389 					queue_id, &ops_deq[deq], enq - deq);
3390 		}
3391 
3392 		/* dequeue the remaining */
3393 		while (deq < enq) {
3394 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3395 					queue_id, &ops_deq[deq], enq - deq);
3396 		}
3397 
3398 		total_time += rte_rdtsc_precise() - start_time;
3399 	}
3400 
3401 	tp->iter_count = 0;
3402 	/* get the max of iter_count for all dequeued ops */
3403 	for (i = 0; i < num_ops; ++i) {
3404 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3405 				tp->iter_count);
3406 	}
3407 	if (extDdr) {
3408 		/* Read loopback is not thread safe */
3409 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3410 	}
3411 
3412 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3413 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3414 				tp->op_params->vector_mask);
3415 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3416 	}
3417 
3418 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3419 
3420 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3421 
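	/* Rates aggregated over all TEST_REPETITIONS passes of num_ops operations. */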
3422 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3423 			((double)total_time / (double)rte_get_tsc_hz());
3424 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3425 			1000000.0) / ((double)total_time /
3426 			(double)rte_get_tsc_hz());
3427 
3428 	return TEST_SUCCESS;
3429 }
3430 
3431 static int
3432 throughput_pmd_lcore_enc(void *arg)
3433 {
3434 	struct thread_params *tp = arg;
3435 	uint16_t enq, deq;
3436 	uint64_t total_time = 0, start_time;
3437 	const uint16_t queue_id = tp->queue_id;
3438 	const uint16_t burst_sz = tp->op_params->burst_sz;
3439 	const uint16_t num_ops = tp->op_params->num_to_process;
3440 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3441 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3442 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3443 	struct test_buffers *bufs = NULL;
3444 	int i, j, ret;
3445 	struct rte_bbdev_info info;
3446 	uint16_t num_to_enq;
3447 
3448 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3449 			"BURST_SIZE should be <= %u", MAX_BURST);
3450 
3451 	rte_bbdev_info_get(tp->dev_id, &info);
3452 
3453 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3454 			"NUM_OPS cannot exceed %u for this device",
3455 			info.drv.queue_size_lim);
3456 
3457 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3458 
3459 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3460 		rte_pause();
3461 
3462 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3463 			num_ops);
3464 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3465 			num_ops);
3466 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3467 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3468 				bufs->hard_outputs, ref_op);
3469 
3470 	/* Set counter to validate the ordering */
3471 	for (j = 0; j < num_ops; ++j)
3472 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3473 
3474 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3475 
3476 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3477 			for (j = 0; j < num_ops; ++j)
3478 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3479 
3480 		start_time = rte_rdtsc_precise();
3481 
3482 		for (enq = 0, deq = 0; enq < num_ops;) {
3483 			num_to_enq = burst_sz;
3484 
3485 			if (unlikely(num_ops - enq < num_to_enq))
3486 				num_to_enq = num_ops - enq;
3487 
3488 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3489 					queue_id, &ops_enq[enq], num_to_enq);
3490 
3491 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3492 					queue_id, &ops_deq[deq], enq - deq);
3493 		}
3494 
3495 		/* dequeue the remaining */
3496 		while (deq < enq) {
3497 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3498 					queue_id, &ops_deq[deq], enq - deq);
3499 		}
3500 
3501 		total_time += rte_rdtsc_precise() - start_time;
3502 	}
3503 
3504 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3505 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3506 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3507 	}
3508 
3509 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3510 
3511 	double tb_len_bits = calc_enc_TB_size(ref_op);
3512 
3513 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3514 			((double)total_time / (double)rte_get_tsc_hz());
3515 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3516 			/ 1000000.0) / ((double)total_time /
3517 			(double)rte_get_tsc_hz());
3518 
3519 	return TEST_SUCCESS;
3520 }
3521 
3522 static int
3523 throughput_pmd_lcore_ldpc_enc(void *arg)
3524 {
3525 	struct thread_params *tp = arg;
3526 	uint16_t enq, deq;
3527 	uint64_t total_time = 0, start_time;
3528 	const uint16_t queue_id = tp->queue_id;
3529 	const uint16_t burst_sz = tp->op_params->burst_sz;
3530 	const uint16_t num_ops = tp->op_params->num_to_process;
3531 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3532 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3533 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3534 	struct test_buffers *bufs = NULL;
3535 	int i, j, ret;
3536 	struct rte_bbdev_info info;
3537 	uint16_t num_to_enq;
3538 
3539 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3540 			"BURST_SIZE should be <= %u", MAX_BURST);
3541 
3542 	rte_bbdev_info_get(tp->dev_id, &info);
3543 
3544 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3545 			"NUM_OPS cannot exceed %u for this device",
3546 			info.drv.queue_size_lim);
3547 
3548 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3549 
3550 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3551 		rte_pause();
3552 
3553 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3554 			num_ops);
3555 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3556 			num_ops);
3557 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3558 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3559 				bufs->hard_outputs, ref_op);
3560 
3561 	/* Set counter to validate the ordering */
3562 	for (j = 0; j < num_ops; ++j)
3563 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3564 
3565 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3566 
3567 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3568 			for (j = 0; j < num_ops; ++j)
3569 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3570 
3571 		start_time = rte_rdtsc_precise();
3572 
3573 		for (enq = 0, deq = 0; enq < num_ops;) {
3574 			num_to_enq = burst_sz;
3575 
3576 			if (unlikely(num_ops - enq < num_to_enq))
3577 				num_to_enq = num_ops - enq;
3578 
3579 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3580 					queue_id, &ops_enq[enq], num_to_enq);
3581 
3582 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3583 					queue_id, &ops_deq[deq], enq - deq);
3584 		}
3585 
3586 		/* dequeue the remaining */
3587 		while (deq < enq) {
3588 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3589 					queue_id, &ops_deq[deq], enq - deq);
3590 		}
3591 
3592 		total_time += rte_rdtsc_precise() - start_time;
3593 	}
3594 
3595 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3596 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3597 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3598 	}
3599 
3600 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3601 
3602 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3603 
3604 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3605 			((double)total_time / (double)rte_get_tsc_hz());
3606 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3607 			/ 1000000.0) / ((double)total_time /
3608 			(double)rte_get_tsc_hz());
3609 
3610 	return TEST_SUCCESS;
3611 }
3612 
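/* Aggregate the performance results over the number of cores used */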
3613 static void
3614 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3615 {
3616 	unsigned int iter = 0;
3617 	double total_mops = 0, total_mbps = 0;
3618 
3619 	for (iter = 0; iter < used_cores; iter++) {
3620 		printf(
3621 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3622 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3623 			t_params[iter].mbps);
3624 		total_mops += t_params[iter].ops_per_sec;
3625 		total_mbps += t_params[iter].mbps;
3626 	}
3627 	printf(
3628 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3629 		used_cores, total_mops, total_mbps);
3630 }
3631 
3632 /* Aggregate the performance results over the number of cores used */
3633 static void
3634 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3635 {
3636 	unsigned int core_idx = 0;
3637 	double total_mops = 0, total_mbps = 0;
3638 	uint8_t iter_count = 0;
3639 
3640 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3641 		printf(
3642 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3643 			t_params[core_idx].lcore_id,
3644 			t_params[core_idx].ops_per_sec,
3645 			t_params[core_idx].mbps,
3646 			t_params[core_idx].iter_count);
3647 		total_mops += t_params[core_idx].ops_per_sec;
3648 		total_mbps += t_params[core_idx].mbps;
3649 		iter_count = RTE_MAX(iter_count,
3650 				t_params[core_idx].iter_count);
3651 	}
3652 	printf(
3653 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3654 		used_cores, total_mops, total_mbps, iter_count);
3655 }
3656 
3657 /* Aggregate the performance results over the number of cores used */
3658 static void
3659 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3660 {
3661 	unsigned int core_idx = 0;
3662 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3663 	double snr = get_snr();
3664 
3665 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3666 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3667 				t_params[core_idx].lcore_id,
3668 				t_params[core_idx].bler * 100,
3669 				t_params[core_idx].iter_average,
3670 				t_params[core_idx].mbps,
3671 				get_vector_filename());
3672 		total_mbps += t_params[core_idx].mbps;
3673 		total_bler += t_params[core_idx].bler;
3674 		total_iter += t_params[core_idx].iter_average;
3675 	}
3676 	total_bler /= used_cores;
3677 	total_iter /= used_cores;
3678 
3679 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3680 			snr, total_bler * 100, total_iter, get_iter_max(),
3681 			total_mbps, get_vector_filename());
3682 }
3683 
3684 /*
3685  * Test function that determines BLER wireless performance
3686  */
3687 static int
3688 bler_test(struct active_device *ad,
3689 		struct test_op_params *op_params)
3690 {
3691 	int ret;
3692 	unsigned int lcore_id, used_cores = 0;
3693 	struct thread_params *t_params;
3694 	struct rte_bbdev_info info;
3695 	lcore_function_t *bler_function;
3696 	uint16_t num_lcores;
3697 	const char *op_type_str;
3698 
3699 	rte_bbdev_info_get(ad->dev_id, &info);
3700 
3701 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3702 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3703 			test_vector.op_type);
3704 
3705 	printf("+ ------------------------------------------------------- +\n");
3706 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3707 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3708 			op_params->num_to_process, op_params->num_lcores,
3709 			op_type_str,
3710 			intr_enabled ? "Interrupt mode" : "PMD mode",
3711 			(double)rte_get_tsc_hz() / 1000000000.0);
3712 
3713 	/* Set number of lcores */
3714 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3715 			? ad->nb_queues
3716 			: op_params->num_lcores;
3717 
3718 	/* Allocate memory for thread parameters structure */
3719 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3720 			RTE_CACHE_LINE_SIZE);
3721 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3722 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3723 				RTE_CACHE_LINE_SIZE));
3724 
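	/* BLER is only measured for LDPC decode without HARQ loopback or LLR
	 * compression; other vectors are skipped.
	 */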
3725 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3726 			!check_bit(test_vector.ldpc_dec.op_flags,
3727 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3728 			&& !check_bit(test_vector.ldpc_dec.op_flags,
3729 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
3730 		bler_function = bler_pmd_lcore_ldpc_dec;
3731 	else
3732 		return TEST_SKIPPED;
3733 
3734 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3735 
3736 	/* Main core is set at first entry */
3737 	t_params[0].dev_id = ad->dev_id;
3738 	t_params[0].lcore_id = rte_lcore_id();
3739 	t_params[0].op_params = op_params;
3740 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3741 	t_params[0].iter_count = 0;
3742 
3743 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3744 		if (used_cores >= num_lcores)
3745 			break;
3746 
3747 		t_params[used_cores].dev_id = ad->dev_id;
3748 		t_params[used_cores].lcore_id = lcore_id;
3749 		t_params[used_cores].op_params = op_params;
3750 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3751 		t_params[used_cores].iter_count = 0;
3752 
3753 		rte_eal_remote_launch(bler_function,
3754 				&t_params[used_cores++], lcore_id);
3755 	}
3756 
3757 	rte_atomic16_set(&op_params->sync, SYNC_START);
3758 	ret = bler_function(&t_params[0]);
3759 
3760 	/* Main core is always used */
3761 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3762 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3763 
3764 	print_dec_bler(t_params, num_lcores);
3765 
3766 	/* Return if test failed */
3767 	if (ret) {
3768 		rte_free(t_params);
3769 		return ret;
3770 	}
3771 
3772 	/* Free thread parameters and return test status */
3773 	rte_free(t_params);
3774 	return ret;
3775 }
3776 
3777 /*
3778  * Test function that determines how long an enqueue + dequeue of a burst
3779  * takes on available lcores.
3780  */
3781 static int
3782 throughput_test(struct active_device *ad,
3783 		struct test_op_params *op_params)
3784 {
3785 	int ret;
3786 	unsigned int lcore_id, used_cores = 0;
3787 	struct thread_params *t_params, *tp;
3788 	struct rte_bbdev_info info;
3789 	lcore_function_t *throughput_function;
3790 	uint16_t num_lcores;
3791 	const char *op_type_str;
3792 
3793 	rte_bbdev_info_get(ad->dev_id, &info);
3794 
3795 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3796 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3797 			test_vector.op_type);
3798 
3799 	printf("+ ------------------------------------------------------- +\n");
3800 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3801 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3802 			op_params->num_to_process, op_params->num_lcores,
3803 			op_type_str,
3804 			intr_enabled ? "Interrupt mode" : "PMD mode",
3805 			(double)rte_get_tsc_hz() / 1000000000.0);
3806 
3807 	/* Set number of lcores */
3808 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3809 			? ad->nb_queues
3810 			: op_params->num_lcores;
3811 
3812 	/* Allocate memory for thread parameters structure */
3813 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3814 			RTE_CACHE_LINE_SIZE);
3815 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3816 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3817 				RTE_CACHE_LINE_SIZE));
3818 
3819 	if (intr_enabled) {
3820 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3821 			throughput_function = throughput_intr_lcore_dec;
3822 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3823 			throughput_function = throughput_intr_lcore_ldpc_dec;
3824 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3825 			throughput_function = throughput_intr_lcore_enc;
3826 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3827 			throughput_function = throughput_intr_lcore_ldpc_enc;
3828 		else
3829 			throughput_function = throughput_intr_lcore_enc;
3830 
3831 		/* Dequeue interrupt callback registration */
3832 		ret = rte_bbdev_callback_register(ad->dev_id,
3833 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3834 				t_params);
3835 		if (ret < 0) {
3836 			rte_free(t_params);
3837 			return ret;
3838 		}
3839 	} else {
3840 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3841 			throughput_function = throughput_pmd_lcore_dec;
3842 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3843 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3844 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3845 			throughput_function = throughput_pmd_lcore_enc;
3846 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3847 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3848 		else
3849 			throughput_function = throughput_pmd_lcore_enc;
3850 	}
3851 
3852 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3853 
3854 	/* Main core is set at first entry */
3855 	t_params[0].dev_id = ad->dev_id;
3856 	t_params[0].lcore_id = rte_lcore_id();
3857 	t_params[0].op_params = op_params;
3858 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3859 	t_params[0].iter_count = 0;
3860 
3861 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3862 		if (used_cores >= num_lcores)
3863 			break;
3864 
3865 		t_params[used_cores].dev_id = ad->dev_id;
3866 		t_params[used_cores].lcore_id = lcore_id;
3867 		t_params[used_cores].op_params = op_params;
3868 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3869 		t_params[used_cores].iter_count = 0;
3870 
3871 		rte_eal_remote_launch(throughput_function,
3872 				&t_params[used_cores++], lcore_id);
3873 	}
3874 
3875 	rte_atomic16_set(&op_params->sync, SYNC_START);
3876 	ret = throughput_function(&t_params[0]);
3877 
3878 	/* Main core is always used */
3879 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3880 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3881 
3882 	/* Return if test failed */
3883 	if (ret) {
3884 		rte_free(t_params);
3885 		return ret;
3886 	}
3887 
3888 	/* Print throughput if interrupts are disabled and test passed */
3889 	if (!intr_enabled) {
3890 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3891 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3892 			print_dec_throughput(t_params, num_lcores);
3893 		else
3894 			print_enc_throughput(t_params, num_lcores);
3895 		rte_free(t_params);
3896 		return ret;
3897 	}
3898 
3899 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3900 	 * all pending operations. Skip waiting for queues which reported an
3901 	 * error using processing_status variable.
3902 	 * Wait for main lcore operations.
3903 	 */
3904 	tp = &t_params[0];
3905 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3906 			op_params->num_to_process) &&
3907 			(rte_atomic16_read(&tp->processing_status) !=
3908 			TEST_FAILED))
3909 		rte_pause();
3910 
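	/* Normalise the figures accumulated by the interrupt callback back to a
	 * per-repetition rate.
	 */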
3911 	tp->ops_per_sec /= TEST_REPETITIONS;
3912 	tp->mbps /= TEST_REPETITIONS;
3913 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3914 
3915 	/* Wait for worker lcore operations */
3916 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3917 		tp = &t_params[used_cores];
3918 
3919 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3920 				op_params->num_to_process) &&
3921 				(rte_atomic16_read(&tp->processing_status) !=
3922 				TEST_FAILED))
3923 			rte_pause();
3924 
3925 		tp->ops_per_sec /= TEST_REPETITIONS;
3926 		tp->mbps /= TEST_REPETITIONS;
3927 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3928 	}
3929 
3930 	/* Print throughput if test passed */
3931 	if (!ret) {
3932 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3933 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3934 			print_dec_throughput(t_params, num_lcores);
3935 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3936 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3937 			print_enc_throughput(t_params, num_lcores);
3938 	}
3939 
3940 	rte_free(t_params);
3941 	return ret;
3942 }
3943 
3944 static int
3945 latency_test_dec(struct rte_mempool *mempool,
3946 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3947 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3948 		const uint16_t num_to_process, uint16_t burst_sz,
3949 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3950 {
3951 	int ret = TEST_SUCCESS;
3952 	uint16_t i, j, dequeued;
3953 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3954 	uint64_t start_time = 0, last_time = 0;
3955 
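	/* Process the workload in bursts, timing each burst from enqueue until the
	 * first successful dequeue.
	 */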
3956 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3957 		uint16_t enq = 0, deq = 0;
3958 		bool first_time = true;
3959 		last_time = 0;
3960 
3961 		if (unlikely(num_to_process - dequeued < burst_sz))
3962 			burst_sz = num_to_process - dequeued;
3963 
3964 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3965 		TEST_ASSERT_SUCCESS(ret,
3966 				"rte_bbdev_dec_op_alloc_bulk() failed");
3967 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3968 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3969 					bufs->inputs,
3970 					bufs->hard_outputs,
3971 					bufs->soft_outputs,
3972 					ref_op);
3973 
3974 		/* Set counter to validate the ordering */
3975 		for (j = 0; j < burst_sz; ++j)
3976 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3977 
3978 		start_time = rte_rdtsc_precise();
3979 
3980 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3981 				burst_sz);
3982 		TEST_ASSERT(enq == burst_sz,
3983 				"Error enqueueing burst, expected %u, got %u",
3984 				burst_sz, enq);
3985 
3986 		/* Dequeue */
3987 		do {
3988 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3989 					&ops_deq[deq], burst_sz - deq);
3990 			if (likely(first_time && (deq > 0))) {
3991 				last_time = rte_rdtsc_precise() - start_time;
3992 				first_time = false;
3993 			}
3994 		} while (unlikely(burst_sz != deq));
3995 
3996 		*max_time = RTE_MAX(*max_time, last_time);
3997 		*min_time = RTE_MIN(*min_time, last_time);
3998 		*total_time += last_time;
3999 
4000 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4001 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4002 					vector_mask);
4003 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4004 		}
4005 
4006 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4007 		dequeued += deq;
4008 	}
4009 
4010 	return i;
4011 }
4012 
4013 /* Test case for latency/validation for LDPC Decoder */
4014 static int
4015 latency_test_ldpc_dec(struct rte_mempool *mempool,
4016 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4017 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4018 		const uint16_t num_to_process, uint16_t burst_sz,
4019 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4020 		bool disable_et)
4021 {
4022 	int ret = TEST_SUCCESS;
4023 	uint16_t i, j, dequeued;
4024 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4025 	uint64_t start_time = 0, last_time = 0;
4026 	bool extDdr = ldpc_cap_flags &
4027 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4028 
4029 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4030 		uint16_t enq = 0, deq = 0;
4031 		bool first_time = true;
4032 		last_time = 0;
4033 
4034 		if (unlikely(num_to_process - dequeued < burst_sz))
4035 			burst_sz = num_to_process - dequeued;
4036 
4037 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4038 		TEST_ASSERT_SUCCESS(ret,
4039 				"rte_bbdev_dec_op_alloc_bulk() failed");
4040 
4041 		/* For latency tests we need to disable early termination */
4042 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4043 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4044 			ref_op->ldpc_dec.op_flags &=
4045 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4046 		ref_op->ldpc_dec.iter_max = get_iter_max();
4047 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4048 
4049 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4050 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4051 					bufs->inputs,
4052 					bufs->hard_outputs,
4053 					bufs->soft_outputs,
4054 					bufs->harq_inputs,
4055 					bufs->harq_outputs,
4056 					ref_op);
4057 
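		/* Preload HARQ data into external DDR before the timed section so it
		 * does not contribute to the measured latency.
		 */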
4058 		if (extDdr)
4059 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4060 					burst_sz, true);
4061 
4062 		/* Set counter to validate the ordering */
4063 		for (j = 0; j < burst_sz; ++j)
4064 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4065 
4066 		start_time = rte_rdtsc_precise();
4067 
4068 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4069 				&ops_enq[enq], burst_sz);
4070 		TEST_ASSERT(enq == burst_sz,
4071 				"Error enqueueing burst, expected %u, got %u",
4072 				burst_sz, enq);
4073 
4074 		/* Dequeue */
4075 		do {
4076 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4077 					&ops_deq[deq], burst_sz - deq);
4078 			if (likely(first_time && (deq > 0))) {
4079 				last_time = rte_rdtsc_precise() - start_time;
4080 				first_time = false;
4081 			}
4082 		} while (unlikely(burst_sz != deq));
4083 
4084 		*max_time = RTE_MAX(*max_time, last_time);
4085 		*min_time = RTE_MIN(*min_time, last_time);
4086 		*total_time += last_time;
4087 
4088 		if (extDdr)
4089 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4090 
4091 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4092 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4093 					vector_mask);
4094 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4095 		}
4096 
4097 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4098 		dequeued += deq;
4099 	}
4100 	return i;
4101 }
4102 
4103 static int
4104 latency_test_enc(struct rte_mempool *mempool,
4105 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4106 		uint16_t dev_id, uint16_t queue_id,
4107 		const uint16_t num_to_process, uint16_t burst_sz,
4108 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4109 {
4110 	int ret = TEST_SUCCESS;
4111 	uint16_t i, j, dequeued;
4112 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4113 	uint64_t start_time = 0, last_time = 0;
4114 
4115 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4116 		uint16_t enq = 0, deq = 0;
4117 		bool first_time = true;
4118 		last_time = 0;
4119 
4120 		if (unlikely(num_to_process - dequeued < burst_sz))
4121 			burst_sz = num_to_process - dequeued;
4122 
4123 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4124 		TEST_ASSERT_SUCCESS(ret,
4125 				"rte_bbdev_enc_op_alloc_bulk() failed");
4126 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4127 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4128 					bufs->inputs,
4129 					bufs->hard_outputs,
4130 					ref_op);
4131 
4132 		/* Set counter to validate the ordering */
4133 		for (j = 0; j < burst_sz; ++j)
4134 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4135 
4136 		start_time = rte_rdtsc_precise();
4137 
4138 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4139 				burst_sz);
4140 		TEST_ASSERT(enq == burst_sz,
4141 				"Error enqueueing burst, expected %u, got %u",
4142 				burst_sz, enq);
4143 
4144 		/* Dequeue */
4145 		do {
4146 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4147 					&ops_deq[deq], burst_sz - deq);
4148 			if (likely(first_time && (deq > 0))) {
4149 				last_time += rte_rdtsc_precise() - start_time;
4150 				first_time = false;
4151 			}
4152 		} while (unlikely(burst_sz != deq));
4153 
4154 		*max_time = RTE_MAX(*max_time, last_time);
4155 		*min_time = RTE_MIN(*min_time, last_time);
4156 		*total_time += last_time;
4157 
4158 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4159 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4160 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4161 		}
4162 
4163 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4164 		dequeued += deq;
4165 	}
4166 
4167 	return i;
4168 }
4169 
4170 static int
4171 latency_test_ldpc_enc(struct rte_mempool *mempool,
4172 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4173 		uint16_t dev_id, uint16_t queue_id,
4174 		const uint16_t num_to_process, uint16_t burst_sz,
4175 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4176 {
4177 	int ret = TEST_SUCCESS;
4178 	uint16_t i, j, dequeued;
4179 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4180 	uint64_t start_time = 0, last_time = 0;
4181 
4182 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4183 		uint16_t enq = 0, deq = 0;
4184 		bool first_time = true;
4185 		last_time = 0;
4186 
4187 		if (unlikely(num_to_process - dequeued < burst_sz))
4188 			burst_sz = num_to_process - dequeued;
4189 
4190 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4191 		TEST_ASSERT_SUCCESS(ret,
4192 				"rte_bbdev_enc_op_alloc_bulk() failed");
4193 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4194 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4195 					bufs->inputs,
4196 					bufs->hard_outputs,
4197 					ref_op);
4198 
4199 		/* Set counter to validate the ordering */
4200 		for (j = 0; j < burst_sz; ++j)
4201 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4202 
4203 		start_time = rte_rdtsc_precise();
4204 
4205 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4206 				&ops_enq[enq], burst_sz);
4207 		TEST_ASSERT(enq == burst_sz,
4208 				"Error enqueueing burst, expected %u, got %u",
4209 				burst_sz, enq);
4210 
4211 		/* Dequeue */
4212 		do {
4213 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4214 					&ops_deq[deq], burst_sz - deq);
4215 			if (likely(first_time && (deq > 0))) {
4216 				last_time += rte_rdtsc_precise() - start_time;
4217 				first_time = false;
4218 			}
4219 		} while (unlikely(burst_sz != deq));
4220 
4221 		*max_time = RTE_MAX(*max_time, last_time);
4222 		*min_time = RTE_MIN(*min_time, last_time);
4223 		*total_time += last_time;
4224 
4225 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4226 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4227 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4228 		}
4229 
4230 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4231 		dequeued += deq;
4232 	}
4233 
4234 	return i;
4235 }
4236 
4237 /* Common function for running validation and latency test cases */
4238 static int
4239 validation_latency_test(struct active_device *ad,
4240 		struct test_op_params *op_params, bool latency_flag)
4241 {
4242 	int iter;
4243 	uint16_t burst_sz = op_params->burst_sz;
4244 	const uint16_t num_to_process = op_params->num_to_process;
4245 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4246 	const uint16_t queue_id = ad->queue_ids[0];
4247 	struct test_buffers *bufs = NULL;
4248 	struct rte_bbdev_info info;
4249 	uint64_t total_time, min_time, max_time;
4250 	const char *op_type_str;
4251 
4252 	total_time = max_time = 0;
4253 	min_time = UINT64_MAX;
4254 
4255 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4256 			"BURST_SIZE should be <= %u", MAX_BURST);
4257 
4258 	rte_bbdev_info_get(ad->dev_id, &info);
4259 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4260 
4261 	op_type_str = rte_bbdev_op_type_str(op_type);
4262 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4263 
4264 	printf("+ ------------------------------------------------------- +\n");
4265 	if (latency_flag)
4266 		printf("== test: latency\ndev:");
4267 	else
4268 		printf("== test: validation\ndev:");
4269 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4270 			info.dev_name, burst_sz, num_to_process, op_type_str);
4271 
4272 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4273 		iter = latency_test_dec(op_params->mp, bufs,
4274 				op_params->ref_dec_op, op_params->vector_mask,
4275 				ad->dev_id, queue_id, num_to_process,
4276 				burst_sz, &total_time, &min_time, &max_time);
4277 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4278 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4279 				op_params->ref_enc_op, ad->dev_id, queue_id,
4280 				num_to_process, burst_sz, &total_time,
4281 				&min_time, &max_time);
4282 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4283 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4284 				op_params->ref_dec_op, op_params->vector_mask,
4285 				ad->dev_id, queue_id, num_to_process,
4286 				burst_sz, &total_time, &min_time, &max_time,
4287 				latency_flag);
4288 	else /* RTE_BBDEV_OP_TURBO_ENC */
4289 		iter = latency_test_enc(op_params->mp, bufs,
4290 				op_params->ref_enc_op,
4291 				ad->dev_id, queue_id,
4292 				num_to_process, burst_sz, &total_time,
4293 				&min_time, &max_time);
4294 
4295 	if (iter <= 0)
4296 		return TEST_FAILED;
4297 
4298 	printf("Operation latency:\n"
4299 			"\tavg: %lg cycles, %lg us\n"
4300 			"\tmin: %lg cycles, %lg us\n"
4301 			"\tmax: %lg cycles, %lg us\n",
4302 			(double)total_time / (double)iter,
4303 			(double)(total_time * 1000000) / (double)iter /
4304 			(double)rte_get_tsc_hz(), (double)min_time,
4305 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4306 			(double)max_time, (double)(max_time * 1000000) /
4307 			(double)rte_get_tsc_hz());
4308 
4309 	return TEST_SUCCESS;
4310 }
4311 
4312 static int
4313 latency_test(struct active_device *ad, struct test_op_params *op_params)
4314 {
4315 	return validation_latency_test(ad, op_params, true);
4316 }
4317 
4318 static int
4319 validation_test(struct active_device *ad, struct test_op_params *op_params)
4320 {
4321 	return validation_latency_test(ad, op_params, false);
4322 }
4323 
4324 #ifdef RTE_BBDEV_OFFLOAD_COST
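/* Copy the per-queue stats, including acc_offload_cycles, directly from the
 * device's internal queue data.
 */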
4325 static int
4326 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4327 		struct rte_bbdev_stats *stats)
4328 {
4329 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4330 	struct rte_bbdev_stats *q_stats;
4331 
4332 	if (queue_id >= dev->data->num_queues)
4333 		return -1;
4334 
4335 	q_stats = &dev->data->queues[queue_id].queue_stats;
4336 
4337 	stats->enqueued_count = q_stats->enqueued_count;
4338 	stats->dequeued_count = q_stats->dequeued_count;
4339 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4340 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4341 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4342 
4343 	return 0;
4344 }
4345 
4346 static int
4347 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4348 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4349 		uint16_t queue_id, const uint16_t num_to_process,
4350 		uint16_t burst_sz, struct test_time_stats *time_st)
4351 {
4352 	int i, dequeued, ret;
4353 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4354 	uint64_t enq_start_time, deq_start_time;
4355 	uint64_t enq_sw_last_time, deq_last_time;
4356 	struct rte_bbdev_stats stats;
4357 
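	/* For each burst: time the enqueue call and subtract the accelerator cycles
	 * reported by the driver to isolate software overhead, then, after letting
	 * the device process, time the first dequeue call separately.
	 */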
4358 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4359 		uint16_t enq = 0, deq = 0;
4360 
4361 		if (unlikely(num_to_process - dequeued < burst_sz))
4362 			burst_sz = num_to_process - dequeued;
4363 
4364 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4365 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4366 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4367 					bufs->inputs,
4368 					bufs->hard_outputs,
4369 					bufs->soft_outputs,
4370 					ref_op);
4371 
4372 		/* Start time meas for enqueue function offload latency */
4373 		enq_start_time = rte_rdtsc_precise();
4374 		do {
4375 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4376 					&ops_enq[enq], burst_sz - enq);
4377 		} while (unlikely(burst_sz != enq));
4378 
4379 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4380 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4381 		TEST_ASSERT_SUCCESS(ret,
4382 				"Failed to get stats for queue (%u) of device (%u)",
4383 				queue_id, dev_id);
4384 
4385 		enq_sw_last_time -= stats.acc_offload_cycles;
4386 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4387 				enq_sw_last_time);
4388 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4389 				enq_sw_last_time);
4390 		time_st->enq_sw_total_time += enq_sw_last_time;
4391 
4392 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4393 				stats.acc_offload_cycles);
4394 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4395 				stats.acc_offload_cycles);
4396 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4397 
4398 		/* give time for device to process ops */
4399 		rte_delay_us(WAIT_OFFLOAD_US);
4400 
4401 		/* Start time meas for dequeue function offload latency */
4402 		deq_start_time = rte_rdtsc_precise();
4403 		/* Dequeue one operation */
4404 		do {
4405 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4406 					&ops_deq[deq], enq);
4407 		} while (unlikely(deq == 0));
4408 
4409 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4410 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4411 				deq_last_time);
4412 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4413 				deq_last_time);
4414 		time_st->deq_total_time += deq_last_time;
4415 
4416 		/* Dequeue remaining operations if needed */
4417 		while (burst_sz != deq)
4418 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4419 					&ops_deq[deq], burst_sz - deq);
4420 
4421 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4422 		dequeued += deq;
4423 	}
4424 
4425 	return i;
4426 }
4427 
4428 static int
4429 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4430 		struct test_buffers *bufs,
4431 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4432 		uint16_t queue_id, const uint16_t num_to_process,
4433 		uint16_t burst_sz, struct test_time_stats *time_st)
4434 {
4435 	int i, dequeued, ret;
4436 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4437 	uint64_t enq_start_time, deq_start_time;
4438 	uint64_t enq_sw_last_time, deq_last_time;
4439 	struct rte_bbdev_stats stats;
4440 	bool extDdr = ldpc_cap_flags &
4441 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4442 
4443 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4444 		uint16_t enq = 0, deq = 0;
4445 
4446 		if (unlikely(num_to_process - dequeued < burst_sz))
4447 			burst_sz = num_to_process - dequeued;
4448 
4449 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4450 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4451 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4452 					bufs->inputs,
4453 					bufs->hard_outputs,
4454 					bufs->soft_outputs,
4455 					bufs->harq_inputs,
4456 					bufs->harq_outputs,
4457 					ref_op);
4458 
4459 		if (extDdr)
4460 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4461 					burst_sz, true);
4462 
4463 		/* Start time meas for enqueue function offload latency */
4464 		enq_start_time = rte_rdtsc_precise();
4465 		do {
4466 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4467 					&ops_enq[enq], burst_sz - enq);
4468 		} while (unlikely(burst_sz != enq));
4469 
4470 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4471 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4472 		TEST_ASSERT_SUCCESS(ret,
4473 				"Failed to get stats for queue (%u) of device (%u)",
4474 				queue_id, dev_id);
4475 
4476 		enq_sw_last_time -= stats.acc_offload_cycles;
4477 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4478 				enq_sw_last_time);
4479 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4480 				enq_sw_last_time);
4481 		time_st->enq_sw_total_time += enq_sw_last_time;
4482 
4483 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4484 				stats.acc_offload_cycles);
4485 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4486 				stats.acc_offload_cycles);
4487 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4488 
4489 		/* give time for device to process ops */
4490 		rte_delay_us(WAIT_OFFLOAD_US);
4491 
4492 		/* Start time meas for dequeue function offload latency */
4493 		deq_start_time = rte_rdtsc_precise();
4494 		/* Dequeue one operation */
4495 		do {
4496 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4497 					&ops_deq[deq], enq);
4498 		} while (unlikely(deq == 0));
4499 
4500 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4501 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4502 				deq_last_time);
4503 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4504 				deq_last_time);
4505 		time_st->deq_total_time += deq_last_time;
4506 
4507 		/* Dequeue remaining operations if needed */
4508 		while (burst_sz != deq)
4509 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4510 					&ops_deq[deq], burst_sz - deq);
4511 
4512 		if (extDdr) {
4513 			/* Read loopback is not thread safe */
4514 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4515 		}
4516 
4517 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4518 		dequeued += deq;
4519 	}
4520 
4521 	return i;
4522 }
4523 
4524 static int
4525 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4526 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4527 		uint16_t queue_id, const uint16_t num_to_process,
4528 		uint16_t burst_sz, struct test_time_stats *time_st)
4529 {
4530 	int i, dequeued, ret;
4531 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4532 	uint64_t enq_start_time, deq_start_time;
4533 	uint64_t enq_sw_last_time, deq_last_time;
4534 	struct rte_bbdev_stats stats;
4535 
4536 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4537 		uint16_t enq = 0, deq = 0;
4538 
4539 		if (unlikely(num_to_process - dequeued < burst_sz))
4540 			burst_sz = num_to_process - dequeued;
4541 
4542 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4543 		TEST_ASSERT_SUCCESS(ret,
4544 				"rte_bbdev_enc_op_alloc_bulk() failed");
4545 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4546 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4547 					bufs->inputs,
4548 					bufs->hard_outputs,
4549 					ref_op);
4550 
4551 		/* Start time meas for enqueue function offload latency */
4552 		enq_start_time = rte_rdtsc_precise();
4553 		do {
4554 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4555 					&ops_enq[enq], burst_sz - enq);
4556 		} while (unlikely(burst_sz != enq));
4557 
4558 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4559 
4560 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4561 		TEST_ASSERT_SUCCESS(ret,
4562 				"Failed to get stats for queue (%u) of device (%u)",
4563 				queue_id, dev_id);
4564 		enq_sw_last_time -= stats.acc_offload_cycles;
4565 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4566 				enq_sw_last_time);
4567 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4568 				enq_sw_last_time);
4569 		time_st->enq_sw_total_time += enq_sw_last_time;
4570 
4571 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4572 				stats.acc_offload_cycles);
4573 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4574 				stats.acc_offload_cycles);
4575 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4576 
4577 		/* give time for device to process ops */
4578 		rte_delay_us(WAIT_OFFLOAD_US);
4579 
4580 		/* Start time meas for dequeue function offload latency */
4581 		deq_start_time = rte_rdtsc_precise();
4582 		/* Dequeue one operation */
4583 		do {
4584 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4585 					&ops_deq[deq], enq);
4586 		} while (unlikely(deq == 0));
4587 
4588 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4589 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4590 				deq_last_time);
4591 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4592 				deq_last_time);
4593 		time_st->deq_total_time += deq_last_time;
4594 
4595 		while (burst_sz != deq)
4596 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4597 					&ops_deq[deq], burst_sz - deq);
4598 
4599 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4600 		dequeued += deq;
4601 	}
4602 
4603 	return i;
4604 }
4605 
4606 static int
4607 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4608 		struct test_buffers *bufs,
4609 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4610 		uint16_t queue_id, const uint16_t num_to_process,
4611 		uint16_t burst_sz, struct test_time_stats *time_st)
4612 {
4613 	int i, dequeued, ret;
4614 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4615 	uint64_t enq_start_time, deq_start_time;
4616 	uint64_t enq_sw_last_time, deq_last_time;
4617 	struct rte_bbdev_stats stats;
4618 
4619 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4620 		uint16_t enq = 0, deq = 0;
4621 
4622 		if (unlikely(num_to_process - dequeued < burst_sz))
4623 			burst_sz = num_to_process - dequeued;
4624 
4625 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4626 		TEST_ASSERT_SUCCESS(ret,
4627 				"rte_bbdev_enc_op_alloc_bulk() failed");
4628 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4629 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4630 					bufs->inputs,
4631 					bufs->hard_outputs,
4632 					ref_op);
4633 
4634 		/* Start time meas for enqueue function offload latency */
4635 		enq_start_time = rte_rdtsc_precise();
4636 		do {
4637 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4638 					&ops_enq[enq], burst_sz - enq);
4639 		} while (unlikely(burst_sz != enq));
4640 
4641 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4642 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4643 		TEST_ASSERT_SUCCESS(ret,
4644 				"Failed to get stats for queue (%u) of device (%u)",
4645 				queue_id, dev_id);
4646 
4647 		enq_sw_last_time -= stats.acc_offload_cycles;
4648 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4649 				enq_sw_last_time);
4650 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4651 				enq_sw_last_time);
4652 		time_st->enq_sw_total_time += enq_sw_last_time;
4653 
4654 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4655 				stats.acc_offload_cycles);
4656 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4657 				stats.acc_offload_cycles);
4658 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4659 
4660 		/* give time for device to process ops */
4661 		rte_delay_us(WAIT_OFFLOAD_US);
4662 
4663 		/* Start time meas for dequeue function offload latency */
4664 		deq_start_time = rte_rdtsc_precise();
4665 		/* Dequeue one operation */
4666 		do {
4667 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4668 					&ops_deq[deq], enq);
4669 		} while (unlikely(deq == 0));
4670 
4671 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4672 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4673 				deq_last_time);
4674 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4675 				deq_last_time);
4676 		time_st->deq_total_time += deq_last_time;
4677 
4678 		while (burst_sz != deq)
4679 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4680 					&ops_deq[deq], burst_sz - deq);
4681 
4682 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4683 		dequeued += deq;
4684 	}
4685 
4686 	return i;
4687 }
4688 #endif
4689 
4690 static int
4691 offload_cost_test(struct active_device *ad,
4692 		struct test_op_params *op_params)
4693 {
4694 #ifndef RTE_BBDEV_OFFLOAD_COST
4695 	RTE_SET_USED(ad);
4696 	RTE_SET_USED(op_params);
4697 	printf("Offload latency test is disabled.\n");
4698 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4699 	return TEST_SKIPPED;
4700 #else
4701 	int iter;
4702 	uint16_t burst_sz = op_params->burst_sz;
4703 	const uint16_t num_to_process = op_params->num_to_process;
4704 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4705 	const uint16_t queue_id = ad->queue_ids[0];
4706 	struct test_buffers *bufs = NULL;
4707 	struct rte_bbdev_info info;
4708 	const char *op_type_str;
4709 	struct test_time_stats time_st;
4710 
4711 	memset(&time_st, 0, sizeof(struct test_time_stats));
4712 	time_st.enq_sw_min_time = UINT64_MAX;
4713 	time_st.enq_acc_min_time = UINT64_MAX;
4714 	time_st.deq_min_time = UINT64_MAX;
4715 
4716 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4717 			"BURST_SIZE should be <= %u", MAX_BURST);
4718 
4719 	rte_bbdev_info_get(ad->dev_id, &info);
4720 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4721 
4722 	op_type_str = rte_bbdev_op_type_str(op_type);
4723 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4724 
4725 	printf("+ ------------------------------------------------------- +\n");
4726 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4727 			info.dev_name, burst_sz, num_to_process, op_type_str);
4728 
4729 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4730 		iter = offload_latency_test_dec(op_params->mp, bufs,
4731 				op_params->ref_dec_op, ad->dev_id, queue_id,
4732 				num_to_process, burst_sz, &time_st);
4733 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4734 		iter = offload_latency_test_enc(op_params->mp, bufs,
4735 				op_params->ref_enc_op, ad->dev_id, queue_id,
4736 				num_to_process, burst_sz, &time_st);
4737 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4738 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4739 				op_params->ref_enc_op, ad->dev_id, queue_id,
4740 				num_to_process, burst_sz, &time_st);
4741 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4742 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4743 			op_params->ref_dec_op, ad->dev_id, queue_id,
4744 			num_to_process, burst_sz, &time_st);
4745 	else
4746 		iter = offload_latency_test_enc(op_params->mp, bufs,
4747 				op_params->ref_enc_op, ad->dev_id, queue_id,
4748 				num_to_process, burst_sz, &time_st);
4749 
4750 	if (iter <= 0)
4751 		return TEST_FAILED;
4752 
4753 	printf("Enqueue driver offload cost latency:\n"
4754 			"\tavg: %lg cycles, %lg us\n"
4755 			"\tmin: %lg cycles, %lg us\n"
4756 			"\tmax: %lg cycles, %lg us\n"
4757 			"Enqueue accelerator offload cost latency:\n"
4758 			"\tavg: %lg cycles, %lg us\n"
4759 			"\tmin: %lg cycles, %lg us\n"
4760 			"\tmax: %lg cycles, %lg us\n",
4761 			(double)time_st.enq_sw_total_time / (double)iter,
4762 			(double)(time_st.enq_sw_total_time * 1000000) /
4763 			(double)iter / (double)rte_get_tsc_hz(),
4764 			(double)time_st.enq_sw_min_time,
4765 			(double)(time_st.enq_sw_min_time * 1000000) /
4766 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4767 			(double)(time_st.enq_sw_max_time * 1000000) /
4768 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4769 			(double)iter,
4770 			(double)(time_st.enq_acc_total_time * 1000000) /
4771 			(double)iter / (double)rte_get_tsc_hz(),
4772 			(double)time_st.enq_acc_min_time,
4773 			(double)(time_st.enq_acc_min_time * 1000000) /
4774 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4775 			(double)(time_st.enq_acc_max_time * 1000000) /
4776 			rte_get_tsc_hz());
4777 
4778 	printf("Dequeue offload cost latency - one op:\n"
4779 			"\tavg: %lg cycles, %lg us\n"
4780 			"\tmin: %lg cycles, %lg us\n"
4781 			"\tmax: %lg cycles, %lg us\n",
4782 			(double)time_st.deq_total_time / (double)iter,
4783 			(double)(time_st.deq_total_time * 1000000) /
4784 			(double)iter / (double)rte_get_tsc_hz(),
4785 			(double)time_st.deq_min_time,
4786 			(double)(time_st.deq_min_time * 1000000) /
4787 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4788 			(double)(time_st.deq_max_time * 1000000) /
4789 			rte_get_tsc_hz());
4790 
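	/* Sanity-check the queue counters: no enqueue/dequeue errors expected, and
	 * the enqueued/dequeued counts are compared only for non-LDPC-decode ops.
	 */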
4791 	struct rte_bbdev_stats stats = {0};
4792 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4793 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4794 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4795 				"Mismatch in enqueue count %10"PRIu64" %d",
4796 				stats.enqueued_count, num_to_process);
4797 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4798 				"Mismatch in dequeue count %10"PRIu64" %d",
4799 				stats.dequeued_count, num_to_process);
4800 	}
4801 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4802 			"Enqueue count Error %10"PRIu64"",
4803 			stats.enqueue_err_count);
4804 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4805 			"Dequeue count Error %10"PRIu64"",
4806 			stats.dequeue_err_count);
4807 
4808 	return TEST_SUCCESS;
4809 #endif
4810 }
4811 
4812 #ifdef RTE_BBDEV_OFFLOAD_COST
4813 static int
4814 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4815 		const uint16_t num_to_process, uint16_t burst_sz,
4816 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4817 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4818 {
4819 	int i, deq_total;
4820 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4821 	uint64_t deq_start_time, deq_last_time;
4822 
4823 	/* Test deq offload latency from an empty queue */
4824 
4825 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4826 			++i, deq_total += burst_sz) {
4827 		deq_start_time = rte_rdtsc_precise();
4828 
4829 		if (unlikely(num_to_process - deq_total < burst_sz))
4830 			burst_sz = num_to_process - deq_total;
4831 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4832 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4833 					burst_sz);
4834 		else
4835 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4836 					burst_sz);
4837 
4838 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4839 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4840 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4841 		*deq_total_time += deq_last_time;
4842 	}
4843 
4844 	return i;
4845 }
4846 
4847 static int
4848 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4849 		const uint16_t num_to_process, uint16_t burst_sz,
4850 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4851 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4852 {
4853 	int i, deq_total;
4854 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4855 	uint64_t deq_start_time, deq_last_time;
4856 
4857 	/* Test deq offload latency from an empty queue */
4858 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4859 			++i, deq_total += burst_sz) {
4860 		if (unlikely(num_to_process - deq_total < burst_sz))
4861 			burst_sz = num_to_process - deq_total;
4862 
4863 		deq_start_time = rte_rdtsc_precise();
4864 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4865 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4866 					burst_sz);
4867 		else
4868 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4869 					burst_sz);
4870 
4871 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4872 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4873 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4874 		*deq_total_time += deq_last_time;
4875 	}
4876 
4877 	return i;
4878 }
4879 
4880 #endif
4881 
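/*
 * Test case: measure the host-side cost of a dequeue call when the queue has
 * nothing to return. The measurement only runs when RTE_BBDEV_OFFLOAD_COST is
 * defined; otherwise the test prints a notice and returns TEST_SKIPPED.
 */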
4882 static int
4883 offload_latency_empty_q_test(struct active_device *ad,
4884 		struct test_op_params *op_params)
4885 {
4886 #ifndef RTE_BBDEV_OFFLOAD_COST
4887 	RTE_SET_USED(ad);
4888 	RTE_SET_USED(op_params);
4889 	printf("Offload latency empty dequeue test is disabled.\n");
4890 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4891 	return TEST_SKIPPED;
4892 #else
4893 	int iter;
4894 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4895 	uint16_t burst_sz = op_params->burst_sz;
4896 	const uint16_t num_to_process = op_params->num_to_process;
4897 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4898 	const uint16_t queue_id = ad->queue_ids[0];
4899 	struct rte_bbdev_info info;
4900 	const char *op_type_str;
4901 
4902 	deq_total_time = deq_max_time = 0;
4903 	deq_min_time = UINT64_MAX;
4904 
4905 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4906 			"BURST_SIZE should be <= %u", MAX_BURST);
4907 
4908 	rte_bbdev_info_get(ad->dev_id, &info);
4909 
4910 	op_type_str = rte_bbdev_op_type_str(op_type);
4911 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4912 
4913 	printf("+ ------------------------------------------------------- +\n");
4914 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4915 			info.dev_name, burst_sz, num_to_process, op_type_str);
4916 
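	/* Use the decode or encode helper depending on the vector's op type. */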
4917 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4918 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4919 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4920 				num_to_process, burst_sz, &deq_total_time,
4921 				&deq_min_time, &deq_max_time, op_type);
4922 	else
4923 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4924 				num_to_process, burst_sz, &deq_total_time,
4925 				&deq_min_time, &deq_max_time, op_type);
4926 
4927 	if (iter <= 0)
4928 		return TEST_FAILED;
4929 
4930 	printf("Empty dequeue offload:\n"
4931 			"\tavg: %lg cycles, %lg us\n"
4932 			"\tmin: %lg cycles, %lg us\n"
4933 			"\tmax: %lg cycles, %lg us\n",
4934 			(double)deq_total_time / (double)iter,
4935 			(double)(deq_total_time * 1000000) / (double)iter /
4936 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4937 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4938 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4939 			rte_get_tsc_hz());
4940 
4941 	return TEST_SUCCESS;
4942 #endif
4943 }
4944 
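/*
 * Thin wrappers so that each benchmark can be registered with the unit test
 * framework through run_test_case().
 */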
4945 static int
4946 bler_tc(void)
4947 {
4948 	return run_test_case(bler_test);
4949 }
4950 
4951 static int
4952 throughput_tc(void)
4953 {
4954 	return run_test_case(throughput_test);
4955 }
4956 
4957 static int
4958 offload_cost_tc(void)
4959 {
4960 	return run_test_case(offload_cost_test);
4961 }
4962 
4963 static int
4964 offload_latency_empty_q_tc(void)
4965 {
4966 	return run_test_case(offload_latency_empty_q_test);
4967 }
4968 
4969 static int
4970 latency_tc(void)
4971 {
4972 	return run_test_case(latency_test);
4973 }
4974 
4975 static int
4976 validation_tc(void)
4977 {
4978 	return run_test_case(validation_test);
4979 }
4980 
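/*
 * The interrupt test case deliberately reuses throughput_test; the
 * interrupt-specific configuration is applied by interrupt_testsuite_setup in
 * the suite definition below.
 */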
4981 static int
4982 interrupt_tc(void)
4983 {
4984 	return run_test_case(throughput_test);
4985 }
4986 
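/*
 * One unit test suite per test mode. All suites share the common device
 * setup/teardown hooks and wrap a single test case each, except the offload
 * cost suite, which wraps two.
 */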
4987 static struct unit_test_suite bbdev_bler_testsuite = {
4988 	.suite_name = "BBdev BLER Tests",
4989 	.setup = testsuite_setup,
4990 	.teardown = testsuite_teardown,
4991 	.unit_test_cases = {
4992 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4993 		TEST_CASES_END() /**< NULL terminate unit test array */
4994 	}
4995 };
4996 
4997 static struct unit_test_suite bbdev_throughput_testsuite = {
4998 	.suite_name = "BBdev Throughput Tests",
4999 	.setup = testsuite_setup,
5000 	.teardown = testsuite_teardown,
5001 	.unit_test_cases = {
5002 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5003 		TEST_CASES_END() /**< NULL terminate unit test array */
5004 	}
5005 };
5006 
5007 static struct unit_test_suite bbdev_validation_testsuite = {
5008 	.suite_name = "BBdev Validation Tests",
5009 	.setup = testsuite_setup,
5010 	.teardown = testsuite_teardown,
5011 	.unit_test_cases = {
5012 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5013 		TEST_CASES_END() /**< NULL terminate unit test array */
5014 	}
5015 };
5016 
5017 static struct unit_test_suite bbdev_latency_testsuite = {
5018 	.suite_name = "BBdev Latency Tests",
5019 	.setup = testsuite_setup,
5020 	.teardown = testsuite_teardown,
5021 	.unit_test_cases = {
5022 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5023 		TEST_CASES_END() /**< NULL terminate unit test array */
5024 	}
5025 };
5026 
5027 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5028 	.suite_name = "BBdev Offload Cost Tests",
5029 	.setup = testsuite_setup,
5030 	.teardown = testsuite_teardown,
5031 	.unit_test_cases = {
5032 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5033 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5034 		TEST_CASES_END() /**< NULL terminate unit test array */
5035 	}
5036 };
5037 
5038 static struct unit_test_suite bbdev_interrupt_testsuite = {
5039 	.suite_name = "BBdev Interrupt Tests",
5040 	.setup = interrupt_testsuite_setup,
5041 	.teardown = testsuite_teardown,
5042 	.unit_test_cases = {
5043 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5044 		TEST_CASES_END() /**< NULL terminate unit test array */
5045 	}
5046 };
5047 
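/*
 * Register each suite under the command name used to select it from the
 * test-bbdev application. Illustrative invocation only (the exact option
 * syntax is defined by the app's argument parsing in main.c):
 *     ./dpdk-test-bbdev -- -c offload -v <test_vector>
 */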
5048 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5049 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5050 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5051 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5052 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5053 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5054