xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 100
28 #define WAIT_OFFLOAD_US 1000
29 
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42 
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #define FLR_5G_TIMEOUT 610
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67 
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70 
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74 
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for filler LLR insertion in the HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants for k0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
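/* These correspond to the k0 starting positions of TS 38.212 Table 5.4.2.1-2,
 * e.g. for basegraph 1 and rv_index 1:
 *   k0 = floor((K0_1_1 * n_cb) / (N_ZC_1 * z_c)) * z_c
 * and similarly with K0_x_y / N_ZC_y for the other rv_index and basegraph
 * combinations (k0 is 0 for rv_index 0).
 */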
89 
90 static struct test_bbdev_vector test_vector;
91 
92 /* Switch between PMD polling and interrupt mode for the throughput test case */
93 static bool intr_enabled;
94 
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100 
101 /* Represents tested active devices */
102 static struct active_device {
103 	const char *driver_name;
104 	uint8_t dev_id;
105 	uint16_t supported_ops;
106 	uint16_t queue_ids[MAX_QUEUES];
107 	uint16_t nb_queues;
108 	struct rte_mempool *ops_mempool;
109 	struct rte_mempool *in_mbuf_pool;
110 	struct rte_mempool *hard_out_mbuf_pool;
111 	struct rte_mempool *soft_out_mbuf_pool;
112 	struct rte_mempool *harq_in_mbuf_pool;
113 	struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115 
116 static uint8_t nb_active_devs;
117 
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120 	struct rte_bbdev_op_data *inputs;
121 	struct rte_bbdev_op_data *hard_outputs;
122 	struct rte_bbdev_op_data *soft_outputs;
123 	struct rte_bbdev_op_data *harq_inputs;
124 	struct rte_bbdev_op_data *harq_outputs;
125 };
126 
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129 	struct rte_mempool *mp;
130 	struct rte_bbdev_dec_op *ref_dec_op;
131 	struct rte_bbdev_enc_op *ref_enc_op;
132 	uint16_t burst_sz;
133 	uint16_t num_to_process;
134 	uint16_t num_lcores;
135 	int vector_mask;
136 	rte_atomic16_t sync;
137 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139 
140 /* Contains per lcore params */
141 struct thread_params {
142 	uint8_t dev_id;
143 	uint16_t queue_id;
144 	uint32_t lcore_id;
145 	uint64_t start_time;
146 	double ops_per_sec;
147 	double mbps;
148 	uint8_t iter_count;
149 	double iter_average;
150 	double bler;
151 	rte_atomic16_t nb_dequeued;
152 	rte_atomic16_t processing_status;
153 	rte_atomic16_t burst_sz;
154 	struct test_op_params *op_params;
155 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158 
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162 	/* Stores software enqueue total working time */
163 	uint64_t enq_sw_total_time;
164 	/* Stores minimum value of software enqueue working time */
165 	uint64_t enq_sw_min_time;
166 	/* Stores maximum value of software enqueue working time */
167 	uint64_t enq_sw_max_time;
168 	/* Stores turbo enqueue total working time */
169 	uint64_t enq_acc_total_time;
170 	/* Stores minimum value of accelerator enqueue working time */
171 	uint64_t enq_acc_min_time;
172 	/* Stores maximum value of accelerator enqueue working time */
173 	uint64_t enq_acc_max_time;
174 	/* Stores dequeue total working time */
175 	uint64_t deq_total_time;
176 	/* Stores minimum value of dequeue working time */
177 	uint64_t deq_min_time;
178 	/* Stores maximum value of dequeue working time */
179 	uint64_t deq_max_time;
180 };
181 #endif
182 
183 typedef int (test_case_function)(struct active_device *ad,
184 		struct test_op_params *op_params);
185 
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189 	m->pkt_len = 0;
190 
191 	do {
192 		m->data_len = 0;
193 		m = m->next;
194 	} while (m != NULL);
195 }
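/* mbuf_reset() clears pkt_len on the head and data_len on every segment,
 * so a previously used (possibly chained) mbuf can be refilled.
 */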
196 
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201 	return bitmap & bitmask;
202 }
203 
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207 	ad->supported_ops |= (1 << op_type);
208 }
209 
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213 	return ad->supported_ops & (1 << op_type);
214 }
215 
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219 	return (flags_req & flags_present) == flags_req;
220 }
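/* For example, flags_match(0x5, 0x7) is true (all requested flags are
 * present) while flags_match(0x5, 0x6) is false (bit 0 is missing).
 */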
221 
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229 
230 /* Convert all the test vector op data entries to big-endian
231  * format. This is used when the device expects its input
232  * in big-endian format.
233  */
234 static inline void
235 convert_op_data_to_be(void)
236 {
237 	struct op_data_entries *op;
238 	enum op_data_type type;
239 	uint8_t nb_segs, *rem_data, temp;
240 	uint32_t *data, len;
241 	int complete, rem, i, j;
242 
243 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
244 		nb_segs = test_vector.entries[type].nb_segments;
245 		op = &test_vector.entries[type];
246 
247 		/* Invert byte endianness for all the segments */
248 		for (i = 0; i < nb_segs; ++i) {
249 			len = op->segments[i].length;
250 			data = op->segments[i].addr;
251 
252 			/* Byte-swap the complete 32-bit words */
253 			complete = len / 4;
254 			for (j = 0; j < complete; j++)
255 				data[j] = rte_bswap32(data[j]);
256 
257 			/* Swap any remaining bytes */
258 			rem = len % 4;
259 			rem_data = (uint8_t *)&data[j];
260 			for (j = 0; j < rem/2; j++) {
261 				temp = rem_data[j];
262 				rem_data[j] = rem_data[rem - j - 1];
263 				rem_data[rem - j - 1] = temp;
264 			}
265 		}
266 	}
267 }
268 
269 static int
270 check_dev_cap(const struct rte_bbdev_info *dev_info)
271 {
272 	unsigned int i;
273 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
274 		nb_harq_inputs, nb_harq_outputs;
275 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
276 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
277 
278 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
279 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
280 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
281 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
282 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
283 
284 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
285 		if (op_cap->type != test_vector.op_type)
286 			continue;
287 
288 		if (dev_data_endianness == RTE_BIG_ENDIAN)
289 			convert_op_data_to_be();
290 
291 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
292 			const struct rte_bbdev_op_cap_turbo_dec *cap =
293 					&op_cap->cap.turbo_dec;
294 			/* Ignore lack of soft output capability, just skip
295 			 * checking if soft output is valid.
296 			 */
297 			if ((test_vector.turbo_dec.op_flags &
298 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
299 					!(cap->capability_flags &
300 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
301 				printf(
302 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
303 					dev_info->dev_name);
304 				clear_soft_out_cap(
305 					&test_vector.turbo_dec.op_flags);
306 			}
307 
308 			if (!flags_match(test_vector.turbo_dec.op_flags,
309 					cap->capability_flags))
310 				return TEST_FAILED;
311 			if (nb_inputs > cap->num_buffers_src) {
312 				printf("Too many inputs defined: %u, max: %u\n",
313 					nb_inputs, cap->num_buffers_src);
314 				return TEST_FAILED;
315 			}
316 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
317 					(test_vector.turbo_dec.op_flags &
318 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319 				printf(
320 					"Too many soft outputs defined: %u, max: %u\n",
321 						nb_soft_outputs,
322 						cap->num_buffers_soft_out);
323 				return TEST_FAILED;
324 			}
325 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
326 				printf(
327 					"Too many hard outputs defined: %u, max: %u\n",
328 						nb_hard_outputs,
329 						cap->num_buffers_hard_out);
330 				return TEST_FAILED;
331 			}
332 			if (intr_enabled && !(cap->capability_flags &
333 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
334 				printf(
335 					"Dequeue interrupts are not supported!\n");
336 				return TEST_FAILED;
337 			}
338 
339 			return TEST_SUCCESS;
340 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
341 			const struct rte_bbdev_op_cap_turbo_enc *cap =
342 					&op_cap->cap.turbo_enc;
343 
344 			if (!flags_match(test_vector.turbo_enc.op_flags,
345 					cap->capability_flags))
346 				return TEST_FAILED;
347 			if (nb_inputs > cap->num_buffers_src) {
348 				printf("Too many inputs defined: %u, max: %u\n",
349 					nb_inputs, cap->num_buffers_src);
350 				return TEST_FAILED;
351 			}
352 			if (nb_hard_outputs > cap->num_buffers_dst) {
353 				printf(
354 					"Too many hard outputs defined: %u, max: %u\n",
355 					nb_hard_outputs, cap->num_buffers_dst);
356 				return TEST_FAILED;
357 			}
358 			if (intr_enabled && !(cap->capability_flags &
359 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
360 				printf(
361 					"Dequeue interrupts are not supported!\n");
362 				return TEST_FAILED;
363 			}
364 
365 			return TEST_SUCCESS;
366 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
367 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
368 					&op_cap->cap.ldpc_enc;
369 
370 			if (!flags_match(test_vector.ldpc_enc.op_flags,
371 					cap->capability_flags)){
372 				printf("Flag Mismatch\n");
373 				return TEST_FAILED;
374 			}
375 			if (nb_inputs > cap->num_buffers_src) {
376 				printf("Too many inputs defined: %u, max: %u\n",
377 					nb_inputs, cap->num_buffers_src);
378 				return TEST_FAILED;
379 			}
380 			if (nb_hard_outputs > cap->num_buffers_dst) {
381 				printf(
382 					"Too many hard outputs defined: %u, max: %u\n",
383 					nb_hard_outputs, cap->num_buffers_dst);
384 				return TEST_FAILED;
385 			}
386 			if (intr_enabled && !(cap->capability_flags &
387 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
388 				printf(
389 					"Dequeue interrupts are not supported!\n");
390 				return TEST_FAILED;
391 			}
392 
393 			return TEST_SUCCESS;
394 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
395 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
396 					&op_cap->cap.ldpc_dec;
397 
398 			if (!flags_match(test_vector.ldpc_dec.op_flags,
399 					cap->capability_flags)){
400 				printf("Flag Mismatch\n");
401 				return TEST_FAILED;
402 			}
403 			if (nb_inputs > cap->num_buffers_src) {
404 				printf("Too many inputs defined: %u, max: %u\n",
405 					nb_inputs, cap->num_buffers_src);
406 				return TEST_FAILED;
407 			}
408 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
409 				printf(
410 					"Too many hard outputs defined: %u, max: %u\n",
411 					nb_hard_outputs,
412 					cap->num_buffers_hard_out);
413 				return TEST_FAILED;
414 			}
415 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
416 				printf(
417 					"Too many HARQ inputs defined: %u, max: %u\n",
418 					nb_harq_inputs,
419 					cap->num_buffers_hard_out);
420 				return TEST_FAILED;
421 			}
422 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
423 				printf(
424 					"Too many HARQ outputs defined: %u, max: %u\n",
425 					nb_harq_outputs,
426 					cap->num_buffers_hard_out);
427 				return TEST_FAILED;
428 			}
429 			if (intr_enabled && !(cap->capability_flags &
430 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
431 				printf(
432 					"Dequeue interrupts are not supported!\n");
433 				return TEST_FAILED;
434 			}
435 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
436 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
437 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
438 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
439 					))) {
440 				printf("Skip loop-back with interrupt\n");
441 				return TEST_FAILED;
442 			}
443 			return TEST_SUCCESS;
444 		}
445 	}
446 
447 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
448 		return TEST_SUCCESS; /* Special case for NULL device */
449 
450 	return TEST_FAILED;
451 }
452 
453 /* Calculate the optimal mempool size: the smallest (2^n - 1) not smaller than val */
454 static unsigned int
455 optimal_mempool_size(unsigned int val)
456 {
457 	return rte_align32pow2(val + 1) - 1;
458 }
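/* For instance, optimal_mempool_size(1000) returns 1023: mempools are
 * typically most memory-efficient when their size is one less than a
 * power of two, hence the rounding up to the next (2^n - 1).
 */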
459 
460 /* Allocate an mbuf mempool for input or output buffers */
461 static struct rte_mempool *
462 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
463 		int socket_id, unsigned int mbuf_pool_size,
464 		const char *op_type_str)
465 {
466 	unsigned int i;
467 	uint32_t max_seg_sz = 0;
468 	char pool_name[RTE_MEMPOOL_NAMESIZE];
469 
470 	/* find max input segment size */
471 	for (i = 0; i < entries->nb_segments; ++i)
472 		if (entries->segments[i].length > max_seg_sz)
473 			max_seg_sz = entries->segments[i].length;
474 
475 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476 			dev_id);
477 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
478 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
479 					+ FILLER_HEADROOM,
480 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
481 }
482 
483 static int
484 create_mempools(struct active_device *ad, int socket_id,
485 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
486 {
487 	struct rte_mempool *mp;
488 	unsigned int ops_pool_size, mbuf_pool_size = 0;
489 	char pool_name[RTE_MEMPOOL_NAMESIZE];
490 	const char *op_type_str;
491 	enum rte_bbdev_op_type op_type = org_op_type;
492 
493 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
494 	struct op_data_entries *hard_out =
495 			&test_vector.entries[DATA_HARD_OUTPUT];
496 	struct op_data_entries *soft_out =
497 			&test_vector.entries[DATA_SOFT_OUTPUT];
498 	struct op_data_entries *harq_in =
499 			&test_vector.entries[DATA_HARQ_INPUT];
500 	struct op_data_entries *harq_out =
501 			&test_vector.entries[DATA_HARQ_OUTPUT];
502 
503 	/* allocate ops mempool */
504 	ops_pool_size = optimal_mempool_size(RTE_MAX(
505 			/* Ops used plus 1 reference op */
506 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
507 			/* Minimal cache size plus 1 reference op */
508 			(unsigned int)(1.5 * rte_lcore_count() *
509 					OPS_CACHE_SIZE + 1)),
510 			OPS_POOL_SIZE_MIN));
511 
512 	if (org_op_type == RTE_BBDEV_OP_NONE)
513 		op_type = RTE_BBDEV_OP_TURBO_ENC;
514 
515 	op_type_str = rte_bbdev_op_type_str(op_type);
516 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
517 
518 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
519 			ad->dev_id);
520 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
521 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
522 	TEST_ASSERT_NOT_NULL(mp,
523 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
524 			ops_pool_size,
525 			ad->dev_id,
526 			socket_id);
527 	ad->ops_mempool = mp;
528 
529 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
530 	if (org_op_type == RTE_BBDEV_OP_NONE)
531 		return TEST_SUCCESS;
532 
533 	/* Inputs */
534 	if (in->nb_segments > 0) {
535 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
536 				in->nb_segments);
537 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
538 				mbuf_pool_size, "in");
539 		TEST_ASSERT_NOT_NULL(mp,
540 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
541 				mbuf_pool_size,
542 				ad->dev_id,
543 				socket_id);
544 		ad->in_mbuf_pool = mp;
545 	}
546 
547 	/* Hard outputs */
548 	if (hard_out->nb_segments > 0) {
549 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
550 				hard_out->nb_segments);
551 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
552 				mbuf_pool_size,
553 				"hard_out");
554 		TEST_ASSERT_NOT_NULL(mp,
555 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
556 				mbuf_pool_size,
557 				ad->dev_id,
558 				socket_id);
559 		ad->hard_out_mbuf_pool = mp;
560 	}
561 
562 	/* Soft outputs */
563 	if (soft_out->nb_segments > 0) {
564 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
565 				soft_out->nb_segments);
566 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
567 				mbuf_pool_size,
568 				"soft_out");
569 		TEST_ASSERT_NOT_NULL(mp,
570 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
571 				mbuf_pool_size,
572 				ad->dev_id,
573 				socket_id);
574 		ad->soft_out_mbuf_pool = mp;
575 	}
576 
577 	/* HARQ inputs */
578 	if (harq_in->nb_segments > 0) {
579 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
580 				harq_in->nb_segments);
581 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
582 				mbuf_pool_size,
583 				"harq_in");
584 		TEST_ASSERT_NOT_NULL(mp,
585 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
586 				mbuf_pool_size,
587 				ad->dev_id,
588 				socket_id);
589 		ad->harq_in_mbuf_pool = mp;
590 	}
591 
592 	/* HARQ outputs */
593 	if (harq_out->nb_segments > 0) {
594 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
595 				harq_out->nb_segments);
596 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
597 				mbuf_pool_size,
598 				"harq_out");
599 		TEST_ASSERT_NOT_NULL(mp,
600 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
601 				mbuf_pool_size,
602 				ad->dev_id,
603 				socket_id);
604 		ad->harq_out_mbuf_pool = mp;
605 	}
606 
607 	return TEST_SUCCESS;
608 }
609 
610 static int
611 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
612 		struct test_bbdev_vector *vector)
613 {
614 	int ret;
615 	unsigned int queue_id;
616 	struct rte_bbdev_queue_conf qconf;
617 	struct active_device *ad = &active_devs[nb_active_devs];
618 	unsigned int nb_queues;
619 	enum rte_bbdev_op_type op_type = vector->op_type;
620 
621 /* Configure the FPGA LTE FEC device with PF & VF values
622  * if the '-i' flag is set and an FPGA device is in use
623  */
624 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
625 	if ((get_init_device() == true) &&
626 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
627 		struct rte_fpga_lte_fec_conf conf;
628 		unsigned int i;
629 
630 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
631 				info->drv.driver_name);
632 
633 		/* clear default configuration before initialization */
634 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
635 
636 		/* Set PF mode:
637 		 * true if the PF is used for the data plane,
638 		 * false if the VFs are used
639 		 */
640 		conf.pf_mode_en = true;
641 
642 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
643 			/* Number of UL queues per VF (fpga supports 8 VFs) */
644 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
645 			/* Number of DL queues per VF (fpga supports 8 VFs) */
646 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
647 		}
648 
649 		/* UL bandwidth. Needed for the scheduling algorithm */
650 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
651 		/* DL bandwidth */
652 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
653 
654 		/* UL & DL load balance factor */
655 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
656 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
657 
658 		/* FLR timeout value */
659 		conf.flr_time_out = FLR_4G_TIMEOUT;
660 
661 		/* setup FPGA PF with configuration information */
662 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
663 		TEST_ASSERT_SUCCESS(ret,
664 				"Failed to configure 4G FPGA PF for bbdev %s",
665 				info->dev_name);
666 	}
667 #endif
668 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
669 	if ((get_init_device() == true) &&
670 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
671 		struct rte_fpga_5gnr_fec_conf conf;
672 		unsigned int i;
673 
674 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
675 				info->drv.driver_name);
676 
677 		/* clear default configuration before initialization */
678 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
679 
680 		/* Set PF mode:
681 		 * true if the PF is used for the data plane,
682 		 * false if the VFs are used
683 		 */
684 		conf.pf_mode_en = true;
685 
686 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
687 			/* Number of UL queues per VF (fpga supports 8 VFs) */
688 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
689 			/* Number of DL queues per VF (fpga supports 8 VFs) */
690 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
691 		}
692 
693 		/* UL bandwidth. Needed for the scheduling algorithm */
694 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
695 		/* DL bandwidth */
696 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
697 
698 		/* UL & DL load balance factor */
699 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
700 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
701 
702 		/* FLR timeout value */
703 		conf.flr_time_out = FLR_5G_TIMEOUT;
704 
705 		/* setup FPGA PF with configuration information */
706 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
707 		TEST_ASSERT_SUCCESS(ret,
708 				"Failed to configure 5G FPGA PF for bbdev %s",
709 				info->dev_name);
710 	}
711 #endif
712 #ifdef RTE_BASEBAND_ACC100
713 	if ((get_init_device() == true) &&
714 		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
715 		struct rte_acc100_conf conf;
716 		unsigned int i;
717 
718 		printf("Configure ACC100 FEC Driver %s with default values\n",
719 				info->drv.driver_name);
720 
721 		/* clear default configuration before initialization */
722 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
723 
724 		/* Always set in PF mode for built-in configuration */
725 		conf.pf_mode_en = true;
726 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
727 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
728 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
729 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
730 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
731 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
732 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
733 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
734 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
735 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
736 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
737 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
738 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
739 		}
740 
741 		conf.input_pos_llr_1_bit = true;
742 		conf.output_pos_llr_1_bit = true;
743 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
744 
745 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
746 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
747 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
748 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
749 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
750 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
751 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
752 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
753 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
754 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
755 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
756 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
757 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
758 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
759 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
760 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
761 
762 		/* setup PF with configuration information */
763 		ret = rte_acc100_configure(info->dev_name, &conf);
764 		TEST_ASSERT_SUCCESS(ret,
765 				"Failed to configure ACC100 PF for bbdev %s",
766 				info->dev_name);
767 	}
768 #endif
769 	/* Refresh the device info now that the device has been configured */
770 	rte_bbdev_info_get(dev_id, info);
771 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
772 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
773 
774 	/* setup device */
775 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
776 	if (ret < 0) {
777 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
778 				dev_id, nb_queues, info->socket_id, ret);
779 		return TEST_FAILED;
780 	}
781 
782 	/* configure interrupts if needed */
783 	if (intr_enabled) {
784 		ret = rte_bbdev_intr_enable(dev_id);
785 		if (ret < 0) {
786 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
787 					ret);
788 			return TEST_FAILED;
789 		}
790 	}
791 
792 	/* setup device queues */
793 	qconf.socket = info->socket_id;
794 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
795 	qconf.priority = 0;
796 	qconf.deferred_start = 0;
797 	qconf.op_type = op_type;
798 
799 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
800 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
801 		if (ret != 0) {
802 			printf(
803 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
804 					queue_id, qconf.priority, dev_id);
805 			qconf.priority++;
806 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
807 					&qconf);
808 		}
809 		if (ret != 0) {
810 			printf("All queues on dev %u allocated: %u\n",
811 					dev_id, queue_id);
812 			break;
813 		}
814 		ad->queue_ids[queue_id] = queue_id;
815 	}
816 	TEST_ASSERT(queue_id != 0,
817 			"ERROR Failed to configure any queues on dev %u",
818 			dev_id);
819 	ad->nb_queues = queue_id;
820 
821 	set_avail_op(ad, op_type);
822 
823 	return TEST_SUCCESS;
824 }
825 
826 static int
827 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
828 		struct test_bbdev_vector *vector)
829 {
830 	int ret;
831 
832 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
833 	active_devs[nb_active_devs].dev_id = dev_id;
834 
835 	ret = add_bbdev_dev(dev_id, info, vector);
836 	if (ret == TEST_SUCCESS)
837 		++nb_active_devs;
838 	return ret;
839 }
840 
841 static uint8_t
842 populate_active_devices(void)
843 {
844 	int ret;
845 	uint8_t dev_id;
846 	uint8_t nb_devs_added = 0;
847 	struct rte_bbdev_info info;
848 
849 	RTE_BBDEV_FOREACH(dev_id) {
850 		rte_bbdev_info_get(dev_id, &info);
851 
852 		if (check_dev_cap(&info)) {
853 			printf(
854 				"Device %d (%s) does not support specified capabilities\n",
855 					dev_id, info.dev_name);
856 			continue;
857 		}
858 
859 		ret = add_active_device(dev_id, &info, &test_vector);
860 		if (ret != 0) {
861 			printf("Adding active bbdev %s skipped\n",
862 					info.dev_name);
863 			continue;
864 		}
865 		nb_devs_added++;
866 	}
867 
868 	return nb_devs_added;
869 }
870 
871 static int
872 read_test_vector(void)
873 {
874 	int ret;
875 
876 	memset(&test_vector, 0, sizeof(test_vector));
877 	printf("Test vector file = %s\n", get_vector_filename());
878 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
879 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
880 			get_vector_filename());
881 
882 	return TEST_SUCCESS;
883 }
884 
885 static int
886 testsuite_setup(void)
887 {
888 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
889 
890 	if (populate_active_devices() == 0) {
891 		printf("No suitable devices found!\n");
892 		return TEST_SKIPPED;
893 	}
894 
895 	return TEST_SUCCESS;
896 }
897 
898 static int
899 interrupt_testsuite_setup(void)
900 {
901 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
902 
903 	/* Enable interrupts */
904 	intr_enabled = true;
905 
906 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
907 	if (populate_active_devices() == 0 ||
908 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
909 		intr_enabled = false;
910 		printf("No suitable devices found!\n");
911 		return TEST_SKIPPED;
912 	}
913 
914 	return TEST_SUCCESS;
915 }
916 
917 static void
918 testsuite_teardown(void)
919 {
920 	uint8_t dev_id;
921 
922 	/* Unconfigure devices */
923 	RTE_BBDEV_FOREACH(dev_id)
924 		rte_bbdev_close(dev_id);
925 
926 	/* Clear active devices structs. */
927 	memset(active_devs, 0, sizeof(active_devs));
928 	nb_active_devs = 0;
929 
930 	/* Disable interrupts */
931 	intr_enabled = false;
932 }
933 
934 static int
935 ut_setup(void)
936 {
937 	uint8_t i, dev_id;
938 
939 	for (i = 0; i < nb_active_devs; i++) {
940 		dev_id = active_devs[i].dev_id;
941 		/* reset bbdev stats */
942 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
943 				"Failed to reset stats of bbdev %u", dev_id);
944 		/* start the device */
945 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
946 				"Failed to start bbdev %u", dev_id);
947 	}
948 
949 	return TEST_SUCCESS;
950 }
951 
952 static void
953 ut_teardown(void)
954 {
955 	uint8_t i, dev_id;
956 	struct rte_bbdev_stats stats;
957 
958 	for (i = 0; i < nb_active_devs; i++) {
959 		dev_id = active_devs[i].dev_id;
960 		/* read stats and print */
961 		rte_bbdev_stats_get(dev_id, &stats);
962 		/* Stop the device */
963 		rte_bbdev_stop(dev_id);
964 	}
965 }
966 
967 static int
968 init_op_data_objs(struct rte_bbdev_op_data *bufs,
969 		struct op_data_entries *ref_entries,
970 		struct rte_mempool *mbuf_pool, const uint16_t n,
971 		enum op_data_type op_type, uint16_t min_alignment)
972 {
973 	int ret;
974 	unsigned int i, j;
975 	bool large_input = false;
976 
977 	for (i = 0; i < n; ++i) {
978 		char *data;
979 		struct op_data_buf *seg = &ref_entries->segments[0];
980 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
981 		TEST_ASSERT_NOT_NULL(m_head,
982 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
983 				op_type, n * ref_entries->nb_segments,
984 				mbuf_pool->size);
985 
986 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
987 			/*
988 			 * Special case when DPDK mbuf cannot handle
989 			 * the required input size
990 			 */
991 			printf("Warning: input size %u is larger than a DPDK mbuf can hold\n",
992 					seg->length);
993 			large_input = true;
994 		}
995 		bufs[i].data = m_head;
996 		bufs[i].offset = 0;
997 		bufs[i].length = 0;
998 
999 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1000 			if ((op_type == DATA_INPUT) && large_input) {
1001 				/* Back the mbuf with an oversized buffer from rte_malloc() */
1002 				data = rte_malloc(NULL, seg->length, 0);
1003 				TEST_ASSERT_NOT_NULL(data,
1004 					"rte malloc failed with %u bytes",
1005 					seg->length);
1006 				memcpy(data, seg->addr, seg->length);
1007 				m_head->buf_addr = data;
1008 				m_head->buf_iova = rte_malloc_virt2iova(data);
1009 				m_head->data_off = 0;
1010 				m_head->data_len = seg->length;
1011 			} else {
1012 				data = rte_pktmbuf_append(m_head, seg->length);
1013 				TEST_ASSERT_NOT_NULL(data,
1014 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1015 					seg->length, op_type);
1016 
1017 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1018 						data, min_alignment),
1019 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1020 					data, min_alignment);
1021 				rte_memcpy(data, seg->addr, seg->length);
1022 			}
1023 
1024 			bufs[i].length += seg->length;
1025 
1026 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1027 				struct rte_mbuf *m_tail =
1028 						rte_pktmbuf_alloc(mbuf_pool);
1029 				TEST_ASSERT_NOT_NULL(m_tail,
1030 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1031 						op_type,
1032 						n * ref_entries->nb_segments,
1033 						mbuf_pool->size);
1034 				seg += 1;
1035 
1036 				data = rte_pktmbuf_append(m_tail, seg->length);
1037 				TEST_ASSERT_NOT_NULL(data,
1038 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1039 						seg->length, op_type);
1040 
1041 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1042 						min_alignment),
1043 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1044 						data, min_alignment);
1045 				rte_memcpy(data, seg->addr, seg->length);
1046 				bufs[i].length += seg->length;
1047 
1048 				ret = rte_pktmbuf_chain(m_head, m_tail);
1049 				TEST_ASSERT_SUCCESS(ret,
1050 						"Couldn't chain mbufs from %d data type mbuf pool",
1051 						op_type);
1052 			}
1053 		} else {
1054 
1055 			/* Allocate a chained mbuf for the output buffer */
1056 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1057 				struct rte_mbuf *m_tail =
1058 						rte_pktmbuf_alloc(mbuf_pool);
1059 				TEST_ASSERT_NOT_NULL(m_tail,
1060 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1061 						op_type,
1062 						n * ref_entries->nb_segments,
1063 						mbuf_pool->size);
1064 
1065 				ret = rte_pktmbuf_chain(m_head, m_tail);
1066 				TEST_ASSERT_SUCCESS(ret,
1067 						"Couldn't chain mbufs from %d data type mbuf pool",
1068 						op_type);
1069 			}
1070 		}
1071 	}
1072 
1073 	return 0;
1074 }
1075 
1076 static int
1077 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1078 		const int socket)
1079 {
1080 	int i;
1081 
1082 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1083 	if (*buffers == NULL) {
1084 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1085 				socket);
1086 		/* try to allocate memory on other detected sockets */
1087 		for (i = 0; i < socket; i++) {
1088 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1089 			if (*buffers != NULL)
1090 				break;
1091 		}
1092 	}
1093 
1094 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1095 }
1096 
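/* Rescale the input LLRs to the device's max_llr_modulus range,
 * e.g. with max_llr_modulus = 16 an LLR of 127 becomes
 * round(16 * 127 / 127) = 16 and -64 becomes round(16 * -64 / 127) = -8.
 */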
1097 static void
1098 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1099 		const uint16_t n, const int8_t max_llr_modulus)
1100 {
1101 	uint16_t i, byte_idx;
1102 
1103 	for (i = 0; i < n; ++i) {
1104 		struct rte_mbuf *m = input_ops[i].data;
1105 		while (m != NULL) {
1106 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1107 					input_ops[i].offset);
1108 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1109 					++byte_idx)
1110 				llr[byte_idx] = round((double)max_llr_modulus *
1111 						llr[byte_idx] / INT8_MAX);
1112 
1113 			m = m->next;
1114 		}
1115 	}
1116 }
1117 
1118 /*
1119  * We may have to insert filler LLRs into the HARQ input
1120  * when the device's HARQ memory layout requires them
1121  */
1122 static void
1123 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1124 		const uint16_t n, struct test_op_params *op_params)
1125 {
1126 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1127 
1128 	if (input_ops == NULL)
1129 		return;
1130 	/* No need to add filler if not required by device */
1131 	if (!(ldpc_cap_flags &
1132 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1133 		return;
1134 	/* No need to add filler for loopback operation */
1135 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1136 		return;
1137 
1138 	uint16_t i, j, parity_offset;
1139 	for (i = 0; i < n; ++i) {
1140 		struct rte_mbuf *m = input_ops[i].data;
1141 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1142 				input_ops[i].offset);
1143 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1144 				* dec.z_c - dec.n_filler;
1145 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1146 		m->data_len = new_hin_size;
1147 		input_ops[i].length = new_hin_size;
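		/* Shift the parity LLRs up by n_filler positions and fill the
		 * gap at parity_offset with saturated LLRs, as assumed by
		 * devices exposing RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS.
		 */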
1148 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1149 				j--)
1150 			llr[j] = llr[j - dec.n_filler];
1151 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1152 		for (j = 0; j < dec.n_filler; j++)
1153 			llr[parity_offset + j] = llr_max_pre_scaling;
1154 	}
1155 }
1156 
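/* Scale the vector LLRs to the device LLR fixed-point format
 * (llr_size total bits, llr_decimals fractional bits); e.g. with
 * llr_size = 8 and llr_decimals = 4 the LLRs are multiplied by 8 and
 * clamped to [-127, 127].
 */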
1157 static void
1158 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1159 		const uint16_t n, const int8_t llr_size,
1160 		const int8_t llr_decimals)
1161 {
1162 	if (input_ops == NULL)
1163 		return;
1164 
1165 	uint16_t i, byte_idx;
1166 
1167 	int16_t llr_max, llr_min, llr_tmp;
1168 	llr_max = (1 << (llr_size - 1)) - 1;
1169 	llr_min = -llr_max;
1170 	for (i = 0; i < n; ++i) {
1171 		struct rte_mbuf *m = input_ops[i].data;
1172 		while (m != NULL) {
1173 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1174 					input_ops[i].offset);
1175 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1176 					++byte_idx) {
1177 
1178 				llr_tmp = llr[byte_idx];
1179 				if (llr_decimals == 4)
1180 					llr_tmp *= 8;
1181 				else if (llr_decimals == 2)
1182 					llr_tmp *= 2;
1183 				else if (llr_decimals == 0)
1184 					llr_tmp /= 2;
1185 				llr_tmp = RTE_MIN(llr_max,
1186 						RTE_MAX(llr_min, llr_tmp));
1187 				llr[byte_idx] = (int8_t) llr_tmp;
1188 			}
1189 
1190 			m = m->next;
1191 		}
1192 	}
1193 }
1194 
1195 
1196 
1197 static int
1198 fill_queue_buffers(struct test_op_params *op_params,
1199 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1200 		struct rte_mempool *soft_out_mp,
1201 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1202 		uint16_t queue_id,
1203 		const struct rte_bbdev_op_cap *capabilities,
1204 		uint16_t min_alignment, const int socket_id)
1205 {
1206 	int ret;
1207 	enum op_data_type type;
1208 	const uint16_t n = op_params->num_to_process;
1209 
1210 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1211 		in_mp,
1212 		soft_out_mp,
1213 		hard_out_mp,
1214 		harq_in_mp,
1215 		harq_out_mp,
1216 	};
1217 
1218 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1219 		&op_params->q_bufs[socket_id][queue_id].inputs,
1220 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1221 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1222 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1223 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1224 	};
1225 
1226 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1227 		struct op_data_entries *ref_entries =
1228 				&test_vector.entries[type];
1229 		if (ref_entries->nb_segments == 0)
1230 			continue;
1231 
1232 		ret = allocate_buffers_on_socket(queue_ops[type],
1233 				n * sizeof(struct rte_bbdev_op_data),
1234 				socket_id);
1235 		TEST_ASSERT_SUCCESS(ret,
1236 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1237 
1238 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1239 				mbuf_pools[type], n, type, min_alignment);
1240 		TEST_ASSERT_SUCCESS(ret,
1241 				"Couldn't init rte_bbdev_op_data structs");
1242 	}
1243 
1244 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1245 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1246 			capabilities->cap.turbo_dec.max_llr_modulus);
1247 
1248 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1249 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1250 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1251 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1252 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1253 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1254 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1255 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1256 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1257 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1258 		if (!loopback && !llr_comp)
1259 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1260 					ldpc_llr_size, ldpc_llr_decimals);
1261 		if (!loopback && !harq_comp)
1262 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1263 					ldpc_llr_size, ldpc_llr_decimals);
1264 		if (!loopback)
1265 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1266 					op_params);
1267 	}
1268 
1269 	return 0;
1270 }
1271 
1272 static void
1273 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1274 {
1275 	unsigned int i, j;
1276 
1277 	rte_mempool_free(ad->ops_mempool);
1278 	rte_mempool_free(ad->in_mbuf_pool);
1279 	rte_mempool_free(ad->hard_out_mbuf_pool);
1280 	rte_mempool_free(ad->soft_out_mbuf_pool);
1281 	rte_mempool_free(ad->harq_in_mbuf_pool);
1282 	rte_mempool_free(ad->harq_out_mbuf_pool);
1283 
1284 	for (i = 0; i < rte_lcore_count(); ++i) {
1285 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1286 			rte_free(op_params->q_bufs[j][i].inputs);
1287 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1288 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1289 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1290 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1291 		}
1292 	}
1293 }
1294 
1295 static void
1296 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1297 		unsigned int start_idx,
1298 		struct rte_bbdev_op_data *inputs,
1299 		struct rte_bbdev_op_data *hard_outputs,
1300 		struct rte_bbdev_op_data *soft_outputs,
1301 		struct rte_bbdev_dec_op *ref_op)
1302 {
1303 	unsigned int i;
1304 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1305 
1306 	for (i = 0; i < n; ++i) {
1307 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1308 			ops[i]->turbo_dec.tb_params.ea =
1309 					turbo_dec->tb_params.ea;
1310 			ops[i]->turbo_dec.tb_params.eb =
1311 					turbo_dec->tb_params.eb;
1312 			ops[i]->turbo_dec.tb_params.k_pos =
1313 					turbo_dec->tb_params.k_pos;
1314 			ops[i]->turbo_dec.tb_params.k_neg =
1315 					turbo_dec->tb_params.k_neg;
1316 			ops[i]->turbo_dec.tb_params.c =
1317 					turbo_dec->tb_params.c;
1318 			ops[i]->turbo_dec.tb_params.c_neg =
1319 					turbo_dec->tb_params.c_neg;
1320 			ops[i]->turbo_dec.tb_params.cab =
1321 					turbo_dec->tb_params.cab;
1322 			ops[i]->turbo_dec.tb_params.r =
1323 					turbo_dec->tb_params.r;
1324 		} else {
1325 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1326 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1327 		}
1328 
1329 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1330 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1331 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1332 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1333 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1334 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1335 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1336 
1337 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1338 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1339 		if (soft_outputs != NULL)
1340 			ops[i]->turbo_dec.soft_output =
1341 				soft_outputs[start_idx + i];
1342 	}
1343 }
1344 
1345 static void
1346 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1347 		unsigned int start_idx,
1348 		struct rte_bbdev_op_data *inputs,
1349 		struct rte_bbdev_op_data *outputs,
1350 		struct rte_bbdev_enc_op *ref_op)
1351 {
1352 	unsigned int i;
1353 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1354 	for (i = 0; i < n; ++i) {
1355 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1356 			ops[i]->turbo_enc.tb_params.ea =
1357 					turbo_enc->tb_params.ea;
1358 			ops[i]->turbo_enc.tb_params.eb =
1359 					turbo_enc->tb_params.eb;
1360 			ops[i]->turbo_enc.tb_params.k_pos =
1361 					turbo_enc->tb_params.k_pos;
1362 			ops[i]->turbo_enc.tb_params.k_neg =
1363 					turbo_enc->tb_params.k_neg;
1364 			ops[i]->turbo_enc.tb_params.c =
1365 					turbo_enc->tb_params.c;
1366 			ops[i]->turbo_enc.tb_params.c_neg =
1367 					turbo_enc->tb_params.c_neg;
1368 			ops[i]->turbo_enc.tb_params.cab =
1369 					turbo_enc->tb_params.cab;
1370 			ops[i]->turbo_enc.tb_params.ncb_pos =
1371 					turbo_enc->tb_params.ncb_pos;
1372 			ops[i]->turbo_enc.tb_params.ncb_neg =
1373 					turbo_enc->tb_params.ncb_neg;
1374 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1375 		} else {
1376 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1377 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1378 			ops[i]->turbo_enc.cb_params.ncb =
1379 					turbo_enc->cb_params.ncb;
1380 		}
1381 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1382 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1383 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1384 
1385 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1386 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1387 	}
1388 }
1389 
1390 
1391 /* Return a random number drawn from a normal distribution
1392  * with mean 0 and variance 1,
1393  * using the Marsaglia polar method.
1394  */
1395 static double
1396 randn(int n)
1397 {
1398 	double S, Z, U1, U2, u, v, fac;
1399 
1400 	do {
1401 		U1 = (double)rand() / RAND_MAX;
1402 		U2 = (double)rand() / RAND_MAX;
1403 		u = 2. * U1 - 1.;
1404 		v = 2. * U2 - 1.;
1405 		S = u * u + v * v;
1406 	} while (S >= 1 || S == 0);
1407 	fac = sqrt(-2. * log(S) / S);
1408 	Z = (n % 2) ? u * fac : v * fac;
1409 	return Z;
1410 }
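/* Note: the Marsaglia polar method in randn() yields two independent
 * N(0, 1) samples per accepted (u, v) pair, u * fac and v * fac;
 * the parity of n selects which of the two is returned.
 */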
1411 
1412 static inline double
1413 maxstar(double A, double B)
1414 {
1415 	if (fabs(A - B) > 5)
1416 		return RTE_MAX(A, B);
1417 	else
1418 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1419 }
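/* maxstar(A, B) approximates log(exp(A) + exp(B)) = max(A, B)
 * + log1p(exp(-|A - B|)); for |A - B| > 5 the correction term is below
 * log1p(exp(-5)) ~= 0.0067 and is dropped.
 */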
1420 
1421 /*
1422  * Generate Qm LLRs for Qm == 8:
1423  * modulation, AWGN channel and LLR estimation using the max-log approximation
1424  */
1425 static void
1426 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1427 {
1428 	int qm = 8;
1429 	int qam = 256;
1430 	int m, k;
1431 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1432 	/* 5.1.4 of TS38.211 */
1433 	const double symbols_I[256] = {
1434 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1435 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1436 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1437 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1438 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1439 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1440 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1441 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1442 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1443 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1444 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1445 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1446 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1447 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1448 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1449 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1450 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1451 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1452 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1453 			-13, -13, -15, -15, -13, -13, -15, -15};
1454 	const double symbols_Q[256] = {
1455 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1456 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1457 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1458 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1459 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1460 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1461 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1462 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1463 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1464 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1465 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1466 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1467 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1468 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1469 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1470 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1471 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1472 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1473 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1474 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1475 	/* Average constellation point energy */
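	/* 170 is the average 256QAM symbol energy E[|s|^2]:
	 * (1 + 9 + 25 + ... + 225) / 8 = 85 per dimension, times 2 = 170.
	 * The factors 42 (64QAM) and 10 (16QAM) used below follow the same rule.
	 */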
1476 	N0 *= 170.0;
1477 	for (k = 0; k < qm; k++)
1478 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1479 	/* 5.1.4 of TS38.211 */
1480 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1481 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1482 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1483 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1484 	/* AWGN channel */
1485 	I += sqrt(N0 / 2) * randn(0);
1486 	Q += sqrt(N0 / 2) * randn(1);
1487 	/*
1488 	 * Calculate the log of the probability that each of
1489 	 * the constellation points was transmitted
1490 	 */
1491 	for (m = 0; m < qam; m++)
1492 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1493 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1494 	/* Calculate an LLR for each of the qm bits in the set */
1495 	for (k = 0; k < qm; k++) {
1496 		p0 = -999999;
1497 		p1 = -999999;
1498 		/* For each constellation point */
1499 		for (m = 0; m < qam; m++) {
1500 			if ((m >> (qm - k - 1)) & 1)
1501 				p1 = maxstar(p1, log_syml_prob[m]);
1502 			else
1503 				p0 = maxstar(p0, log_syml_prob[m]);
1504 		}
1505 		/* Calculate the LLR */
1506 		llr_ = p0 - p1;
1507 		llr_ *= (1 << ldpc_llr_decimals);
1508 		llr_ = round(llr_);
1509 		if (llr_ > llr_max)
1510 			llr_ = llr_max;
1511 		if (llr_ < -llr_max)
1512 			llr_ = -llr_max;
1513 		llrs[qm * i + k] = (int8_t) llr_;
1514 	}
1515 }
1516 
1517 
1518 /*
1519  * Generate Qm LLRs for Qm == 6:
1520  * modulation, AWGN channel and LLR estimation using the max-log approximation
1521  */
1522 static void
1523 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1524 {
1525 	int qm = 6;
1526 	int qam = 64;
1527 	int m, k;
1528 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1529 	/* 5.1.4 of TS38.211 */
1530 	const double symbols_I[64] = {
1531 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1532 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1533 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1534 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1535 			-5, -5, -7, -7, -5, -5, -7, -7};
1536 	const double symbols_Q[64] = {
1537 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1538 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1539 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1540 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1541 			-3, -1, -3, -1, -5, -7, -5, -7};
1542 	/* Average constellation point energy */
1543 	N0 *= 42.0;
1544 	for (k = 0; k < qm; k++)
1545 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1546 	/* 5.1.4 of TS38.211 */
1547 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1548 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1549 	/* AWGN channel */
1550 	I += sqrt(N0 / 2) * randn(0);
1551 	Q += sqrt(N0 / 2) * randn(1);
1552 	/*
1553 	 * Calculate the log of the probability that each of
1554 	 * the constellation points was transmitted
1555 	 */
1556 	for (m = 0; m < qam; m++)
1557 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1558 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1559 	/* Calculate an LLR for each of the qm bits in the set */
1560 	for (k = 0; k < qm; k++) {
1561 		p0 = -999999;
1562 		p1 = -999999;
1563 		/* For each constellation point */
1564 		for (m = 0; m < qam; m++) {
1565 			if ((m >> (qm - k - 1)) & 1)
1566 				p1 = maxstar(p1, log_syml_prob[m]);
1567 			else
1568 				p0 = maxstar(p0, log_syml_prob[m]);
1569 		}
1570 		/* Calculate the LLR */
1571 		llr_ = p0 - p1;
1572 		llr_ *= (1 << ldpc_llr_decimals);
1573 		llr_ = round(llr_);
1574 		if (llr_ > llr_max)
1575 			llr_ = llr_max;
1576 		if (llr_ < -llr_max)
1577 			llr_ = -llr_max;
1578 		llrs[qm * i + k] = (int8_t) llr_;
1579 	}
1580 }
1581 
1582 /*
1583  * Generate Qm LLRs for Qm == 4:
1584  * modulation, AWGN channel and LLR estimation using the max-log approximation
1585  */
1586 static void
1587 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1588 {
1589 	int qm = 4;
1590 	int qam = 16;
1591 	int m, k;
1592 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1593 	/* 5.1.4 of TS38.211 */
1594 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1595 			-1, -1, -3, -3, -1, -1, -3, -3};
1596 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1597 			1, 3, 1, 3, -1, -3, -1, -3};
1598 	/* Average constellation point energy */
1599 	N0 *= 10.0;
1600 	for (k = 0; k < qm; k++)
1601 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1602 	/* 5.1.4 of TS38.211 */
1603 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1604 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1605 	/* AWGN channel */
1606 	I += sqrt(N0 / 2) * randn(0);
1607 	Q += sqrt(N0 / 2) * randn(1);
1608 	/*
1609 	 * Calculate the log of the probability that each of
1610 	 * the constellation points was transmitted
1611 	 */
1612 	for (m = 0; m < qam; m++)
1613 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1614 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1615 	/* Calculate an LLR for each of the qm bits in the set */
1616 	for (k = 0; k < qm; k++) {
1617 		p0 = -999999;
1618 		p1 = -999999;
1619 		/* For each constellation point */
1620 		for (m = 0; m < qam; m++) {
1621 			if ((m >> (qm - k - 1)) & 1)
1622 				p1 = maxstar(p1, log_syml_prob[m]);
1623 			else
1624 				p0 = maxstar(p0, log_syml_prob[m]);
1625 		}
1626 		/* Calculate the LLR */
1627 		llr_ = p0 - p1;
1628 		llr_ *= (1 << ldpc_llr_decimals);
1629 		llr_ = round(llr_);
1630 		if (llr_ > llr_max)
1631 			llr_ = llr_max;
1632 		if (llr_ < -llr_max)
1633 			llr_ = -llr_max;
1634 		llrs[qm * i + k] = (int8_t) llr_;
1635 	}
1636 }
1637 
1638 static void
1639 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1640 {
1641 	double b, b1, n;
1642 	double coeff = 2.0 * sqrt(N0);
1643 
1644 	/* Ignore the rare quasi-null LLRs present in the vectors so they do not get saturated */
1645 	if (llrs[j] < 8 && llrs[j] > -8)
1646 		return;
1647 
1648 	/* Note: do not change the sign here */
1649 	n = randn(j % 2);
1650 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1651 			+ coeff * n) / N0;
1652 	b = b1 * (1 << ldpc_llr_decimals);
1653 	b = round(b);
1654 	if (b > llr_max)
1655 		b = llr_max;
1656 	if (b < -llr_max)
1657 		b = -llr_max;
1658 	llrs[j] = (int8_t) b;
1659 }
1660 
1661 /* Generate LLRs for a given SNR */
1662 static void
1663 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1664 		struct rte_bbdev_dec_op *ref_op)
1665 {
1666 	struct rte_mbuf *m;
1667 	uint16_t qm;
1668 	uint32_t i, j, e, range;
1669 	double N0, llr_max;
1670 
1671 	e = ref_op->ldpc_dec.cb_params.e;
1672 	qm = ref_op->ldpc_dec.q_m;
1673 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1674 	range = e / qm;
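	/* Noise variance corresponding to the requested SNR in dB,
	 * assuming unit average signal power: N0 = 10^(-SNR/10).
	 */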
1675 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1676 
1677 	for (i = 0; i < n; ++i) {
1678 		m = inputs[i].data;
1679 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1680 		if (qm == 8) {
1681 			for (j = 0; j < range; ++j)
1682 				gen_qm8_llr(llrs, j, N0, llr_max);
1683 		} else if (qm == 6) {
1684 			for (j = 0; j < range; ++j)
1685 				gen_qm6_llr(llrs, j, N0, llr_max);
1686 		} else if (qm == 4) {
1687 			for (j = 0; j < range; ++j)
1688 				gen_qm4_llr(llrs, j, N0, llr_max);
1689 		} else {
1690 			for (j = 0; j < e; ++j)
1691 				gen_qm2_llr(llrs, j, N0, llr_max);
1692 		}
1693 	}
1694 }
1695 
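/*
 * Replicate the reference LDPC decode op into each op to be enqueued
 * and attach the per-op input, output and HARQ buffers.
 */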
1696 static void
1697 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1698 		unsigned int start_idx,
1699 		struct rte_bbdev_op_data *inputs,
1700 		struct rte_bbdev_op_data *hard_outputs,
1701 		struct rte_bbdev_op_data *soft_outputs,
1702 		struct rte_bbdev_op_data *harq_inputs,
1703 		struct rte_bbdev_op_data *harq_outputs,
1704 		struct rte_bbdev_dec_op *ref_op)
1705 {
1706 	unsigned int i;
1707 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1708 
1709 	for (i = 0; i < n; ++i) {
1710 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1711 			ops[i]->ldpc_dec.tb_params.ea =
1712 					ldpc_dec->tb_params.ea;
1713 			ops[i]->ldpc_dec.tb_params.eb =
1714 					ldpc_dec->tb_params.eb;
1715 			ops[i]->ldpc_dec.tb_params.c =
1716 					ldpc_dec->tb_params.c;
1717 			ops[i]->ldpc_dec.tb_params.cab =
1718 					ldpc_dec->tb_params.cab;
1719 			ops[i]->ldpc_dec.tb_params.r =
1720 					ldpc_dec->tb_params.r;
1721 		} else {
1722 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1723 		}
1724 
1725 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1726 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1727 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1728 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1729 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1730 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1731 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1732 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1733 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1734 
1735 		if (hard_outputs != NULL)
1736 			ops[i]->ldpc_dec.hard_output =
1737 					hard_outputs[start_idx + i];
1738 		if (inputs != NULL)
1739 			ops[i]->ldpc_dec.input =
1740 					inputs[start_idx + i];
1741 		if (soft_outputs != NULL)
1742 			ops[i]->ldpc_dec.soft_output =
1743 					soft_outputs[start_idx + i];
1744 		if (harq_inputs != NULL)
1745 			ops[i]->ldpc_dec.harq_combined_input =
1746 					harq_inputs[start_idx + i];
1747 		if (harq_outputs != NULL)
1748 			ops[i]->ldpc_dec.harq_combined_output =
1749 					harq_outputs[start_idx + i];
1750 	}
1751 }
1752 
1753 
1754 static void
1755 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1756 		unsigned int start_idx,
1757 		struct rte_bbdev_op_data *inputs,
1758 		struct rte_bbdev_op_data *outputs,
1759 		struct rte_bbdev_enc_op *ref_op)
1760 {
1761 	unsigned int i;
1762 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1763 	for (i = 0; i < n; ++i) {
1764 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1765 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1766 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1767 			ops[i]->ldpc_enc.tb_params.cab =
1768 					ldpc_enc->tb_params.cab;
1769 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1770 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1771 		} else {
1772 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1773 		}
1774 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1775 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1776 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1777 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1778 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1779 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1780 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1781 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1782 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1783 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1784 	}
1785 }
1786 
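/*
 * Check the decode op status and, through opaque_data, that the op was
 * dequeued in the same position it was enqueued.
 */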
1787 static int
1788 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1789 		unsigned int order_idx, const int expected_status)
1790 {
1791 	int status = op->status;
1792 	/* ignore parity mismatch false alarms for long iterations */
1793 	if (get_iter_max() >= 10) {
1794 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1795 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1796 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1797 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1798 		}
1799 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1800 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1801 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1802 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1803 		}
1804 	}
1805 
1806 	TEST_ASSERT(status == expected_status,
1807 			"op_status (%d) != expected_status (%d)",
1808 			op->status, expected_status);
1809 
1810 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1811 			"Ordering error, expected %p, got %p",
1812 			(void *)(uintptr_t)order_idx, op->opaque_data);
1813 
1814 	return TEST_SUCCESS;
1815 }
1816 
1817 static int
1818 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1819 		unsigned int order_idx, const int expected_status)
1820 {
1821 	TEST_ASSERT(op->status == expected_status,
1822 			"op_status (%d) != expected_status (%d)",
1823 			op->status, expected_status);
1824 
1825 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1826 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1827 				"Ordering error, expected %p, got %p",
1828 				(void *)(uintptr_t)order_idx, op->opaque_data);
1829 
1830 	return TEST_SUCCESS;
1831 }
1832 
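/*
 * Compare every mbuf segment of a dequeued output chain against the
 * corresponding reference segment from the test vector.
 */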
1833 static inline int
1834 validate_op_chain(struct rte_bbdev_op_data *op,
1835 		struct op_data_entries *orig_op)
1836 {
1837 	uint8_t i;
1838 	struct rte_mbuf *m = op->data;
1839 	uint8_t nb_dst_segments = orig_op->nb_segments;
1840 	uint32_t total_data_size = 0;
1841 
1842 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1843 			"Number of segments differ in original (%u) and filled (%u) op",
1844 			nb_dst_segments, m->nb_segs);
1845 
1846 	/* Validate each mbuf segment length */
1847 	for (i = 0; i < nb_dst_segments; ++i) {
1848 		/* Apply offset to the first mbuf segment */
1849 		uint16_t offset = (i == 0) ? op->offset : 0;
1850 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1851 		total_data_size += orig_op->segments[i].length;
1852 
1853 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1854 				"Length of segment differ in original (%u) and filled (%u) op",
1855 				orig_op->segments[i].length, data_len);
1856 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1857 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1858 				data_len,
1859 				"Output buffers (CB=%u) are not equal", i);
1860 		m = m->next;
1861 	}
1862 
1863 	/* Validate total mbuf pkt length */
1864 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1865 	TEST_ASSERT(total_data_size == pkt_len,
1866 			"Length of data differ in original (%u) and filled (%u) op",
1867 			total_data_size, pkt_len);
1868 
1869 	return TEST_SUCCESS;
1870 }
1871 
1872 /*
1873  * Compute K0 for a given configuration for HARQ output length computation
1874  * As per definition in 3GPP 38.212 Table 5.4.2.1-2
1875  */
1876 static inline uint16_t
1877 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1878 {
1879 	if (rv_index == 0)
1880 		return 0;
1881 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1882 	if (n_cb == n) {
1883 		if (rv_index == 1)
1884 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1885 		else if (rv_index == 2)
1886 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1887 		else
1888 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1889 	}
1890 	/* LBRM case - includes a division by N */
1891 	if (rv_index == 1)
1892 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1893 				/ n) * z_c;
1894 	else if (rv_index == 2)
1895 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1896 				/ n) * z_c;
1897 	else
1898 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1899 				/ n) * z_c;
1900 }
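/*
 * For reference, in the full-buffer case with BG1 and Zc = 384
 * (n_cb = 66 * 384): rv 1 gives k0 = 17 * 384, rv 2 gives 33 * 384 and
 * rv 3 gives 56 * 384. In the LBRM branch the K0 numerator is first
 * scaled by n_cb / n, so k0 remains a multiple of Zc.
 */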
1901 
1902 /* HARQ output length including the Filler bits */
1903 static inline uint16_t
1904 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1905 {
1906 	uint16_t k0 = 0;
1907 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1908 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1909 	/* Compute RM out size and number of rows */
1910 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1911 			* ops_ld->z_c - ops_ld->n_filler;
1912 	uint16_t deRmOutSize = RTE_MIN(
1913 			k0 + ops_ld->cb_params.e +
1914 			((k0 > parity_offset) ?
1915 					0 : ops_ld->n_filler),
1916 					ops_ld->n_cb);
1917 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1918 			/ ops_ld->z_c);
1919 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1920 	return harq_output_len;
1921 }
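/*
 * Worked example (assuming BG1, Zc = 384, rv 0, no filler bits,
 * e = 10000, n_cb = 66 * 384 = 25344): k0 = 0, deRmOutSize =
 * min(10000, 25344) = 10000, numRows = ceil(10000 / 384) = 27,
 * so the HARQ output length is 27 * 384 = 10368 LLRs.
 */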
1922 
1923 static inline int
1924 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1925 		struct op_data_entries *orig_op,
1926 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1927 {
1928 	uint8_t i;
1929 	uint32_t j, jj, k;
1930 	struct rte_mbuf *m = op->data;
1931 	uint8_t nb_dst_segments = orig_op->nb_segments;
1932 	uint32_t total_data_size = 0;
1933 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1934 	uint32_t byte_error = 0, cum_error = 0, error;
1935 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1936 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1937 	uint16_t parity_offset;
1938 
1939 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1940 			"Number of segments differ in original (%u) and filled (%u) op",
1941 			nb_dst_segments, m->nb_segs);
1942 
1943 	/* Validate each mbuf segment length */
1944 	for (i = 0; i < nb_dst_segments; ++i) {
1945 		/* Apply offset to the first mbuf segment */
1946 		uint16_t offset = (i == 0) ? op->offset : 0;
1947 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1948 		total_data_size += orig_op->segments[i].length;
1949 
1950 		TEST_ASSERT(orig_op->segments[i].length <
1951 				(uint32_t)(data_len + 64),
1952 				"Length of segment differ in original (%u) and filled (%u) op",
1953 				orig_op->segments[i].length, data_len);
1954 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1955 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1956 
1957 		if (!(ldpc_cap_flags &
1958 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1959 				) || (ops_ld->op_flags &
1960 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1961 			data_len -= ops_ld->z_c;
1962 			parity_offset = data_len;
1963 		} else {
1964 			/* Compute RM out size and number of rows */
1965 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1966 					* ops_ld->z_c - ops_ld->n_filler;
1967 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1968 					ops_ld->n_filler;
1969 			if (data_len > deRmOutSize)
1970 				data_len = deRmOutSize;
1971 			if (data_len > orig_op->segments[i].length)
1972 				data_len = orig_op->segments[i].length;
1973 		}
1974 		/*
1975 		 * HARQ output can have minor differences
1976 		 * due to integer representation and related scaling
1977 		 */
1978 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1979 			if (j == parity_offset) {
1980 				/* Special Handling of the filler bits */
1981 				for (k = 0; k < ops_ld->n_filler; k++) {
1982 					if (harq_out[jj] !=
1983 							llr_max_pre_scaling) {
1984 						printf("HARQ Filler issue %d: %d %d\n",
1985 							jj, harq_out[jj],
1986 							llr_max_pre_scaling);
1987 						byte_error++;
1988 					}
1989 					jj++;
1990 				}
1991 			}
1992 			if (!(ops_ld->op_flags &
1993 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1994 				if (ldpc_llr_decimals > 1)
1995 					harq_out[jj] = (harq_out[jj] + 1)
1996 						>> (ldpc_llr_decimals - 1);
1997 				/* Saturated to S7 */
1998 				if (harq_orig[j] > llr_max)
1999 					harq_orig[j] = llr_max;
2000 				if (harq_orig[j] < -llr_max)
2001 					harq_orig[j] = -llr_max;
2002 			}
2003 			if (harq_orig[j] != harq_out[jj]) {
2004 				error = (harq_orig[j] > harq_out[jj]) ?
2005 						harq_orig[j] - harq_out[jj] :
2006 						harq_out[jj] - harq_orig[j];
2007 				abs_harq_origin = harq_orig[j] > 0 ?
2008 							harq_orig[j] :
2009 							-harq_orig[j];
2010 				/* Residual quantization error */
2011 				if ((error > 8 && (abs_harq_origin <
2012 						(llr_max - 16))) ||
2013 						(error > 16)) {
2014 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2015 							j, harq_orig[j],
2016 							harq_out[jj], error);
2017 					byte_error++;
2018 					cum_error += error;
2019 				}
2020 			}
2021 		}
2022 		m = m->next;
2023 	}
2024 
2025 	if (byte_error)
2026 		TEST_ASSERT(byte_error <= 1,
2027 				"HARQ output mismatch (%d) %d",
2028 				byte_error, cum_error);
2029 
2030 	/* Validate total mbuf pkt length */
2031 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2032 	TEST_ASSERT(total_data_size < pkt_len + 64,
2033 			"Length of data differ in original (%u) and filled (%u) op",
2034 			total_data_size, pkt_len);
2035 
2036 	return TEST_SUCCESS;
2037 }
2038 
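/*
 * Validate turbo decode ops: status and ordering, hard output and,
 * when enabled in the vector, soft output and iteration count bound.
 */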
2039 static int
2040 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2041 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2042 {
2043 	unsigned int i;
2044 	int ret;
2045 	struct op_data_entries *hard_data_orig =
2046 			&test_vector.entries[DATA_HARD_OUTPUT];
2047 	struct op_data_entries *soft_data_orig =
2048 			&test_vector.entries[DATA_SOFT_OUTPUT];
2049 	struct rte_bbdev_op_turbo_dec *ops_td;
2050 	struct rte_bbdev_op_data *hard_output;
2051 	struct rte_bbdev_op_data *soft_output;
2052 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2053 
2054 	for (i = 0; i < n; ++i) {
2055 		ops_td = &ops[i]->turbo_dec;
2056 		hard_output = &ops_td->hard_output;
2057 		soft_output = &ops_td->soft_output;
2058 
2059 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2060 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2061 					"Returned iter_count (%d) > expected iter_count (%d)",
2062 					ops_td->iter_count, ref_td->iter_count);
2063 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2064 		TEST_ASSERT_SUCCESS(ret,
2065 				"Checking status and ordering for decoder failed");
2066 
2067 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2068 				hard_data_orig),
2069 				"Hard output buffers (CB=%u) are not equal",
2070 				i);
2071 
2072 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2073 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2074 					soft_data_orig),
2075 					"Soft output buffers (CB=%u) are not equal",
2076 					i);
2077 	}
2078 
2079 	return TEST_SUCCESS;
2080 }
2081 
2082 /* Check Number of code blocks errors */
2083 static int
2084 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2085 {
2086 	unsigned int i;
2087 	struct op_data_entries *hard_data_orig =
2088 			&test_vector.entries[DATA_HARD_OUTPUT];
2089 	struct rte_bbdev_op_ldpc_dec *ops_td;
2090 	struct rte_bbdev_op_data *hard_output;
2091 	int errors = 0;
2092 	struct rte_mbuf *m;
2093 
2094 	for (i = 0; i < n; ++i) {
2095 		ops_td = &ops[i]->ldpc_dec;
2096 		hard_output = &ops_td->hard_output;
2097 		m = hard_output->data;
2098 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2099 				hard_data_orig->segments[0].addr,
2100 				hard_data_orig->segments[0].length))
2101 			errors++;
2102 	}
2103 	return errors;
2104 }
2105 
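/*
 * Validate LDPC decode ops: status and ordering, iteration count bound
 * and hard/soft/HARQ outputs depending on the enabled op flags.
 */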
2106 static int
2107 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2108 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2109 {
2110 	unsigned int i;
2111 	int ret;
2112 	struct op_data_entries *hard_data_orig =
2113 			&test_vector.entries[DATA_HARD_OUTPUT];
2114 	struct op_data_entries *soft_data_orig =
2115 			&test_vector.entries[DATA_SOFT_OUTPUT];
2116 	struct op_data_entries *harq_data_orig =
2117 				&test_vector.entries[DATA_HARQ_OUTPUT];
2118 	struct rte_bbdev_op_ldpc_dec *ops_td;
2119 	struct rte_bbdev_op_data *hard_output;
2120 	struct rte_bbdev_op_data *harq_output;
2121 	struct rte_bbdev_op_data *soft_output;
2122 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2123 
2124 	for (i = 0; i < n; ++i) {
2125 		ops_td = &ops[i]->ldpc_dec;
2126 		hard_output = &ops_td->hard_output;
2127 		harq_output = &ops_td->harq_combined_output;
2128 		soft_output = &ops_td->soft_output;
2129 
2130 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2131 		TEST_ASSERT_SUCCESS(ret,
2132 				"Checking status and ordering for decoder failed");
2133 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2134 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2135 					"Returned iter_count (%d) > expected iter_count (%d)",
2136 					ops_td->iter_count, ref_td->iter_count);
2137 		/*
2138 		 * We can ignore output data when the decoding failed to
2139 		 * converge or for loop-back cases
2140 		 */
2141 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2142 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2143 				) && (
2144 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2145 						)) == 0)
2146 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2147 					hard_data_orig),
2148 					"Hard output buffers (CB=%u) are not equal",
2149 					i);
2150 
2151 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2152 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2153 					soft_data_orig),
2154 					"Soft output buffers (CB=%u) are not equal",
2155 					i);
2156 		if (ref_op->ldpc_dec.op_flags &
2157 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2158 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2159 					harq_data_orig, ops_td),
2160 					"HARQ output buffers (CB=%u) are not equal",
2161 					i);
2162 		}
2163 		if (ref_op->ldpc_dec.op_flags &
2164 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2165 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2166 					harq_data_orig, ops_td),
2167 					"HARQ output buffers (CB=%u) are not equal",
2168 					i);
2169 
2170 	}
2171 
2172 	return TEST_SUCCESS;
2173 }
2174 
2175 
2176 static int
2177 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2178 		struct rte_bbdev_enc_op *ref_op)
2179 {
2180 	unsigned int i;
2181 	int ret;
2182 	struct op_data_entries *hard_data_orig =
2183 			&test_vector.entries[DATA_HARD_OUTPUT];
2184 
2185 	for (i = 0; i < n; ++i) {
2186 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2187 		TEST_ASSERT_SUCCESS(ret,
2188 				"Checking status and ordering for encoder failed");
2189 		TEST_ASSERT_SUCCESS(validate_op_chain(
2190 				&ops[i]->turbo_enc.output,
2191 				hard_data_orig),
2192 				"Output buffers (CB=%u) are not equal",
2193 				i);
2194 	}
2195 
2196 	return TEST_SUCCESS;
2197 }
2198 
2199 static int
2200 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2201 		struct rte_bbdev_enc_op *ref_op)
2202 {
2203 	unsigned int i;
2204 	int ret;
2205 	struct op_data_entries *hard_data_orig =
2206 			&test_vector.entries[DATA_HARD_OUTPUT];
2207 
2208 	for (i = 0; i < n; ++i) {
2209 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2210 		TEST_ASSERT_SUCCESS(ret,
2211 				"Checking status and ordering for encoder failed");
2212 		TEST_ASSERT_SUCCESS(validate_op_chain(
2213 				&ops[i]->ldpc_enc.output,
2214 				hard_data_orig),
2215 				"Output buffers (CB=%u) are not equal",
2216 				i);
2217 	}
2218 
2219 	return TEST_SUCCESS;
2220 }
2221 
2222 static void
2223 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2224 {
2225 	unsigned int i;
2226 	struct op_data_entries *entry;
2227 
2228 	op->turbo_dec = test_vector.turbo_dec;
2229 	entry = &test_vector.entries[DATA_INPUT];
2230 	for (i = 0; i < entry->nb_segments; ++i)
2231 		op->turbo_dec.input.length +=
2232 				entry->segments[i].length;
2233 }
2234 
2235 static void
2236 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2237 {
2238 	unsigned int i;
2239 	struct op_data_entries *entry;
2240 
2241 	op->ldpc_dec = test_vector.ldpc_dec;
2242 	entry = &test_vector.entries[DATA_INPUT];
2243 	for (i = 0; i < entry->nb_segments; ++i)
2244 		op->ldpc_dec.input.length +=
2245 				entry->segments[i].length;
2246 	if (test_vector.ldpc_dec.op_flags &
2247 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2248 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2249 		for (i = 0; i < entry->nb_segments; ++i)
2250 			op->ldpc_dec.harq_combined_input.length +=
2251 				entry->segments[i].length;
2252 	}
2253 }
2254 
2255 
2256 static void
2257 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2258 {
2259 	unsigned int i;
2260 	struct op_data_entries *entry;
2261 
2262 	op->turbo_enc = test_vector.turbo_enc;
2263 	entry = &test_vector.entries[DATA_INPUT];
2264 	for (i = 0; i < entry->nb_segments; ++i)
2265 		op->turbo_enc.input.length +=
2266 				entry->segments[i].length;
2267 }
2268 
2269 static void
2270 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2271 {
2272 	unsigned int i;
2273 	struct op_data_entries *entry;
2274 
2275 	op->ldpc_enc = test_vector.ldpc_enc;
2276 	entry = &test_vector.entries[DATA_INPUT];
2277 	for (i = 0; i < entry->nb_segments; ++i)
2278 		op->ldpc_enc.input.length +=
2279 				entry->segments[i].length;
2280 }
2281 
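/*
 * The calc_*_TB_size() helpers below return the number of information
 * bits carried by one operation; this is used to convert ops/s into
 * Mbps in the throughput and BLER reports.
 */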
2282 static uint32_t
2283 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2284 {
2285 	uint8_t i;
2286 	uint32_t c, r, tb_size = 0;
2287 
2288 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2289 		tb_size = op->turbo_dec.tb_params.k_neg;
2290 	} else {
2291 		c = op->turbo_dec.tb_params.c;
2292 		r = op->turbo_dec.tb_params.r;
2293 		for (i = 0; i < c-r; i++)
2294 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2295 				op->turbo_dec.tb_params.k_neg :
2296 				op->turbo_dec.tb_params.k_pos;
2297 	}
2298 	return tb_size;
2299 }
2300 
2301 static uint32_t
2302 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2303 {
2304 	uint8_t i;
2305 	uint32_t c, r, tb_size = 0;
2306 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2307 
2308 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2309 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2310 	} else {
2311 		c = op->ldpc_dec.tb_params.c;
2312 		r = op->ldpc_dec.tb_params.r;
2313 		for (i = 0; i < c-r; i++)
2314 			tb_size += sys_cols * op->ldpc_dec.z_c
2315 					- op->ldpc_dec.n_filler;
2316 	}
2317 	return tb_size;
2318 }
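/*
 * E.g., assuming BG1 (22 systematic columns), Zc = 384 and no filler
 * bits, a single code block carries 22 * 384 = 8448 information bits.
 */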
2319 
2320 static uint32_t
2321 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2322 {
2323 	uint8_t i;
2324 	uint32_t c, r, tb_size = 0;
2325 
2326 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2327 		tb_size = op->turbo_enc.tb_params.k_neg;
2328 	} else {
2329 		c = op->turbo_enc.tb_params.c;
2330 		r = op->turbo_enc.tb_params.r;
2331 		for (i = 0; i < c-r; i++)
2332 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2333 				op->turbo_enc.tb_params.k_neg :
2334 				op->turbo_enc.tb_params.k_pos;
2335 	}
2336 	return tb_size;
2337 }
2338 
2339 static uint32_t
2340 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2341 {
2342 	uint8_t i;
2343 	uint32_t c, r, tb_size = 0;
2344 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2345 
2346 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2347 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2348 	} else {
2349 		c = op->ldpc_enc.tb_params.c;
2350 		r = op->ldpc_enc.tb_params.r;
2351 		for (i = 0; i < c-r; i++)
2352 			tb_size += sys_cols * op->ldpc_enc.z_c
2353 					- op->ldpc_enc.n_filler;
2354 	}
2355 	return tb_size;
2356 }
2357 
2358 
2359 static int
2360 init_test_op_params(struct test_op_params *op_params,
2361 		enum rte_bbdev_op_type op_type, const int expected_status,
2362 		const int vector_mask, struct rte_mempool *ops_mp,
2363 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2364 {
2365 	int ret = 0;
2366 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2367 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2368 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2369 				&op_params->ref_dec_op, 1);
2370 	else
2371 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2372 				&op_params->ref_enc_op, 1);
2373 
2374 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2375 
2376 	op_params->mp = ops_mp;
2377 	op_params->burst_sz = burst_sz;
2378 	op_params->num_to_process = num_to_process;
2379 	op_params->num_lcores = num_lcores;
2380 	op_params->vector_mask = vector_mask;
2381 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2382 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2383 		op_params->ref_dec_op->status = expected_status;
2384 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2385 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2386 		op_params->ref_enc_op->status = expected_status;
2387 	return 0;
2388 }
2389 
2390 static int
2391 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2392 		struct test_op_params *op_params)
2393 {
2394 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2395 	unsigned int i;
2396 	struct active_device *ad;
2397 	unsigned int burst_sz = get_burst_sz();
2398 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2399 	const struct rte_bbdev_op_cap *capabilities = NULL;
2400 
2401 	ad = &active_devs[dev_id];
2402 
2403 	/* Check if device supports op_type */
2404 	if (!is_avail_op(ad, test_vector.op_type))
2405 		return TEST_SUCCESS;
2406 
2407 	struct rte_bbdev_info info;
2408 	rte_bbdev_info_get(ad->dev_id, &info);
2409 	socket_id = GET_SOCKET(info.socket_id);
2410 
2411 	f_ret = create_mempools(ad, socket_id, op_type,
2412 			get_num_ops());
2413 	if (f_ret != TEST_SUCCESS) {
2414 		printf("Couldn't create mempools");
2415 		goto fail;
2416 	}
2417 	if (op_type == RTE_BBDEV_OP_NONE)
2418 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2419 
2420 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2421 			test_vector.expected_status,
2422 			test_vector.mask,
2423 			ad->ops_mempool,
2424 			burst_sz,
2425 			get_num_ops(),
2426 			get_num_lcores());
2427 	if (f_ret != TEST_SUCCESS) {
2428 		printf("Couldn't init test op params");
2429 		goto fail;
2430 	}
2431 
2432 
2433 	/* Find capabilities */
2434 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2435 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2436 		if (cap->type == test_vector.op_type) {
2437 			capabilities = cap;
2438 			break;
2439 		}
2440 		cap++;
2441 	}
2442 	TEST_ASSERT_NOT_NULL(capabilities,
2443 			"Couldn't find capabilities");
2444 
2445 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2446 		create_reference_dec_op(op_params->ref_dec_op);
2447 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2448 		create_reference_enc_op(op_params->ref_enc_op);
2449 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2450 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2451 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2452 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2453 
2454 	for (i = 0; i < ad->nb_queues; ++i) {
2455 		f_ret = fill_queue_buffers(op_params,
2456 				ad->in_mbuf_pool,
2457 				ad->hard_out_mbuf_pool,
2458 				ad->soft_out_mbuf_pool,
2459 				ad->harq_in_mbuf_pool,
2460 				ad->harq_out_mbuf_pool,
2461 				ad->queue_ids[i],
2462 				capabilities,
2463 				info.drv.min_alignment,
2464 				socket_id);
2465 		if (f_ret != TEST_SUCCESS) {
2466 			printf("Couldn't init queue buffers");
2467 			goto fail;
2468 		}
2469 	}
2470 
2471 	/* Run test case function */
2472 	t_ret = test_case_func(ad, op_params);
2473 
2474 	/* Free active device resources and return */
2475 	free_buffers(ad, op_params);
2476 	return t_ret;
2477 
2478 fail:
2479 	free_buffers(ad, op_params);
2480 	return TEST_FAILED;
2481 }
2482 
2483 /* Run given test function per active device per supported op type
2484  * per burst size.
2485  */
2486 static int
2487 run_test_case(test_case_function *test_case_func)
2488 {
2489 	int ret = 0;
2490 	uint8_t dev;
2491 
2492 	/* Alloc op_params */
2493 	struct test_op_params *op_params = rte_zmalloc(NULL,
2494 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2495 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2496 			RTE_ALIGN(sizeof(struct test_op_params),
2497 				RTE_CACHE_LINE_SIZE));
2498 
2499 	/* For each device run test case function */
2500 	for (dev = 0; dev < nb_active_devs; ++dev)
2501 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2502 
2503 	rte_free(op_params);
2504 
2505 	return ret;
2506 }
2507 
2508 
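/*
 * Note on the external DDR layout used below: each queue starts at
 * offset queue_id * HARQ_INCR * MAX_OPS and each op within a queue
 * advances by HARQ_INCR, which is how the HARQ offsets are derived.
 */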
2509 /* Push back the HARQ output from DDR to host */
2510 static void
2511 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2512 		struct rte_bbdev_dec_op **ops,
2513 		const uint16_t n)
2514 {
2515 	uint16_t j;
2516 	int save_status, ret;
2517 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2518 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2519 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2520 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2521 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2522 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2523 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2524 	for (j = 0; j < n; ++j) {
2525 		if ((loopback && mem_out) || hc_out) {
2526 			save_status = ops[j]->status;
2527 			ops[j]->ldpc_dec.op_flags =
2528 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2529 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2530 			if (h_comp)
2531 				ops[j]->ldpc_dec.op_flags +=
2532 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2533 			ops[j]->ldpc_dec.harq_combined_input.offset =
2534 					harq_offset;
2535 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2536 			harq_offset += HARQ_INCR;
2537 			if (!loopback)
2538 				ops[j]->ldpc_dec.harq_combined_input.length =
2539 				ops[j]->ldpc_dec.harq_combined_output.length;
2540 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2541 					&ops[j], 1);
2542 			ret = 0;
2543 			while (ret == 0)
2544 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2545 						dev_id, queue_id,
2546 						&ops_deq[j], 1);
2547 			ops[j]->ldpc_dec.op_flags = flags;
2548 			ops[j]->status = save_status;
2549 		}
2550 	}
2551 }
2552 
2553 /*
2554  * Preload the HARQ combined input from host memory into HW DDR
2555  * (when requested) and adjust the HARQ offsets for external DDR
2556  */
2557 static void
2558 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2559 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2560 		bool preload)
2561 {
2562 	uint16_t j;
2563 	int deq;
2564 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2565 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2566 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2567 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2568 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2569 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2570 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2571 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2572 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2573 	if ((mem_in || hc_in) && preload) {
2574 		for (j = 0; j < n; ++j) {
2575 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2576 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2577 			ops[j]->ldpc_dec.op_flags =
2578 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2579 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2580 			if (h_comp)
2581 				ops[j]->ldpc_dec.op_flags +=
2582 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2583 			ops[j]->ldpc_dec.harq_combined_output.offset =
2584 					harq_offset;
2585 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2586 			harq_offset += HARQ_INCR;
2587 		}
2588 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2589 		deq = 0;
2590 		while (deq != n)
2591 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2592 					dev_id, queue_id, &ops_deq[deq],
2593 					n - deq);
2594 		/* Restore the operations */
2595 		for (j = 0; j < n; ++j) {
2596 			ops[j]->ldpc_dec.op_flags = flags;
2597 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2598 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2599 		}
2600 	}
2601 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2602 	for (j = 0; j < n; ++j) {
2603 		/* Adjust HARQ offset when we reach external DDR */
2604 		if (mem_in || hc_in)
2605 			ops[j]->ldpc_dec.harq_combined_input.offset
2606 				= harq_offset;
2607 		if (mem_out || hc_out)
2608 			ops[j]->ldpc_dec.harq_combined_output.offset
2609 				= harq_offset;
2610 		harq_offset += HARQ_INCR;
2611 	}
2612 }
2613 
2614 static void
2615 dequeue_event_callback(uint16_t dev_id,
2616 		enum rte_bbdev_event_type event, void *cb_arg,
2617 		void *ret_param)
2618 {
2619 	int ret;
2620 	uint16_t i;
2621 	uint64_t total_time;
2622 	uint16_t deq, burst_sz, num_ops;
2623 	uint16_t queue_id = *(uint16_t *) ret_param;
2624 	struct rte_bbdev_info info;
2625 	double tb_len_bits;
2626 	struct thread_params *tp = cb_arg;
2627 
2628 	/* Find matching thread params using queue_id */
2629 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2630 		if (tp->queue_id == queue_id)
2631 			break;
2632 
2633 	if (i == MAX_QUEUES) {
2634 		printf("%s: Queue_id from interrupt details was not found!\n",
2635 				__func__);
2636 		return;
2637 	}
2638 
2639 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2640 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2641 		printf(
2642 			"Dequeue interrupt handler called for incorrect event!\n");
2643 		return;
2644 	}
2645 
2646 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2647 	num_ops = tp->op_params->num_to_process;
2648 
2649 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2650 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2651 				&tp->dec_ops[
2652 					rte_atomic16_read(&tp->nb_dequeued)],
2653 				burst_sz);
2654 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2655 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2656 				&tp->dec_ops[
2657 					rte_atomic16_read(&tp->nb_dequeued)],
2658 				burst_sz);
2659 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2660 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2661 				&tp->enc_ops[
2662 					rte_atomic16_read(&tp->nb_dequeued)],
2663 				burst_sz);
2664 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2665 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2666 				&tp->enc_ops[
2667 					rte_atomic16_read(&tp->nb_dequeued)],
2668 				burst_sz);
2669 
2670 	if (deq < burst_sz) {
2671 		printf(
2672 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2673 			burst_sz, deq);
2674 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2675 		return;
2676 	}
2677 
2678 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2679 		rte_atomic16_add(&tp->nb_dequeued, deq);
2680 		return;
2681 	}
2682 
2683 	total_time = rte_rdtsc_precise() - tp->start_time;
2684 
2685 	rte_bbdev_info_get(dev_id, &info);
2686 
2687 	ret = TEST_SUCCESS;
2688 
2689 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2690 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2691 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2692 				tp->op_params->vector_mask);
2693 		/* get the max of iter_count for all dequeued ops */
2694 		for (i = 0; i < num_ops; ++i)
2695 			tp->iter_count = RTE_MAX(
2696 					tp->dec_ops[i]->turbo_dec.iter_count,
2697 					tp->iter_count);
2698 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2699 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2700 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2701 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2702 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2703 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2704 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2705 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2706 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2707 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2708 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2709 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2710 				tp->op_params->vector_mask);
2711 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2712 	}
2713 
2714 	if (ret) {
2715 		printf("Buffers validation failed\n");
2716 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2717 	}
2718 
2719 	switch (test_vector.op_type) {
2720 	case RTE_BBDEV_OP_TURBO_DEC:
2721 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2722 		break;
2723 	case RTE_BBDEV_OP_TURBO_ENC:
2724 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2725 		break;
2726 	case RTE_BBDEV_OP_LDPC_DEC:
2727 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2728 		break;
2729 	case RTE_BBDEV_OP_LDPC_ENC:
2730 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2731 		break;
2732 	case RTE_BBDEV_OP_NONE:
2733 		tb_len_bits = 0.0;
2734 		break;
2735 	default:
2736 		printf("Unknown op type: %d\n", test_vector.op_type);
2737 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2738 		return;
2739 	}
2740 
2741 	tp->ops_per_sec += ((double)num_ops) /
2742 			((double)total_time / (double)rte_get_tsc_hz());
2743 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2744 			((double)total_time / (double)rte_get_tsc_hz());
2745 
2746 	rte_atomic16_add(&tp->nb_dequeued, deq);
2747 }
2748 
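/*
 * Interrupt-mode throughput workers: each lcore only enqueues ops in
 * bursts; dequeue, validation and rate accounting are done in
 * dequeue_event_callback() above.
 */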
2749 static int
2750 throughput_intr_lcore_ldpc_dec(void *arg)
2751 {
2752 	struct thread_params *tp = arg;
2753 	unsigned int enqueued;
2754 	const uint16_t queue_id = tp->queue_id;
2755 	const uint16_t burst_sz = tp->op_params->burst_sz;
2756 	const uint16_t num_to_process = tp->op_params->num_to_process;
2757 	struct rte_bbdev_dec_op *ops[num_to_process];
2758 	struct test_buffers *bufs = NULL;
2759 	struct rte_bbdev_info info;
2760 	int ret, i, j;
2761 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2762 	uint16_t num_to_enq, enq;
2763 
2764 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2765 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2766 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2767 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2768 
2769 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2770 			"BURST_SIZE should be <= %u", MAX_BURST);
2771 
2772 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2773 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2774 			tp->dev_id, queue_id);
2775 
2776 	rte_bbdev_info_get(tp->dev_id, &info);
2777 
2778 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2779 			"NUM_OPS cannot exceed %u for this device",
2780 			info.drv.queue_size_lim);
2781 
2782 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2783 
2784 	rte_atomic16_clear(&tp->processing_status);
2785 	rte_atomic16_clear(&tp->nb_dequeued);
2786 
2787 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2788 		rte_pause();
2789 
2790 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2791 				num_to_process);
2792 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2793 			num_to_process);
2794 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2795 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2796 				bufs->hard_outputs, bufs->soft_outputs,
2797 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2798 
2799 	/* Set counter to validate the ordering */
2800 	for (j = 0; j < num_to_process; ++j)
2801 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2802 
2803 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2804 		for (i = 0; i < num_to_process; ++i) {
2805 			if (!loopback)
2806 				rte_pktmbuf_reset(
2807 					ops[i]->ldpc_dec.hard_output.data);
2808 			if (hc_out || loopback)
2809 				mbuf_reset(
2810 				ops[i]->ldpc_dec.harq_combined_output.data);
2811 		}
2812 
2813 		tp->start_time = rte_rdtsc_precise();
2814 		for (enqueued = 0; enqueued < num_to_process;) {
2815 			num_to_enq = burst_sz;
2816 
2817 			if (unlikely(num_to_process - enqueued < num_to_enq))
2818 				num_to_enq = num_to_process - enqueued;
2819 
2820 			enq = 0;
2821 			do {
2822 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2823 						tp->dev_id,
2824 						queue_id, &ops[enqueued],
2825 						num_to_enq);
2826 			} while (unlikely(num_to_enq != enq));
2827 			enqueued += enq;
2828 
2829 			/* Write to thread burst_sz current number of enqueued
2830 			 * descriptors. It ensures that proper number of
2831 			 * descriptors will be dequeued in callback
2832 			 * function - needed for last batch in case where
2833 			 * the number of operations is not a multiple of
2834 			 * burst size.
2835 			 */
2836 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2837 
2838 			/* Wait until processing of previous batch is
2839 			 * completed
2840 			 */
2841 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2842 					(int16_t) enqueued)
2843 				rte_pause();
2844 		}
2845 		if (j != TEST_REPETITIONS - 1)
2846 			rte_atomic16_clear(&tp->nb_dequeued);
2847 	}
2848 
2849 	return TEST_SUCCESS;
2850 }
2851 
2852 static int
2853 throughput_intr_lcore_dec(void *arg)
2854 {
2855 	struct thread_params *tp = arg;
2856 	unsigned int enqueued;
2857 	const uint16_t queue_id = tp->queue_id;
2858 	const uint16_t burst_sz = tp->op_params->burst_sz;
2859 	const uint16_t num_to_process = tp->op_params->num_to_process;
2860 	struct rte_bbdev_dec_op *ops[num_to_process];
2861 	struct test_buffers *bufs = NULL;
2862 	struct rte_bbdev_info info;
2863 	int ret, i, j;
2864 	uint16_t num_to_enq, enq;
2865 
2866 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2867 			"BURST_SIZE should be <= %u", MAX_BURST);
2868 
2869 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2870 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2871 			tp->dev_id, queue_id);
2872 
2873 	rte_bbdev_info_get(tp->dev_id, &info);
2874 
2875 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2876 			"NUM_OPS cannot exceed %u for this device",
2877 			info.drv.queue_size_lim);
2878 
2879 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2880 
2881 	rte_atomic16_clear(&tp->processing_status);
2882 	rte_atomic16_clear(&tp->nb_dequeued);
2883 
2884 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2885 		rte_pause();
2886 
2887 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2888 				num_to_process);
2889 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2890 			num_to_process);
2891 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2892 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2893 				bufs->hard_outputs, bufs->soft_outputs,
2894 				tp->op_params->ref_dec_op);
2895 
2896 	/* Set counter to validate the ordering */
2897 	for (j = 0; j < num_to_process; ++j)
2898 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2899 
2900 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2901 		for (i = 0; i < num_to_process; ++i)
2902 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2903 
2904 		tp->start_time = rte_rdtsc_precise();
2905 		for (enqueued = 0; enqueued < num_to_process;) {
2906 			num_to_enq = burst_sz;
2907 
2908 			if (unlikely(num_to_process - enqueued < num_to_enq))
2909 				num_to_enq = num_to_process - enqueued;
2910 
2911 			enq = 0;
2912 			do {
2913 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2914 						queue_id, &ops[enqueued],
2915 						num_to_enq);
2916 			} while (unlikely(num_to_enq != enq));
2917 			enqueued += enq;
2918 
2919 			/* Write to thread burst_sz current number of enqueued
2920 			 * descriptors. It ensures that proper number of
2921 			 * descriptors will be dequeued in callback
2922 			 * function - needed for last batch in case where
2923 			 * the number of operations is not a multiple of
2924 			 * burst size.
2925 			 */
2926 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2927 
2928 			/* Wait until processing of previous batch is
2929 			 * completed
2930 			 */
2931 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2932 					(int16_t) enqueued)
2933 				rte_pause();
2934 		}
2935 		if (j != TEST_REPETITIONS - 1)
2936 			rte_atomic16_clear(&tp->nb_dequeued);
2937 	}
2938 
2939 	return TEST_SUCCESS;
2940 }
2941 
2942 static int
2943 throughput_intr_lcore_enc(void *arg)
2944 {
2945 	struct thread_params *tp = arg;
2946 	unsigned int enqueued;
2947 	const uint16_t queue_id = tp->queue_id;
2948 	const uint16_t burst_sz = tp->op_params->burst_sz;
2949 	const uint16_t num_to_process = tp->op_params->num_to_process;
2950 	struct rte_bbdev_enc_op *ops[num_to_process];
2951 	struct test_buffers *bufs = NULL;
2952 	struct rte_bbdev_info info;
2953 	int ret, i, j;
2954 	uint16_t num_to_enq, enq;
2955 
2956 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2957 			"BURST_SIZE should be <= %u", MAX_BURST);
2958 
2959 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2960 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2961 			tp->dev_id, queue_id);
2962 
2963 	rte_bbdev_info_get(tp->dev_id, &info);
2964 
2965 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2966 			"NUM_OPS cannot exceed %u for this device",
2967 			info.drv.queue_size_lim);
2968 
2969 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2970 
2971 	rte_atomic16_clear(&tp->processing_status);
2972 	rte_atomic16_clear(&tp->nb_dequeued);
2973 
2974 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2975 		rte_pause();
2976 
2977 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2978 			num_to_process);
2979 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2980 			num_to_process);
2981 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2982 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2983 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2984 
2985 	/* Set counter to validate the ordering */
2986 	for (j = 0; j < num_to_process; ++j)
2987 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2988 
2989 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2990 		for (i = 0; i < num_to_process; ++i)
2991 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2992 
2993 		tp->start_time = rte_rdtsc_precise();
2994 		for (enqueued = 0; enqueued < num_to_process;) {
2995 			num_to_enq = burst_sz;
2996 
2997 			if (unlikely(num_to_process - enqueued < num_to_enq))
2998 				num_to_enq = num_to_process - enqueued;
2999 
3000 			enq = 0;
3001 			do {
3002 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3003 						queue_id, &ops[enqueued],
3004 						num_to_enq);
3005 			} while (unlikely(enq != num_to_enq));
3006 			enqueued += enq;
3007 
3008 			/* Write to thread burst_sz current number of enqueued
3009 			 * descriptors. It ensures that proper number of
3010 			 * descriptors will be dequeued in callback
3011 			 * function - needed for last batch in case where
3012 			 * the number of operations is not a multiple of
3013 			 * burst size.
3014 			 */
3015 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
3016 
3017 			/* Wait until processing of previous batch is
3018 			 * completed
3019 			 */
3020 			while (rte_atomic16_read(&tp->nb_dequeued) !=
3021 					(int16_t) enqueued)
3022 				rte_pause();
3023 		}
3024 		if (j != TEST_REPETITIONS - 1)
3025 			rte_atomic16_clear(&tp->nb_dequeued);
3026 	}
3027 
3028 	return TEST_SUCCESS;
3029 }
3030 
3031 
3032 static int
3033 throughput_intr_lcore_ldpc_enc(void *arg)
3034 {
3035 	struct thread_params *tp = arg;
3036 	unsigned int enqueued;
3037 	const uint16_t queue_id = tp->queue_id;
3038 	const uint16_t burst_sz = tp->op_params->burst_sz;
3039 	const uint16_t num_to_process = tp->op_params->num_to_process;
3040 	struct rte_bbdev_enc_op *ops[num_to_process];
3041 	struct test_buffers *bufs = NULL;
3042 	struct rte_bbdev_info info;
3043 	int ret, i, j;
3044 	uint16_t num_to_enq, enq;
3045 
3046 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3047 			"BURST_SIZE should be <= %u", MAX_BURST);
3048 
3049 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3050 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3051 			tp->dev_id, queue_id);
3052 
3053 	rte_bbdev_info_get(tp->dev_id, &info);
3054 
3055 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3056 			"NUM_OPS cannot exceed %u for this device",
3057 			info.drv.queue_size_lim);
3058 
3059 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3060 
3061 	rte_atomic16_clear(&tp->processing_status);
3062 	rte_atomic16_clear(&tp->nb_dequeued);
3063 
3064 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3065 		rte_pause();
3066 
3067 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3068 			num_to_process);
3069 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3070 			num_to_process);
3071 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3072 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3073 				bufs->inputs, bufs->hard_outputs,
3074 				tp->op_params->ref_enc_op);
3075 
3076 	/* Set counter to validate the ordering */
3077 	for (j = 0; j < num_to_process; ++j)
3078 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3079 
3080 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3081 		for (i = 0; i < num_to_process; ++i)
3082 			rte_pktmbuf_reset(ops[i]->ldpc_enc.output.data);
3083 
3084 		tp->start_time = rte_rdtsc_precise();
3085 		for (enqueued = 0; enqueued < num_to_process;) {
3086 			num_to_enq = burst_sz;
3087 
3088 			if (unlikely(num_to_process - enqueued < num_to_enq))
3089 				num_to_enq = num_to_process - enqueued;
3090 
3091 			enq = 0;
3092 			do {
3093 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3094 						tp->dev_id,
3095 						queue_id, &ops[enqueued],
3096 						num_to_enq);
3097 			} while (unlikely(enq != num_to_enq));
3098 			enqueued += enq;
3099 
3100 			/* Write to thread burst_sz current number of enqueued
3101 			 * descriptors. It ensures that proper number of
3102 			 * descriptors will be dequeued in callback
3103 			 * function - needed for last batch in case where
3104 			 * the number of operations is not a multiple of
3105 			 * burst size.
3106 			 */
3107 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
3108 
3109 			/* Wait until processing of previous batch is
3110 			 * completed
3111 			 */
3112 			while (rte_atomic16_read(&tp->nb_dequeued) !=
3113 					(int16_t) enqueued)
3114 				rte_pause();
3115 		}
3116 		if (j != TEST_REPETITIONS - 1)
3117 			rte_atomic16_clear(&tp->nb_dequeued);
3118 	}
3119 
3120 	return TEST_SUCCESS;
3121 }
3122 
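/*
 * Polling (PMD) mode throughput worker for turbo decode: enqueue and
 * dequeue in a tight loop repeated TEST_REPETITIONS times and derive
 * Mbps from the measured TSC cycles and the decoded TB size.
 */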
3123 static int
3124 throughput_pmd_lcore_dec(void *arg)
3125 {
3126 	struct thread_params *tp = arg;
3127 	uint16_t enq, deq;
3128 	uint64_t total_time = 0, start_time;
3129 	const uint16_t queue_id = tp->queue_id;
3130 	const uint16_t burst_sz = tp->op_params->burst_sz;
3131 	const uint16_t num_ops = tp->op_params->num_to_process;
3132 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3133 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3134 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3135 	struct test_buffers *bufs = NULL;
3136 	int i, j, ret;
3137 	struct rte_bbdev_info info;
3138 	uint16_t num_to_enq;
3139 
3140 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3141 			"BURST_SIZE should be <= %u", MAX_BURST);
3142 
3143 	rte_bbdev_info_get(tp->dev_id, &info);
3144 
3145 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3146 			"NUM_OPS cannot exceed %u for this device",
3147 			info.drv.queue_size_lim);
3148 
3149 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3150 
3151 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3152 		rte_pause();
3153 
3154 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3155 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3156 
3157 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3158 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3159 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3160 
3161 	/* Set counter to validate the ordering */
3162 	for (j = 0; j < num_ops; ++j)
3163 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3164 
3165 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3166 
3167 		for (j = 0; j < num_ops; ++j)
3168 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3169 
3170 		start_time = rte_rdtsc_precise();
3171 
3172 		for (enq = 0, deq = 0; enq < num_ops;) {
3173 			num_to_enq = burst_sz;
3174 
3175 			if (unlikely(num_ops - enq < num_to_enq))
3176 				num_to_enq = num_ops - enq;
3177 
3178 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3179 					queue_id, &ops_enq[enq], num_to_enq);
3180 
3181 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3182 					queue_id, &ops_deq[deq], enq - deq);
3183 		}
3184 
3185 		/* dequeue the remaining */
3186 		while (deq < enq) {
3187 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3188 					queue_id, &ops_deq[deq], enq - deq);
3189 		}
3190 
3191 		total_time += rte_rdtsc_precise() - start_time;
3192 	}
3193 
3194 	tp->iter_count = 0;
3195 	/* get the max of iter_count for all dequeued ops */
3196 	for (i = 0; i < num_ops; ++i) {
3197 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3198 				tp->iter_count);
3199 	}
3200 
3201 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3202 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3203 				tp->op_params->vector_mask);
3204 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3205 	}
3206 
3207 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3208 
3209 	double tb_len_bits = calc_dec_TB_size(ref_op);
3210 
3211 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3212 			((double)total_time / (double)rte_get_tsc_hz());
3213 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3214 			1000000.0) / ((double)total_time /
3215 			(double)rte_get_tsc_hz());
3216 
3217 	return TEST_SUCCESS;
3218 }
3219 
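/*
 * BLER worker for LDPC decode: noisy LLR input is generated at the
 * configured SNR, the ops are run once through the device, and the
 * code block error rate is derived from hard-output mismatches and
 * syndrome failures.
 */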
3220 static int
3221 bler_pmd_lcore_ldpc_dec(void *arg)
3222 {
3223 	struct thread_params *tp = arg;
3224 	uint16_t enq, deq;
3225 	uint64_t total_time = 0, start_time;
3226 	const uint16_t queue_id = tp->queue_id;
3227 	const uint16_t burst_sz = tp->op_params->burst_sz;
3228 	const uint16_t num_ops = tp->op_params->num_to_process;
3229 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3230 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3231 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3232 	struct test_buffers *bufs = NULL;
3233 	int i, j, ret;
3234 	float parity_bler = 0;
3235 	struct rte_bbdev_info info;
3236 	uint16_t num_to_enq;
3237 	bool extDdr = check_bit(ldpc_cap_flags,
3238 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3239 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3240 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3241 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3242 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3243 
3244 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3245 			"BURST_SIZE should be <= %u", MAX_BURST);
3246 
3247 	rte_bbdev_info_get(tp->dev_id, &info);
3248 
3249 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3250 			"NUM_OPS cannot exceed %u for this device",
3251 			info.drv.queue_size_lim);
3252 
3253 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3254 
3255 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3256 		rte_pause();
3257 
3258 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3259 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3260 
3261 	/* For BLER tests we need to enable early termination */
3262 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3263 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3264 		ref_op->ldpc_dec.op_flags +=
3265 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3266 	ref_op->ldpc_dec.iter_max = get_iter_max();
3267 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3268 
3269 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3270 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3271 				bufs->hard_outputs, bufs->soft_outputs,
3272 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3273 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3274 
3275 	/* Set counter to validate the ordering */
3276 	for (j = 0; j < num_ops; ++j)
3277 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3278 
3279 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3280 		for (j = 0; j < num_ops; ++j) {
3281 			if (!loopback)
3282 				mbuf_reset(
3283 				ops_enq[j]->ldpc_dec.hard_output.data);
3284 			if (hc_out || loopback)
3285 				mbuf_reset(
3286 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3287 		}
3288 		if (extDdr)
3289 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3290 					num_ops, true);
3291 		start_time = rte_rdtsc_precise();
3292 
3293 		for (enq = 0, deq = 0; enq < num_ops;) {
3294 			num_to_enq = burst_sz;
3295 
3296 			if (unlikely(num_ops - enq < num_to_enq))
3297 				num_to_enq = num_ops - enq;
3298 
3299 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3300 					queue_id, &ops_enq[enq], num_to_enq);
3301 
3302 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3303 					queue_id, &ops_deq[deq], enq - deq);
3304 		}
3305 
3306 		/* dequeue the remaining */
3307 		while (deq < enq) {
3308 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3309 					queue_id, &ops_deq[deq], enq - deq);
3310 		}
3311 
3312 		total_time += rte_rdtsc_precise() - start_time;
3313 	}
3314 
3315 	tp->iter_count = 0;
3316 	tp->iter_average = 0;
3317 	/* get the max of iter_count for all dequeued ops */
3318 	for (i = 0; i < num_ops; ++i) {
3319 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3320 				tp->iter_count);
3321 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3322 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3323 			parity_bler += 1.0;
3324 	}
3325 
3326 	parity_bler /= num_ops; /* BLER estimate based on the syndrome error status */
3327 	tp->iter_average /= num_ops;
3328 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3329 
3330 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3331 			&& tp->bler == 0
3332 			&& parity_bler == 0
3333 			&& !hc_out) {
3334 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3335 				tp->op_params->vector_mask);
3336 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3337 	}
3338 
3339 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3340 
3341 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3342 	tp->ops_per_sec = ((double)num_ops * 1) /
3343 			((double)total_time / (double)rte_get_tsc_hz());
3344 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3345 			1000000.0) / ((double)total_time /
3346 			(double)rte_get_tsc_hz());
3347 
3348 	return TEST_SUCCESS;
3349 }
3350 
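/*
 * Per-lcore throughput worker for the LDPC decoder in PMD polling mode.
 * Early termination is disabled so every op runs iter_max iterations, and
 * TEST_REPETITIONS timed enqueue/dequeue loops are accumulated into
 * ops_per_sec and mbps.
 */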
3351 static int
3352 throughput_pmd_lcore_ldpc_dec(void *arg)
3353 {
3354 	struct thread_params *tp = arg;
3355 	uint16_t enq, deq;
3356 	uint64_t total_time = 0, start_time;
3357 	const uint16_t queue_id = tp->queue_id;
3358 	const uint16_t burst_sz = tp->op_params->burst_sz;
3359 	const uint16_t num_ops = tp->op_params->num_to_process;
3360 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3361 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3362 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3363 	struct test_buffers *bufs = NULL;
3364 	int i, j, ret;
3365 	struct rte_bbdev_info info;
3366 	uint16_t num_to_enq;
3367 	bool extDdr = check_bit(ldpc_cap_flags,
3368 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3369 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3370 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3371 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3372 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3373 
3374 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3375 			"BURST_SIZE should be <= %u", MAX_BURST);
3376 
3377 	rte_bbdev_info_get(tp->dev_id, &info);
3378 
3379 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3380 			"NUM_OPS cannot exceed %u for this device",
3381 			info.drv.queue_size_lim);
3382 
3383 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3384 
3385 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3386 		rte_pause();
3387 
3388 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3389 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3390 
3391 	/* For throughput tests we need to disable early termination */
3392 	if (check_bit(ref_op->ldpc_dec.op_flags,
3393 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3394 		ref_op->ldpc_dec.op_flags -=
3395 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3396 	ref_op->ldpc_dec.iter_max = get_iter_max();
3397 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3398 
3399 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3400 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3401 				bufs->hard_outputs, bufs->soft_outputs,
3402 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3403 
3404 	/* Set counter to validate the ordering */
3405 	for (j = 0; j < num_ops; ++j)
3406 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3407 
3408 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3409 		for (j = 0; j < num_ops; ++j) {
3410 			if (!loopback)
3411 				mbuf_reset(
3412 				ops_enq[j]->ldpc_dec.hard_output.data);
3413 			if (hc_out || loopback)
3414 				mbuf_reset(
3415 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3416 		}
3417 		if (extDdr)
3418 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3419 					num_ops, true);
3420 		start_time = rte_rdtsc_precise();
3421 
3422 		for (enq = 0, deq = 0; enq < num_ops;) {
3423 			num_to_enq = burst_sz;
3424 
3425 			if (unlikely(num_ops - enq < num_to_enq))
3426 				num_to_enq = num_ops - enq;
3427 
3428 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3429 					queue_id, &ops_enq[enq], num_to_enq);
3430 
3431 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3432 					queue_id, &ops_deq[deq], enq - deq);
3433 		}
3434 
3435 		/* dequeue the remaining */
3436 		while (deq < enq) {
3437 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3438 					queue_id, &ops_deq[deq], enq - deq);
3439 		}
3440 
3441 		total_time += rte_rdtsc_precise() - start_time;
3442 	}
3443 
3444 	tp->iter_count = 0;
3445 	/* get the max of iter_count for all dequeued ops */
3446 	for (i = 0; i < num_ops; ++i) {
3447 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3448 				tp->iter_count);
3449 	}
3450 	if (extDdr) {
3451 		/* Read loopback is not thread safe */
3452 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3453 	}
3454 
3455 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3456 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3457 				tp->op_params->vector_mask);
3458 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3459 	}
3460 
3461 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3462 
3463 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3464 
3465 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3466 			((double)total_time / (double)rte_get_tsc_hz());
3467 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3468 			1000000.0) / ((double)total_time /
3469 			(double)rte_get_tsc_hz());
3470 
3471 	return TEST_SUCCESS;
3472 }
3473 
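/* Per-lcore throughput worker for the Turbo encoder in PMD polling mode. */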
3474 static int
3475 throughput_pmd_lcore_enc(void *arg)
3476 {
3477 	struct thread_params *tp = arg;
3478 	uint16_t enq, deq;
3479 	uint64_t total_time = 0, start_time;
3480 	const uint16_t queue_id = tp->queue_id;
3481 	const uint16_t burst_sz = tp->op_params->burst_sz;
3482 	const uint16_t num_ops = tp->op_params->num_to_process;
3483 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3484 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3485 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3486 	struct test_buffers *bufs = NULL;
3487 	int i, j, ret;
3488 	struct rte_bbdev_info info;
3489 	uint16_t num_to_enq;
3490 
3491 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3492 			"BURST_SIZE should be <= %u", MAX_BURST);
3493 
3494 	rte_bbdev_info_get(tp->dev_id, &info);
3495 
3496 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3497 			"NUM_OPS cannot exceed %u for this device",
3498 			info.drv.queue_size_lim);
3499 
3500 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3501 
3502 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3503 		rte_pause();
3504 
3505 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3506 			num_ops);
3507 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3508 			num_ops);
3509 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3510 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3511 				bufs->hard_outputs, ref_op);
3512 
3513 	/* Set counter to validate the ordering */
3514 	for (j = 0; j < num_ops; ++j)
3515 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3516 
3517 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3518 
3519 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3520 			for (j = 0; j < num_ops; ++j)
3521 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3522 
3523 		start_time = rte_rdtsc_precise();
3524 
3525 		for (enq = 0, deq = 0; enq < num_ops;) {
3526 			num_to_enq = burst_sz;
3527 
3528 			if (unlikely(num_ops - enq < num_to_enq))
3529 				num_to_enq = num_ops - enq;
3530 
3531 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3532 					queue_id, &ops_enq[enq], num_to_enq);
3533 
3534 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3535 					queue_id, &ops_deq[deq], enq - deq);
3536 		}
3537 
3538 		/* dequeue the remaining */
3539 		while (deq < enq) {
3540 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3541 					queue_id, &ops_deq[deq], enq - deq);
3542 		}
3543 
3544 		total_time += rte_rdtsc_precise() - start_time;
3545 	}
3546 
3547 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3548 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3549 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3550 	}
3551 
3552 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3553 
3554 	double tb_len_bits = calc_enc_TB_size(ref_op);
3555 
3556 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3557 			((double)total_time / (double)rte_get_tsc_hz());
3558 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3559 			/ 1000000.0) / ((double)total_time /
3560 			(double)rte_get_tsc_hz());
3561 
3562 	return TEST_SUCCESS;
3563 }
3564 
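/* Per-lcore throughput worker for the LDPC encoder in PMD polling mode. */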
3565 static int
3566 throughput_pmd_lcore_ldpc_enc(void *arg)
3567 {
3568 	struct thread_params *tp = arg;
3569 	uint16_t enq, deq;
3570 	uint64_t total_time = 0, start_time;
3571 	const uint16_t queue_id = tp->queue_id;
3572 	const uint16_t burst_sz = tp->op_params->burst_sz;
3573 	const uint16_t num_ops = tp->op_params->num_to_process;
3574 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3575 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3576 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3577 	struct test_buffers *bufs = NULL;
3578 	int i, j, ret;
3579 	struct rte_bbdev_info info;
3580 	uint16_t num_to_enq;
3581 
3582 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3583 			"BURST_SIZE should be <= %u", MAX_BURST);
3584 
3585 	rte_bbdev_info_get(tp->dev_id, &info);
3586 
3587 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3588 			"NUM_OPS cannot exceed %u for this device",
3589 			info.drv.queue_size_lim);
3590 
3591 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3592 
3593 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3594 		rte_pause();
3595 
3596 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3597 			num_ops);
3598 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3599 			num_ops);
3600 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3601 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3602 				bufs->hard_outputs, ref_op);
3603 
3604 	/* Set counter to validate the ordering */
3605 	for (j = 0; j < num_ops; ++j)
3606 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3607 
3608 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3609 
3610 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3611 			for (j = 0; j < num_ops; ++j)
3612 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3613 
3614 		start_time = rte_rdtsc_precise();
3615 
3616 		for (enq = 0, deq = 0; enq < num_ops;) {
3617 			num_to_enq = burst_sz;
3618 
3619 			if (unlikely(num_ops - enq < num_to_enq))
3620 				num_to_enq = num_ops - enq;
3621 
3622 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3623 					queue_id, &ops_enq[enq], num_to_enq);
3624 
3625 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3626 					queue_id, &ops_deq[deq], enq - deq);
3627 		}
3628 
3629 		/* dequeue the remaining */
3630 		while (deq < enq) {
3631 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3632 					queue_id, &ops_deq[deq], enq - deq);
3633 		}
3634 
3635 		total_time += rte_rdtsc_precise() - start_time;
3636 	}
3637 
3638 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3639 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3640 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3641 	}
3642 
3643 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3644 
3645 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3646 
3647 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3648 			((double)total_time / (double)rte_get_tsc_hz());
3649 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3650 			/ 1000000.0) / ((double)total_time /
3651 			(double)rte_get_tsc_hz());
3652 
3653 	return TEST_SUCCESS;
3654 }
3655 
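/* Aggregate the encoder performance results over the number of cores used */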
3656 static void
3657 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3658 {
3659 	unsigned int iter = 0;
3660 	double total_mops = 0, total_mbps = 0;
3661 
3662 	for (iter = 0; iter < used_cores; iter++) {
3663 		printf(
3664 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3665 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3666 			t_params[iter].mbps);
3667 		total_mops += t_params[iter].ops_per_sec;
3668 		total_mbps += t_params[iter].mbps;
3669 	}
3670 	printf(
3671 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3672 		used_cores, total_mops, total_mbps);
3673 }
3674 
3675 /* Aggregate the performance results over the number of cores used */
3676 static void
3677 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3678 {
3679 	unsigned int core_idx = 0;
3680 	double total_mops = 0, total_mbps = 0;
3681 	uint8_t iter_count = 0;
3682 
3683 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3684 		printf(
3685 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3686 			t_params[core_idx].lcore_id,
3687 			t_params[core_idx].ops_per_sec,
3688 			t_params[core_idx].mbps,
3689 			t_params[core_idx].iter_count);
3690 		total_mops += t_params[core_idx].ops_per_sec;
3691 		total_mbps += t_params[core_idx].mbps;
3692 		iter_count = RTE_MAX(iter_count,
3693 				t_params[core_idx].iter_count);
3694 	}
3695 	printf(
3696 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3697 		used_cores, total_mops, total_mbps, iter_count);
3698 }
3699 
3700 /* Aggregate the BLER and throughput results over the number of cores used */
3701 static void
3702 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3703 {
3704 	unsigned int core_idx = 0;
3705 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3706 	double snr = get_snr();
3707 
3708 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3709 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3710 				t_params[core_idx].lcore_id,
3711 				t_params[core_idx].bler * 100,
3712 				t_params[core_idx].iter_average,
3713 				t_params[core_idx].mbps,
3714 				get_vector_filename());
3715 		total_mbps += t_params[core_idx].mbps;
3716 		total_bler += t_params[core_idx].bler;
3717 		total_iter += t_params[core_idx].iter_average;
3718 	}
3719 	total_bler /= used_cores;
3720 	total_iter /= used_cores;
3721 
3722 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3723 			snr, total_bler * 100, total_iter, get_iter_max(),
3724 			total_mbps, get_vector_filename());
3725 }
3726 
3727 /*
3728  * Test function that determines BLER wireless performance
3729  */
3730 static int
3731 bler_test(struct active_device *ad,
3732 		struct test_op_params *op_params)
3733 {
3734 	int ret;
3735 	unsigned int lcore_id, used_cores = 0;
3736 	struct thread_params *t_params;
3737 	struct rte_bbdev_info info;
3738 	lcore_function_t *bler_function;
3739 	uint16_t num_lcores;
3740 	const char *op_type_str;
3741 
3742 	rte_bbdev_info_get(ad->dev_id, &info);
3743 
3744 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3745 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3746 			test_vector.op_type);
3747 
3748 	printf("+ ------------------------------------------------------- +\n");
3749 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3750 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3751 			op_params->num_to_process, op_params->num_lcores,
3752 			op_type_str,
3753 			intr_enabled ? "Interrupt mode" : "PMD mode",
3754 			(double)rte_get_tsc_hz() / 1000000000.0);
3755 
3756 	/* Set number of lcores */
3757 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3758 			? ad->nb_queues
3759 			: op_params->num_lcores;
3760 
3761 	/* Allocate memory for thread parameters structure */
3762 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3763 			RTE_CACHE_LINE_SIZE);
3764 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3765 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3766 				RTE_CACHE_LINE_SIZE));
3767 
3768 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3769 			!check_bit(test_vector.ldpc_dec.op_flags,
3770 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3771 			&& !check_bit(test_vector.ldpc_dec.op_flags,
3772 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
3773 		bler_function = bler_pmd_lcore_ldpc_dec;
3774 	else
3775 		return TEST_SKIPPED;
3776 
3777 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3778 
3779 	/* Main core is set at first entry */
3780 	t_params[0].dev_id = ad->dev_id;
3781 	t_params[0].lcore_id = rte_lcore_id();
3782 	t_params[0].op_params = op_params;
3783 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3784 	t_params[0].iter_count = 0;
3785 
3786 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3787 		if (used_cores >= num_lcores)
3788 			break;
3789 
3790 		t_params[used_cores].dev_id = ad->dev_id;
3791 		t_params[used_cores].lcore_id = lcore_id;
3792 		t_params[used_cores].op_params = op_params;
3793 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3794 		t_params[used_cores].iter_count = 0;
3795 
3796 		rte_eal_remote_launch(bler_function,
3797 				&t_params[used_cores++], lcore_id);
3798 	}
3799 
3800 	rte_atomic16_set(&op_params->sync, SYNC_START);
3801 	ret = bler_function(&t_params[0]);
3802 
3803 	/* Main core is always used */
3804 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3805 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3806 
3807 	print_dec_bler(t_params, num_lcores);
3808 
3809 	/* Return if test failed */
3810 	if (ret) {
3811 		rte_free(t_params);
3812 		return ret;
3813 	}
3814 
3815 	/* Additional result reporting could be added here */
3816 	rte_free(t_params);
3817 	return ret;
3818 }
3819 
3820 /*
3821  * Test function that determines how long an enqueue + dequeue of a burst
3822  * takes on available lcores.
3823  */
3824 static int
3825 throughput_test(struct active_device *ad,
3826 		struct test_op_params *op_params)
3827 {
3828 	int ret;
3829 	unsigned int lcore_id, used_cores = 0;
3830 	struct thread_params *t_params, *tp;
3831 	struct rte_bbdev_info info;
3832 	lcore_function_t *throughput_function;
3833 	uint16_t num_lcores;
3834 	const char *op_type_str;
3835 
3836 	rte_bbdev_info_get(ad->dev_id, &info);
3837 
3838 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3839 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3840 			test_vector.op_type);
3841 
3842 	printf("+ ------------------------------------------------------- +\n");
3843 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3844 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3845 			op_params->num_to_process, op_params->num_lcores,
3846 			op_type_str,
3847 			intr_enabled ? "Interrupt mode" : "PMD mode",
3848 			(double)rte_get_tsc_hz() / 1000000000.0);
3849 
3850 	/* Set number of lcores */
3851 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3852 			? ad->nb_queues
3853 			: op_params->num_lcores;
3854 
3855 	/* Allocate memory for thread parameters structure */
3856 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3857 			RTE_CACHE_LINE_SIZE);
3858 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3859 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3860 				RTE_CACHE_LINE_SIZE));
3861 
3862 	if (intr_enabled) {
3863 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3864 			throughput_function = throughput_intr_lcore_dec;
3865 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3866 			throughput_function = throughput_intr_lcore_ldpc_dec;
3867 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3868 			throughput_function = throughput_intr_lcore_enc;
3869 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3870 			throughput_function = throughput_intr_lcore_ldpc_enc;
3871 		else
3872 			throughput_function = throughput_intr_lcore_enc;
3873 
3874 		/* Dequeue interrupt callback registration */
3875 		ret = rte_bbdev_callback_register(ad->dev_id,
3876 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3877 				t_params);
3878 		if (ret < 0) {
3879 			rte_free(t_params);
3880 			return ret;
3881 		}
3882 	} else {
3883 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3884 			throughput_function = throughput_pmd_lcore_dec;
3885 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3886 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3887 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3888 			throughput_function = throughput_pmd_lcore_enc;
3889 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3890 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3891 		else
3892 			throughput_function = throughput_pmd_lcore_enc;
3893 	}
3894 
3895 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3896 
3897 	/* Main core is set at first entry */
3898 	t_params[0].dev_id = ad->dev_id;
3899 	t_params[0].lcore_id = rte_lcore_id();
3900 	t_params[0].op_params = op_params;
3901 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3902 	t_params[0].iter_count = 0;
3903 
3904 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3905 		if (used_cores >= num_lcores)
3906 			break;
3907 
3908 		t_params[used_cores].dev_id = ad->dev_id;
3909 		t_params[used_cores].lcore_id = lcore_id;
3910 		t_params[used_cores].op_params = op_params;
3911 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3912 		t_params[used_cores].iter_count = 0;
3913 
3914 		rte_eal_remote_launch(throughput_function,
3915 				&t_params[used_cores++], lcore_id);
3916 	}
3917 
3918 	rte_atomic16_set(&op_params->sync, SYNC_START);
3919 	ret = throughput_function(&t_params[0]);
3920 
3921 	/* Main core is always used */
3922 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3923 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3924 
3925 	/* Return if test failed */
3926 	if (ret) {
3927 		rte_free(t_params);
3928 		return ret;
3929 	}
3930 
3931 	/* Print throughput if interrupts are disabled and test passed */
3932 	if (!intr_enabled) {
3933 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3934 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3935 			print_dec_throughput(t_params, num_lcores);
3936 		else
3937 			print_enc_throughput(t_params, num_lcores);
3938 		rte_free(t_params);
3939 		return ret;
3940 	}
3941 
3942 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3943 	 * all pending operations. Skip waiting for queues which reported an
3944 	 * error using processing_status variable.
3945 	 * Wait for main lcore operations.
3946 	 */
3947 	tp = &t_params[0];
3948 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3949 			op_params->num_to_process) &&
3950 			(rte_atomic16_read(&tp->processing_status) !=
3951 			TEST_FAILED))
3952 		rte_pause();
3953 
3954 	tp->ops_per_sec /= TEST_REPETITIONS;
3955 	tp->mbps /= TEST_REPETITIONS;
3956 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3957 
3958 	/* Wait for worker lcores operations */
3959 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3960 		tp = &t_params[used_cores];
3961 
3962 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3963 				op_params->num_to_process) &&
3964 				(rte_atomic16_read(&tp->processing_status) !=
3965 				TEST_FAILED))
3966 			rte_pause();
3967 
3968 		tp->ops_per_sec /= TEST_REPETITIONS;
3969 		tp->mbps /= TEST_REPETITIONS;
3970 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3971 	}
3972 
3973 	/* Print throughput if test passed */
3974 	if (!ret) {
3975 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3976 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3977 			print_dec_throughput(t_params, num_lcores);
3978 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3979 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3980 			print_enc_throughput(t_params, num_lcores);
3981 	}
3982 
3983 	rte_free(t_params);
3984 	return ret;
3985 }
3986 
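/* Test case for latency/validation for Turbo Decoder */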
3987 static int
3988 latency_test_dec(struct rte_mempool *mempool,
3989 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3990 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3991 		const uint16_t num_to_process, uint16_t burst_sz,
3992 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3993 {
3994 	int ret = TEST_SUCCESS;
3995 	uint16_t i, j, dequeued;
3996 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3997 	uint64_t start_time = 0, last_time = 0;
3998 
3999 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4000 		uint16_t enq = 0, deq = 0;
4001 		bool first_time = true;
4002 		last_time = 0;
4003 
4004 		if (unlikely(num_to_process - dequeued < burst_sz))
4005 			burst_sz = num_to_process - dequeued;
4006 
4007 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4008 		TEST_ASSERT_SUCCESS(ret,
4009 				"rte_bbdev_dec_op_alloc_bulk() failed");
4010 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4011 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4012 					bufs->inputs,
4013 					bufs->hard_outputs,
4014 					bufs->soft_outputs,
4015 					ref_op);
4016 
4017 		/* Set counter to validate the ordering */
4018 		for (j = 0; j < burst_sz; ++j)
4019 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4020 
4021 		start_time = rte_rdtsc_precise();
4022 
4023 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4024 				burst_sz);
4025 		TEST_ASSERT(enq == burst_sz,
4026 				"Error enqueueing burst, expected %u, got %u",
4027 				burst_sz, enq);
4028 
4029 		/* Dequeue */
4030 		do {
4031 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4032 					&ops_deq[deq], burst_sz - deq);
4033 			if (likely(first_time && (deq > 0))) {
4034 				last_time = rte_rdtsc_precise() - start_time;
4035 				first_time = false;
4036 			}
4037 		} while (unlikely(burst_sz != deq));
4038 
4039 		*max_time = RTE_MAX(*max_time, last_time);
4040 		*min_time = RTE_MIN(*min_time, last_time);
4041 		*total_time += last_time;
4042 
4043 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4044 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4045 					vector_mask);
4046 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4047 		}
4048 
4049 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4050 		dequeued += deq;
4051 	}
4052 
4053 	return i;
4054 }
4055 
4056 /* Test case for latency/validation for LDPC Decoder */
4057 static int
4058 latency_test_ldpc_dec(struct rte_mempool *mempool,
4059 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4060 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4061 		const uint16_t num_to_process, uint16_t burst_sz,
4062 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4063 		bool disable_et)
4064 {
4065 	int ret = TEST_SUCCESS;
4066 	uint16_t i, j, dequeued;
4067 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4068 	uint64_t start_time = 0, last_time = 0;
4069 	bool extDdr = ldpc_cap_flags &
4070 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4071 
4072 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4073 		uint16_t enq = 0, deq = 0;
4074 		bool first_time = true;
4075 		last_time = 0;
4076 
4077 		if (unlikely(num_to_process - dequeued < burst_sz))
4078 			burst_sz = num_to_process - dequeued;
4079 
4080 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4081 		TEST_ASSERT_SUCCESS(ret,
4082 				"rte_bbdev_dec_op_alloc_bulk() failed");
4083 
4084 		/* For latency tests we need to disable early termination */
4085 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4086 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4087 			ref_op->ldpc_dec.op_flags -=
4088 					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4089 		ref_op->ldpc_dec.iter_max = get_iter_max();
4090 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4091 
4092 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4093 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4094 					bufs->inputs,
4095 					bufs->hard_outputs,
4096 					bufs->soft_outputs,
4097 					bufs->harq_inputs,
4098 					bufs->harq_outputs,
4099 					ref_op);
4100 
4101 		if (extDdr)
4102 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4103 					burst_sz, true);
4104 
4105 		/* Set counter to validate the ordering */
4106 		for (j = 0; j < burst_sz; ++j)
4107 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4108 
4109 		start_time = rte_rdtsc_precise();
4110 
4111 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4112 				&ops_enq[enq], burst_sz);
4113 		TEST_ASSERT(enq == burst_sz,
4114 				"Error enqueueing burst, expected %u, got %u",
4115 				burst_sz, enq);
4116 
4117 		/* Dequeue */
4118 		do {
4119 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4120 					&ops_deq[deq], burst_sz - deq);
4121 			if (likely(first_time && (deq > 0))) {
4122 				last_time = rte_rdtsc_precise() - start_time;
4123 				first_time = false;
4124 			}
4125 		} while (unlikely(burst_sz != deq));
4126 
4127 		*max_time = RTE_MAX(*max_time, last_time);
4128 		*min_time = RTE_MIN(*min_time, last_time);
4129 		*total_time += last_time;
4130 
4131 		if (extDdr)
4132 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4133 
4134 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4135 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4136 					vector_mask);
4137 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4138 		}
4139 
4140 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4141 		dequeued += deq;
4142 	}
4143 	return i;
4144 }
4145 
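/* Test case for latency/validation for Turbo Encoder */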
4146 static int
4147 latency_test_enc(struct rte_mempool *mempool,
4148 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4149 		uint16_t dev_id, uint16_t queue_id,
4150 		const uint16_t num_to_process, uint16_t burst_sz,
4151 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4152 {
4153 	int ret = TEST_SUCCESS;
4154 	uint16_t i, j, dequeued;
4155 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4156 	uint64_t start_time = 0, last_time = 0;
4157 
4158 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4159 		uint16_t enq = 0, deq = 0;
4160 		bool first_time = true;
4161 		last_time = 0;
4162 
4163 		if (unlikely(num_to_process - dequeued < burst_sz))
4164 			burst_sz = num_to_process - dequeued;
4165 
4166 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4167 		TEST_ASSERT_SUCCESS(ret,
4168 				"rte_bbdev_enc_op_alloc_bulk() failed");
4169 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4170 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4171 					bufs->inputs,
4172 					bufs->hard_outputs,
4173 					ref_op);
4174 
4175 		/* Set counter to validate the ordering */
4176 		for (j = 0; j < burst_sz; ++j)
4177 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4178 
4179 		start_time = rte_rdtsc_precise();
4180 
4181 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4182 				burst_sz);
4183 		TEST_ASSERT(enq == burst_sz,
4184 				"Error enqueueing burst, expected %u, got %u",
4185 				burst_sz, enq);
4186 
4187 		/* Dequeue */
4188 		do {
4189 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4190 					&ops_deq[deq], burst_sz - deq);
4191 			if (likely(first_time && (deq > 0))) {
4192 				last_time += rte_rdtsc_precise() - start_time;
4193 				first_time = false;
4194 			}
4195 		} while (unlikely(burst_sz != deq));
4196 
4197 		*max_time = RTE_MAX(*max_time, last_time);
4198 		*min_time = RTE_MIN(*min_time, last_time);
4199 		*total_time += last_time;
4200 
4201 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4202 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4203 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4204 		}
4205 
4206 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4207 		dequeued += deq;
4208 	}
4209 
4210 	return i;
4211 }
4212 
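/* Test case for latency/validation for LDPC Encoder */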
4213 static int
4214 latency_test_ldpc_enc(struct rte_mempool *mempool,
4215 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4216 		uint16_t dev_id, uint16_t queue_id,
4217 		const uint16_t num_to_process, uint16_t burst_sz,
4218 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4219 {
4220 	int ret = TEST_SUCCESS;
4221 	uint16_t i, j, dequeued;
4222 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4223 	uint64_t start_time = 0, last_time = 0;
4224 
4225 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4226 		uint16_t enq = 0, deq = 0;
4227 		bool first_time = true;
4228 		last_time = 0;
4229 
4230 		if (unlikely(num_to_process - dequeued < burst_sz))
4231 			burst_sz = num_to_process - dequeued;
4232 
4233 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4234 		TEST_ASSERT_SUCCESS(ret,
4235 				"rte_bbdev_enc_op_alloc_bulk() failed");
4236 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4237 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4238 					bufs->inputs,
4239 					bufs->hard_outputs,
4240 					ref_op);
4241 
4242 		/* Set counter to validate the ordering */
4243 		for (j = 0; j < burst_sz; ++j)
4244 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4245 
4246 		start_time = rte_rdtsc_precise();
4247 
4248 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4249 				&ops_enq[enq], burst_sz);
4250 		TEST_ASSERT(enq == burst_sz,
4251 				"Error enqueueing burst, expected %u, got %u",
4252 				burst_sz, enq);
4253 
4254 		/* Dequeue */
4255 		do {
4256 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4257 					&ops_deq[deq], burst_sz - deq);
4258 			if (likely(first_time && (deq > 0))) {
4259 				last_time += rte_rdtsc_precise() - start_time;
4260 				first_time = false;
4261 			}
4262 		} while (unlikely(burst_sz != deq));
4263 
4264 		*max_time = RTE_MAX(*max_time, last_time);
4265 		*min_time = RTE_MIN(*min_time, last_time);
4266 		*total_time += last_time;
4267 
4268 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4269 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4270 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4271 		}
4272 
4273 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4274 		dequeued += deq;
4275 	}
4276 
4277 	return i;
4278 }
4279 
4280 /* Common function for running validation and latency test cases */
4281 static int
4282 validation_latency_test(struct active_device *ad,
4283 		struct test_op_params *op_params, bool latency_flag)
4284 {
4285 	int iter;
4286 	uint16_t burst_sz = op_params->burst_sz;
4287 	const uint16_t num_to_process = op_params->num_to_process;
4288 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4289 	const uint16_t queue_id = ad->queue_ids[0];
4290 	struct test_buffers *bufs = NULL;
4291 	struct rte_bbdev_info info;
4292 	uint64_t total_time, min_time, max_time;
4293 	const char *op_type_str;
4294 
4295 	total_time = max_time = 0;
4296 	min_time = UINT64_MAX;
4297 
4298 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4299 			"BURST_SIZE should be <= %u", MAX_BURST);
4300 
4301 	rte_bbdev_info_get(ad->dev_id, &info);
4302 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4303 
4304 	op_type_str = rte_bbdev_op_type_str(op_type);
4305 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4306 
4307 	printf("+ ------------------------------------------------------- +\n");
4308 	if (latency_flag)
4309 		printf("== test: latency\ndev:");
4310 	else
4311 		printf("== test: validation\ndev:");
4312 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4313 			info.dev_name, burst_sz, num_to_process, op_type_str);
4314 
4315 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4316 		iter = latency_test_dec(op_params->mp, bufs,
4317 				op_params->ref_dec_op, op_params->vector_mask,
4318 				ad->dev_id, queue_id, num_to_process,
4319 				burst_sz, &total_time, &min_time, &max_time);
4320 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4321 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4322 				op_params->ref_enc_op, ad->dev_id, queue_id,
4323 				num_to_process, burst_sz, &total_time,
4324 				&min_time, &max_time);
4325 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4326 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4327 				op_params->ref_dec_op, op_params->vector_mask,
4328 				ad->dev_id, queue_id, num_to_process,
4329 				burst_sz, &total_time, &min_time, &max_time,
4330 				latency_flag);
4331 	else /* RTE_BBDEV_OP_TURBO_ENC */
4332 		iter = latency_test_enc(op_params->mp, bufs,
4333 				op_params->ref_enc_op,
4334 				ad->dev_id, queue_id,
4335 				num_to_process, burst_sz, &total_time,
4336 				&min_time, &max_time);
4337 
4338 	if (iter <= 0)
4339 		return TEST_FAILED;
4340 
4341 	printf("Operation latency:\n"
4342 			"\tavg: %lg cycles, %lg us\n"
4343 			"\tmin: %lg cycles, %lg us\n"
4344 			"\tmax: %lg cycles, %lg us\n",
4345 			(double)total_time / (double)iter,
4346 			(double)(total_time * 1000000) / (double)iter /
4347 			(double)rte_get_tsc_hz(), (double)min_time,
4348 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4349 			(double)max_time, (double)(max_time * 1000000) /
4350 			(double)rte_get_tsc_hz());
4351 
4352 	return TEST_SUCCESS;
4353 }
4354 
4355 static int
4356 latency_test(struct active_device *ad, struct test_op_params *op_params)
4357 {
4358 	return validation_latency_test(ad, op_params, true);
4359 }
4360 
4361 static int
4362 validation_test(struct active_device *ad, struct test_op_params *op_params)
4363 {
4364 	return validation_latency_test(ad, op_params, false);
4365 }
4366 
4367 #ifdef RTE_BBDEV_OFFLOAD_COST
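/*
 * Copy the statistics of one queue of a device, including the
 * acc_offload_cycles counter used to split driver and accelerator time.
 */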
4368 static int
4369 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4370 		struct rte_bbdev_stats *stats)
4371 {
4372 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4373 	struct rte_bbdev_stats *q_stats;
4374 
4375 	if (queue_id >= dev->data->num_queues)
4376 		return -1;
4377 
4378 	q_stats = &dev->data->queues[queue_id].queue_stats;
4379 
4380 	stats->enqueued_count = q_stats->enqueued_count;
4381 	stats->dequeued_count = q_stats->dequeued_count;
4382 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4383 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4384 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4385 
4386 	return 0;
4387 }
4388 
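/*
 * Measure enqueue/dequeue offload cost for the Turbo decoder: the enqueue
 * call is timed with the TSC, the accelerator cycles reported by the driver
 * are subtracted to isolate the software overhead, and after a fixed wait
 * the first dequeue call is timed separately.
 */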
4389 static int
4390 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4391 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4392 		uint16_t queue_id, const uint16_t num_to_process,
4393 		uint16_t burst_sz, struct test_time_stats *time_st)
4394 {
4395 	int i, dequeued, ret;
4396 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4397 	uint64_t enq_start_time, deq_start_time;
4398 	uint64_t enq_sw_last_time, deq_last_time;
4399 	struct rte_bbdev_stats stats;
4400 
4401 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4402 		uint16_t enq = 0, deq = 0;
4403 
4404 		if (unlikely(num_to_process - dequeued < burst_sz))
4405 			burst_sz = num_to_process - dequeued;
4406 
4407 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4408 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4409 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4410 					bufs->inputs,
4411 					bufs->hard_outputs,
4412 					bufs->soft_outputs,
4413 					ref_op);
4414 
4415 		/* Start time meas for enqueue function offload latency */
4416 		enq_start_time = rte_rdtsc_precise();
4417 		do {
4418 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4419 					&ops_enq[enq], burst_sz - enq);
4420 		} while (unlikely(burst_sz != enq));
4421 
4422 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4423 		TEST_ASSERT_SUCCESS(ret,
4424 				"Failed to get stats for queue (%u) of device (%u)",
4425 				queue_id, dev_id);
4426 
4427 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4428 				stats.acc_offload_cycles;
4429 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4430 				enq_sw_last_time);
4431 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4432 				enq_sw_last_time);
4433 		time_st->enq_sw_total_time += enq_sw_last_time;
4434 
4435 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4436 				stats.acc_offload_cycles);
4437 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4438 				stats.acc_offload_cycles);
4439 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4440 
4441 		/* give time for device to process ops */
4442 		rte_delay_us(WAIT_OFFLOAD_US);
4443 
4444 		/* Start time meas for dequeue function offload latency */
4445 		deq_start_time = rte_rdtsc_precise();
4446 		/* Dequeue one operation */
4447 		do {
4448 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4449 					&ops_deq[deq], enq);
4450 		} while (unlikely(deq == 0));
4451 
4452 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4453 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4454 				deq_last_time);
4455 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4456 				deq_last_time);
4457 		time_st->deq_total_time += deq_last_time;
4458 
4459 		/* Dequeue remaining operations if needed */
4460 		while (burst_sz != deq)
4461 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4462 					&ops_deq[deq], burst_sz - deq);
4463 
4464 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4465 		dequeued += deq;
4466 	}
4467 
4468 	return i;
4469 }
4470 
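/*
 * Measure enqueue/dequeue offload cost for the LDPC decoder; same method as
 * offload_latency_test_dec, with optional preload/retrieve of external HARQ
 * DDR when the device exposes that capability.
 */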
4471 static int
4472 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4473 		struct test_buffers *bufs,
4474 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4475 		uint16_t queue_id, const uint16_t num_to_process,
4476 		uint16_t burst_sz, struct test_time_stats *time_st)
4477 {
4478 	int i, dequeued, ret;
4479 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4480 	uint64_t enq_start_time, deq_start_time;
4481 	uint64_t enq_sw_last_time, deq_last_time;
4482 	struct rte_bbdev_stats stats;
4483 	bool extDdr = ldpc_cap_flags &
4484 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4485 
4486 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4487 		uint16_t enq = 0, deq = 0;
4488 
4489 		if (unlikely(num_to_process - dequeued < burst_sz))
4490 			burst_sz = num_to_process - dequeued;
4491 
4492 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4493 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4494 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4495 					bufs->inputs,
4496 					bufs->hard_outputs,
4497 					bufs->soft_outputs,
4498 					bufs->harq_inputs,
4499 					bufs->harq_outputs,
4500 					ref_op);
4501 
4502 		if (extDdr)
4503 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4504 					burst_sz, true);
4505 
4506 		/* Start time meas for enqueue function offload latency */
4507 		enq_start_time = rte_rdtsc_precise();
4508 		do {
4509 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4510 					&ops_enq[enq], burst_sz - enq);
4511 		} while (unlikely(burst_sz != enq));
4512 
4513 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4514 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4515 		TEST_ASSERT_SUCCESS(ret,
4516 				"Failed to get stats for queue (%u) of device (%u)",
4517 				queue_id, dev_id);
4518 
4519 		enq_sw_last_time -= stats.acc_offload_cycles;
4520 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4521 				enq_sw_last_time);
4522 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4523 				enq_sw_last_time);
4524 		time_st->enq_sw_total_time += enq_sw_last_time;
4525 
4526 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4527 				stats.acc_offload_cycles);
4528 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4529 				stats.acc_offload_cycles);
4530 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4531 
4532 		/* give time for device to process ops */
4533 		rte_delay_us(WAIT_OFFLOAD_US);
4534 
4535 		/* Start time meas for dequeue function offload latency */
4536 		deq_start_time = rte_rdtsc_precise();
4537 		/* Dequeue one operation */
4538 		do {
4539 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4540 					&ops_deq[deq], enq);
4541 		} while (unlikely(deq == 0));
4542 
4543 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4544 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4545 				deq_last_time);
4546 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4547 				deq_last_time);
4548 		time_st->deq_total_time += deq_last_time;
4549 
4550 		/* Dequeue remaining operations if needed */
4551 		while (burst_sz != deq)
4552 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4553 					&ops_deq[deq], burst_sz - deq);
4554 
4555 		if (extDdr) {
4556 			/* Read loopback is not thread safe */
4557 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4558 		}
4559 
4560 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4561 		dequeued += deq;
4562 	}
4563 
4564 	return i;
4565 }
4566 
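/* Measure enqueue/dequeue offload cost for the Turbo encoder. */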
4567 static int
4568 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4569 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4570 		uint16_t queue_id, const uint16_t num_to_process,
4571 		uint16_t burst_sz, struct test_time_stats *time_st)
4572 {
4573 	int i, dequeued, ret;
4574 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4575 	uint64_t enq_start_time, deq_start_time;
4576 	uint64_t enq_sw_last_time, deq_last_time;
4577 	struct rte_bbdev_stats stats;
4578 
4579 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4580 		uint16_t enq = 0, deq = 0;
4581 
4582 		if (unlikely(num_to_process - dequeued < burst_sz))
4583 			burst_sz = num_to_process - dequeued;
4584 
4585 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4586 		TEST_ASSERT_SUCCESS(ret,
4587 				"rte_bbdev_enc_op_alloc_bulk() failed");
4588 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4589 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4590 					bufs->inputs,
4591 					bufs->hard_outputs,
4592 					ref_op);
4593 
4594 		/* Start time meas for enqueue function offload latency */
4595 		enq_start_time = rte_rdtsc_precise();
4596 		do {
4597 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4598 					&ops_enq[enq], burst_sz - enq);
4599 		} while (unlikely(burst_sz != enq));
4600 
4601 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4602 
4603 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4604 		TEST_ASSERT_SUCCESS(ret,
4605 				"Failed to get stats for queue (%u) of device (%u)",
4606 				queue_id, dev_id);
4607 		enq_sw_last_time -= stats.acc_offload_cycles;
4608 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4609 				enq_sw_last_time);
4610 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4611 				enq_sw_last_time);
4612 		time_st->enq_sw_total_time += enq_sw_last_time;
4613 
4614 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4615 				stats.acc_offload_cycles);
4616 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4617 				stats.acc_offload_cycles);
4618 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4619 
4620 		/* give time for device to process ops */
4621 		rte_delay_us(WAIT_OFFLOAD_US);
4622 
4623 		/* Start time meas for dequeue function offload latency */
4624 		deq_start_time = rte_rdtsc_precise();
4625 		/* Dequeue one operation */
4626 		do {
4627 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4628 					&ops_deq[deq], enq);
4629 		} while (unlikely(deq == 0));
4630 
4631 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4632 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4633 				deq_last_time);
4634 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4635 				deq_last_time);
4636 		time_st->deq_total_time += deq_last_time;
4637 
4638 		while (burst_sz != deq)
4639 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4640 					&ops_deq[deq], burst_sz - deq);
4641 
4642 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4643 		dequeued += deq;
4644 	}
4645 
4646 	return i;
4647 }
4648 
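/* Measure enqueue/dequeue offload cost for the LDPC encoder. */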
4649 static int
4650 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4651 		struct test_buffers *bufs,
4652 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4653 		uint16_t queue_id, const uint16_t num_to_process,
4654 		uint16_t burst_sz, struct test_time_stats *time_st)
4655 {
4656 	int i, dequeued, ret;
4657 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4658 	uint64_t enq_start_time, deq_start_time;
4659 	uint64_t enq_sw_last_time, deq_last_time;
4660 	struct rte_bbdev_stats stats;
4661 
4662 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4663 		uint16_t enq = 0, deq = 0;
4664 
4665 		if (unlikely(num_to_process - dequeued < burst_sz))
4666 			burst_sz = num_to_process - dequeued;
4667 
4668 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4669 		TEST_ASSERT_SUCCESS(ret,
4670 				"rte_bbdev_enc_op_alloc_bulk() failed");
4671 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4672 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4673 					bufs->inputs,
4674 					bufs->hard_outputs,
4675 					ref_op);
4676 
4677 		/* Start time meas for enqueue function offload latency */
4678 		enq_start_time = rte_rdtsc_precise();
4679 		do {
4680 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4681 					&ops_enq[enq], burst_sz - enq);
4682 		} while (unlikely(burst_sz != enq));
4683 
4684 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4685 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4686 		TEST_ASSERT_SUCCESS(ret,
4687 				"Failed to get stats for queue (%u) of device (%u)",
4688 				queue_id, dev_id);
4689 
4690 		enq_sw_last_time -= stats.acc_offload_cycles;
4691 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4692 				enq_sw_last_time);
4693 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4694 				enq_sw_last_time);
4695 		time_st->enq_sw_total_time += enq_sw_last_time;
4696 
4697 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4698 				stats.acc_offload_cycles);
4699 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4700 				stats.acc_offload_cycles);
4701 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4702 
4703 		/* give time for device to process ops */
4704 		rte_delay_us(WAIT_OFFLOAD_US);
4705 
4706 		/* Start time meas for dequeue function offload latency */
4707 		deq_start_time = rte_rdtsc_precise();
4708 		/* Dequeue one operation */
4709 		do {
4710 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4711 					&ops_deq[deq], enq);
4712 		} while (unlikely(deq == 0));
4713 
4714 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4715 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4716 				deq_last_time);
4717 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4718 				deq_last_time);
4719 		time_st->deq_total_time += deq_last_time;
4720 
4721 		while (burst_sz != deq)
4722 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4723 					&ops_deq[deq], burst_sz - deq);
4724 
4725 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4726 		dequeued += deq;
4727 	}
4728 
4729 	return i;
4730 }
4731 #endif
4732 
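/*
 * Offload cost test entry point: dispatch to the per-op-type measurement
 * loop, print the enqueue (software and accelerator) and dequeue latency
 * statistics, and sanity check the queue counters afterwards.
 */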
4733 static int
4734 offload_cost_test(struct active_device *ad,
4735 		struct test_op_params *op_params)
4736 {
4737 #ifndef RTE_BBDEV_OFFLOAD_COST
4738 	RTE_SET_USED(ad);
4739 	RTE_SET_USED(op_params);
4740 	printf("Offload latency test is disabled.\n");
4741 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4742 	return TEST_SKIPPED;
4743 #else
4744 	int iter;
4745 	uint16_t burst_sz = op_params->burst_sz;
4746 	const uint16_t num_to_process = op_params->num_to_process;
4747 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4748 	const uint16_t queue_id = ad->queue_ids[0];
4749 	struct test_buffers *bufs = NULL;
4750 	struct rte_bbdev_info info;
4751 	const char *op_type_str;
4752 	struct test_time_stats time_st;
4753 
4754 	memset(&time_st, 0, sizeof(struct test_time_stats));
4755 	time_st.enq_sw_min_time = UINT64_MAX;
4756 	time_st.enq_acc_min_time = UINT64_MAX;
4757 	time_st.deq_min_time = UINT64_MAX;
4758 
4759 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4760 			"BURST_SIZE should be <= %u", MAX_BURST);
4761 
4762 	rte_bbdev_info_get(ad->dev_id, &info);
4763 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4764 
4765 	op_type_str = rte_bbdev_op_type_str(op_type);
4766 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4767 
4768 	printf("+ ------------------------------------------------------- +\n");
4769 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4770 			info.dev_name, burst_sz, num_to_process, op_type_str);
4771 
4772 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4773 		iter = offload_latency_test_dec(op_params->mp, bufs,
4774 				op_params->ref_dec_op, ad->dev_id, queue_id,
4775 				num_to_process, burst_sz, &time_st);
4776 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4777 		iter = offload_latency_test_enc(op_params->mp, bufs,
4778 				op_params->ref_enc_op, ad->dev_id, queue_id,
4779 				num_to_process, burst_sz, &time_st);
4780 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4781 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4782 				op_params->ref_enc_op, ad->dev_id, queue_id,
4783 				num_to_process, burst_sz, &time_st);
4784 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4785 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4786 			op_params->ref_dec_op, ad->dev_id, queue_id,
4787 			num_to_process, burst_sz, &time_st);
4788 	else
4789 		iter = offload_latency_test_enc(op_params->mp, bufs,
4790 				op_params->ref_enc_op, ad->dev_id, queue_id,
4791 				num_to_process, burst_sz, &time_st);
4792 
4793 	if (iter <= 0)
4794 		return TEST_FAILED;
4795 
4796 	printf("Enqueue driver offload cost latency:\n"
4797 			"\tavg: %lg cycles, %lg us\n"
4798 			"\tmin: %lg cycles, %lg us\n"
4799 			"\tmax: %lg cycles, %lg us\n"
4800 			"Enqueue accelerator offload cost latency:\n"
4801 			"\tavg: %lg cycles, %lg us\n"
4802 			"\tmin: %lg cycles, %lg us\n"
4803 			"\tmax: %lg cycles, %lg us\n",
4804 			(double)time_st.enq_sw_total_time / (double)iter,
4805 			(double)(time_st.enq_sw_total_time * 1000000) /
4806 			(double)iter / (double)rte_get_tsc_hz(),
4807 			(double)time_st.enq_sw_min_time,
4808 			(double)(time_st.enq_sw_min_time * 1000000) /
4809 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4810 			(double)(time_st.enq_sw_max_time * 1000000) /
4811 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4812 			(double)iter,
4813 			(double)(time_st.enq_acc_total_time * 1000000) /
4814 			(double)iter / (double)rte_get_tsc_hz(),
4815 			(double)time_st.enq_acc_min_time,
4816 			(double)(time_st.enq_acc_min_time * 1000000) /
4817 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4818 			(double)(time_st.enq_acc_max_time * 1000000) /
4819 			rte_get_tsc_hz());
4820 
4821 	printf("Dequeue offload cost latency - one op:\n"
4822 			"\tavg: %lg cycles, %lg us\n"
4823 			"\tmin: %lg cycles, %lg us\n"
4824 			"\tmax: %lg cycles, %lg us\n",
4825 			(double)time_st.deq_total_time / (double)iter,
4826 			(double)(time_st.deq_total_time * 1000000) /
4827 			(double)iter / (double)rte_get_tsc_hz(),
4828 			(double)time_st.deq_min_time,
4829 			(double)(time_st.deq_min_time * 1000000) /
4830 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4831 			(double)(time_st.deq_max_time * 1000000) /
4832 			rte_get_tsc_hz());
4833 
4834 	struct rte_bbdev_stats stats = {0};
4835 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4836 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4837 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4838 				"Mismatch in enqueue count %10"PRIu64" %d",
4839 				stats.enqueued_count, num_to_process);
4840 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4841 				"Mismatch in dequeue count %10"PRIu64" %d",
4842 				stats.dequeued_count, num_to_process);
4843 	}
4844 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4845 			"Enqueue count Error %10"PRIu64"",
4846 			stats.enqueue_err_count);
4847 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4848 			"Dequeue count Error %10"PRIu64"",
4849 			stats.dequeue_err_count);
4850 
4851 	return TEST_SUCCESS;
4852 #endif
4853 }
4854 
4855 #ifdef RTE_BBDEV_OFFLOAD_COST
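/* Measure the cost of decoder dequeue calls issued on an empty queue. */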
4856 static int
4857 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4858 		const uint16_t num_to_process, uint16_t burst_sz,
4859 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4860 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4861 {
4862 	int i, deq_total;
4863 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4864 	uint64_t deq_start_time, deq_last_time;
4865 
4866 	/* Test deq offload latency from an empty queue */
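	/*
	 * Nothing has been enqueued on this queue, so each dequeue call is
	 * expected to return zero ops and the loop measures the cost of
	 * polling an empty queue.
	 */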
4867 
4868 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4869 			++i, deq_total += burst_sz) {
4870 		deq_start_time = rte_rdtsc_precise();
4871 
4872 		if (unlikely(num_to_process - deq_total < burst_sz))
4873 			burst_sz = num_to_process - deq_total;
4874 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4875 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4876 					burst_sz);
4877 		else
4878 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4879 					burst_sz);
4880 
4881 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4882 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4883 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4884 		*deq_total_time += deq_last_time;
4885 	}
4886 
4887 	return i;
4888 }
4889 
4890 static int
4891 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4892 		const uint16_t num_to_process, uint16_t burst_sz,
4893 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4894 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4895 {
4896 	int i, deq_total;
4897 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4898 	uint64_t deq_start_time, deq_last_time;
4899 
4900 	/* Test deq offload latency from an empty queue */
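	/* As above: the queue is empty, so only the cost of polling an empty
	 * queue is measured.
	 */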
4901 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4902 			++i, deq_total += burst_sz) {
4903 		deq_start_time = rte_rdtsc_precise();
4904 
4905 		if (unlikely(num_to_process - deq_total < burst_sz))
4906 			burst_sz = num_to_process - deq_total;
4907 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4908 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4909 					burst_sz);
4910 		else
4911 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4912 					burst_sz);
4913 
4914 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4915 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4916 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4917 		*deq_total_time += deq_last_time;
4918 	}
4919 
4920 	return i;
4921 }
4922 
4923 #endif
4924 
4925 static int
4926 offload_latency_empty_q_test(struct active_device *ad,
4927 		struct test_op_params *op_params)
4928 {
4929 #ifndef RTE_BBDEV_OFFLOAD_COST
4930 	RTE_SET_USED(ad);
4931 	RTE_SET_USED(op_params);
4932 	printf("Offload latency empty dequeue test is disabled.\n");
4933 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4934 	return TEST_SKIPPED;
4935 #else
4936 	int iter;
4937 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4938 	uint16_t burst_sz = op_params->burst_sz;
4939 	const uint16_t num_to_process = op_params->num_to_process;
4940 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4941 	const uint16_t queue_id = ad->queue_ids[0];
4942 	struct rte_bbdev_info info;
4943 	const char *op_type_str;
4944 
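	/* Start min at UINT64_MAX so the first measured burst always lowers it */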
4945 	deq_total_time = deq_max_time = 0;
4946 	deq_min_time = UINT64_MAX;
4947 
4948 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4949 			"BURST_SIZE should be <= %u", MAX_BURST);
4950 
4951 	rte_bbdev_info_get(ad->dev_id, &info);
4952 
4953 	op_type_str = rte_bbdev_op_type_str(op_type);
4954 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4955 
4956 	printf("+ ------------------------------------------------------- +\n");
4957 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4958 			info.dev_name, burst_sz, num_to_process, op_type_str);
4959 
4960 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4961 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4962 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4963 				num_to_process, burst_sz, &deq_total_time,
4964 				&deq_min_time, &deq_max_time, op_type);
4965 	else
4966 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4967 				num_to_process, burst_sz, &deq_total_time,
4968 				&deq_min_time, &deq_max_time, op_type);
4969 
4970 	if (iter <= 0)
4971 		return TEST_FAILED;
4972 
4973 	printf("Empty dequeue offload:\n"
4974 			"\tavg: %lg cycles, %lg us\n"
4975 			"\tmin: %lg cycles, %lg us\n"
4976 			"\tmax: %lg cycles, %lg us\n",
4977 			(double)deq_total_time / (double)iter,
4978 			(double)(deq_total_time * 1000000) / (double)iter /
4979 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4980 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4981 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4982 			rte_get_tsc_hz());
4983 
4984 	return TEST_SUCCESS;
4985 #endif
4986 }
4987 
4988 static int
4989 bler_tc(void)
4990 {
4991 	return run_test_case(bler_test);
4992 }
4993 
4994 static int
4995 throughput_tc(void)
4996 {
4997 	return run_test_case(throughput_test);
4998 }
4999 
5000 static int
5001 offload_cost_tc(void)
5002 {
5003 	return run_test_case(offload_cost_test);
5004 }
5005 
5006 static int
5007 offload_latency_empty_q_tc(void)
5008 {
5009 	return run_test_case(offload_latency_empty_q_test);
5010 }
5011 
5012 static int
5013 latency_tc(void)
5014 {
5015 	return run_test_case(latency_test);
5016 }
5017 
5018 static int
5019 validation_tc(void)
5020 {
5021 	return run_test_case(validation_test);
5022 }
5023 
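/*
 * The interrupt test case reuses the throughput test body; the interrupt
 * path is selected through the intr_enabled flag set during the interrupt
 * test-suite setup.
 */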
5024 static int
5025 interrupt_tc(void)
5026 {
5027 	return run_test_case(throughput_test);
5028 }
5029 
5030 static struct unit_test_suite bbdev_bler_testsuite = {
5031 	.suite_name = "BBdev BLER Tests",
5032 	.setup = testsuite_setup,
5033 	.teardown = testsuite_teardown,
5034 	.unit_test_cases = {
5035 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5036 		TEST_CASES_END() /**< NULL terminate unit test array */
5037 	}
5038 };
5039 
5040 static struct unit_test_suite bbdev_throughput_testsuite = {
5041 	.suite_name = "BBdev Throughput Tests",
5042 	.setup = testsuite_setup,
5043 	.teardown = testsuite_teardown,
5044 	.unit_test_cases = {
5045 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5046 		TEST_CASES_END() /**< NULL terminate unit test array */
5047 	}
5048 };
5049 
5050 static struct unit_test_suite bbdev_validation_testsuite = {
5051 	.suite_name = "BBdev Validation Tests",
5052 	.setup = testsuite_setup,
5053 	.teardown = testsuite_teardown,
5054 	.unit_test_cases = {
5055 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5056 		TEST_CASES_END() /**< NULL terminate unit test array */
5057 	}
5058 };
5059 
5060 static struct unit_test_suite bbdev_latency_testsuite = {
5061 	.suite_name = "BBdev Latency Tests",
5062 	.setup = testsuite_setup,
5063 	.teardown = testsuite_teardown,
5064 	.unit_test_cases = {
5065 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5066 		TEST_CASES_END() /**< NULL terminate unit test array */
5067 	}
5068 };
5069 
5070 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5071 	.suite_name = "BBdev Offload Cost Tests",
5072 	.setup = testsuite_setup,
5073 	.teardown = testsuite_teardown,
5074 	.unit_test_cases = {
5075 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5076 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5077 		TEST_CASES_END() /**< NULL terminate unit test array */
5078 	}
5079 };
5080 
5081 static struct unit_test_suite bbdev_interrupt_testsuite = {
5082 	.suite_name = "BBdev Interrupt Tests",
5083 	.setup = interrupt_testsuite_setup,
5084 	.teardown = testsuite_teardown,
5085 	.unit_test_cases = {
5086 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5087 		TEST_CASES_END() /**< NULL terminate unit test array */
5088 	}
5089 };
5090 
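/* Register each suite with the test framework under its command name */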
5091 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5092 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5093 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5094 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5095 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5096 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5097