xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 665b49c51639a10c553433bc2bcd85c7331c631e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <inttypes.h>
8 #include <math.h>
9 
10 #include <rte_eal.h>
11 #include <rte_common.h>
12 #include <rte_dev.h>
13 #include <rte_launch.h>
14 #include <rte_bbdev.h>
15 #include <rte_cycles.h>
16 #include <rte_lcore.h>
17 #include <rte_malloc.h>
18 #include <rte_random.h>
19 #include <rte_hexdump.h>
20 #include <rte_interrupts.h>
21 
22 #include "main.h"
23 #include "test_bbdev_vector.h"
24 
25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
26 
27 #define MAX_QUEUES RTE_MAX_LCORE
28 #define TEST_REPETITIONS 100
29 #define WAIT_OFFLOAD_US 1000
30 
31 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
32 #include <fpga_lte_fec.h>
33 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
34 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
35 #define VF_UL_4G_QUEUE_VALUE 4
36 #define VF_DL_4G_QUEUE_VALUE 4
37 #define UL_4G_BANDWIDTH 3
38 #define DL_4G_BANDWIDTH 3
39 #define UL_4G_LOAD_BALANCE 128
40 #define DL_4G_LOAD_BALANCE 128
41 #define FLR_4G_TIMEOUT 610
42 #endif
43 
44 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
45 #include <rte_pmd_fpga_5gnr_fec.h>
46 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
47 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
48 #define VF_UL_5G_QUEUE_VALUE 4
49 #define VF_DL_5G_QUEUE_VALUE 4
50 #define UL_5G_BANDWIDTH 3
51 #define DL_5G_BANDWIDTH 3
52 #define UL_5G_LOAD_BALANCE 128
53 #define DL_5G_LOAD_BALANCE 128
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC
57 #include <rte_acc_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #define ACC200PF_DRIVER_NAME   ("intel_acc200_pf")
67 #define ACC200VF_DRIVER_NAME   ("intel_acc200_vf")
68 #define ACC200_QMGR_NUM_AQS 16
69 #define ACC200_QMGR_NUM_QGS 2
70 #define ACC200_QMGR_AQ_DEPTH 5
71 #define ACC200_QMGR_INVALID_IDX -1
72 #define ACC200_QMGR_RR 1
73 #define ACC200_QOS_GBR 0
74 #endif
75 
76 #define OPS_CACHE_SIZE 256U
77 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
78 
79 #define SYNC_WAIT 0
80 #define SYNC_START 1
81 #define INVALID_OPAQUE -1
82 
83 #define INVALID_QUEUE_ID -1
84 /* Increment for next code block in external HARQ memory */
85 #define HARQ_INCR 32768
86 /* Headroom for filler LLRs insertion in HARQ buffer */
87 #define FILLER_HEADROOM 1024
88 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
89 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
90 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
91 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
92 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
93 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
94 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
95 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
96 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
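/*
 * Illustrative use of these constants (sketch of 3GPP 38.212 Table 5.4.2.1-2,
 * not a definitive implementation): for rv_index > 0 the rate-matching start is
 *   k0 = floor(numerator * Ncb / (N_ZC * Zc)) * Zc
 * e.g. rv_index 2 with base graph 1 uses floor(K0_2_1 * Ncb / (N_ZC_1 * Zc)) * Zc,
 * while rv_index 0 starts at k0 = 0.
 */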
97 
98 static struct test_bbdev_vector test_vector;
99 
100 /* Switch between PMD and Interrupt for throughput TC */
101 static bool intr_enabled;
102 
103 /* LLR arithmetic representation for numerical conversion */
104 static int ldpc_llr_decimals;
105 static int ldpc_llr_size;
106 /* Keep track of the LDPC decoder device capability flag */
107 static uint32_t ldpc_cap_flags;
108 
109 /* Represents tested active devices */
110 static struct active_device {
111 	const char *driver_name;
112 	uint8_t dev_id;
113 	uint16_t supported_ops;
114 	uint16_t queue_ids[MAX_QUEUES];
115 	uint16_t nb_queues;
116 	struct rte_mempool *ops_mempool;
117 	struct rte_mempool *in_mbuf_pool;
118 	struct rte_mempool *hard_out_mbuf_pool;
119 	struct rte_mempool *soft_out_mbuf_pool;
120 	struct rte_mempool *harq_in_mbuf_pool;
121 	struct rte_mempool *harq_out_mbuf_pool;
122 } active_devs[RTE_BBDEV_MAX_DEVS];
123 
124 static uint8_t nb_active_devs;
125 
126 /* Data buffers used by BBDEV ops */
127 struct test_buffers {
128 	struct rte_bbdev_op_data *inputs;
129 	struct rte_bbdev_op_data *hard_outputs;
130 	struct rte_bbdev_op_data *soft_outputs;
131 	struct rte_bbdev_op_data *harq_inputs;
132 	struct rte_bbdev_op_data *harq_outputs;
133 };
134 
135 /* Operation parameters specific for given test case */
136 struct test_op_params {
137 	struct rte_mempool *mp;
138 	struct rte_bbdev_dec_op *ref_dec_op;
139 	struct rte_bbdev_enc_op *ref_enc_op;
140 	struct rte_bbdev_fft_op *ref_fft_op;
141 	uint16_t burst_sz;
142 	uint16_t num_to_process;
143 	uint16_t num_lcores;
144 	int vector_mask;
145 	uint16_t sync;
146 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
147 };
148 
149 /* Contains per lcore params */
150 struct thread_params {
151 	uint8_t dev_id;
152 	uint16_t queue_id;
153 	uint32_t lcore_id;
154 	uint64_t start_time;
155 	double ops_per_sec;
156 	double mbps;
157 	uint8_t iter_count;
158 	double iter_average;
159 	double bler;
160 	uint16_t nb_dequeued;
161 	int16_t processing_status;
162 	uint16_t burst_sz;
163 	struct test_op_params *op_params;
164 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
165 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
166 	struct rte_bbdev_fft_op *fft_ops[MAX_BURST];
167 };
168 
169 /* Stores time statistics */
170 struct test_time_stats {
171 	/* Stores software enqueue total working time */
172 	uint64_t enq_sw_total_time;
173 	/* Stores minimum value of software enqueue working time */
174 	uint64_t enq_sw_min_time;
175 	/* Stores maximum value of software enqueue working time */
176 	uint64_t enq_sw_max_time;
177 	/* Stores turbo enqueue total working time */
178 	uint64_t enq_acc_total_time;
179 	/* Stores minimum value of accelerator enqueue working time */
180 	uint64_t enq_acc_min_time;
181 	/* Stores maximum value of accelerator enqueue working time */
182 	uint64_t enq_acc_max_time;
183 	/* Stores dequeue total working time */
184 	uint64_t deq_total_time;
185 	/* Stores minimum value of dequeue working time */
186 	uint64_t deq_min_time;
187 	/* Stores maximum value of dequeue working time */
188 	uint64_t deq_max_time;
189 };
190 
191 typedef int (test_case_function)(struct active_device *ad,
192 		struct test_op_params *op_params);
193 
194 static inline void
195 mbuf_reset(struct rte_mbuf *m)
196 {
197 	m->pkt_len = 0;
198 
199 	do {
200 		m->data_len = 0;
201 		m = m->next;
202 	} while (m != NULL);
203 }
204 
205 /* Read flag value 0/1 from bitmap */
206 static inline bool
207 check_bit(uint32_t bitmap, uint32_t bitmask)
208 {
209 	return bitmap & bitmask;
210 }
211 
212 static inline void
213 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
214 {
215 	ad->supported_ops |= (1 << op_type);
216 }
217 
218 static inline bool
219 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
220 {
221 	return ad->supported_ops & (1 << op_type);
222 }
223 
224 static inline bool
225 flags_match(uint32_t flags_req, uint32_t flags_present)
226 {
227 	return (flags_req & flags_present) == flags_req;
228 }
229 
230 static void
231 clear_soft_out_cap(uint32_t *op_flags)
232 {
233 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
234 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
235 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
236 }
237 
238 /* Convert all the test vector op data entries to big-endian
239  * format. Used when the device expects its input in big-endian
240  * format.
241  */
242 static inline void
243 convert_op_data_to_be(void)
244 {
245 	struct op_data_entries *op;
246 	enum op_data_type type;
247 	uint8_t nb_segs, *rem_data, temp;
248 	uint32_t *data, len;
249 	int complete, rem, i, j;
250 
251 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
252 		nb_segs = test_vector.entries[type].nb_segments;
253 		op = &test_vector.entries[type];
254 
255 		/* Invert byte endianness for all the segments */
256 		for (i = 0; i < nb_segs; ++i) {
257 			len = op->segments[i].length;
258 			data = op->segments[i].addr;
259 
260 			/* Swap complete u32 bytes */
261 			complete = len / 4;
262 			for (j = 0; j < complete; j++)
263 				data[j] = rte_bswap32(data[j]);
264 
265 			/* Swap any remaining bytes */
266 			rem = len % 4;
267 			rem_data = (uint8_t *)&data[j];
268 			for (j = 0; j < rem/2; j++) {
269 				temp = rem_data[j];
270 				rem_data[j] = rem_data[rem - j - 1];
271 				rem_data[rem - j - 1] = temp;
272 			}
273 		}
274 	}
275 }
276 
277 static int
278 check_dev_cap(const struct rte_bbdev_info *dev_info)
279 {
280 	unsigned int i;
281 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
282 		nb_harq_inputs, nb_harq_outputs;
283 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
284 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
285 
286 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
287 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
288 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
289 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
290 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
291 
292 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
293 		if (op_cap->type != test_vector.op_type)
294 			continue;
295 
296 		if (dev_data_endianness == RTE_BIG_ENDIAN)
297 			convert_op_data_to_be();
298 
299 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
300 			const struct rte_bbdev_op_cap_turbo_dec *cap =
301 					&op_cap->cap.turbo_dec;
302 			/* Ignore lack of soft output capability, just skip
303 			 * checking if soft output is valid.
304 			 */
305 			if ((test_vector.turbo_dec.op_flags &
306 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
307 					!(cap->capability_flags &
308 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
309 				printf(
310 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
311 					dev_info->dev_name);
312 				clear_soft_out_cap(
313 					&test_vector.turbo_dec.op_flags);
314 			}
315 
316 			if (!flags_match(test_vector.turbo_dec.op_flags,
317 					cap->capability_flags))
318 				return TEST_FAILED;
319 			if (nb_inputs > cap->num_buffers_src) {
320 				printf("Too many inputs defined: %u, max: %u\n",
321 					nb_inputs, cap->num_buffers_src);
322 				return TEST_FAILED;
323 			}
324 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
325 					(test_vector.turbo_dec.op_flags &
326 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
327 				printf(
328 					"Too many soft outputs defined: %u, max: %u\n",
329 						nb_soft_outputs,
330 						cap->num_buffers_soft_out);
331 				return TEST_FAILED;
332 			}
333 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
334 				printf(
335 					"Too many hard outputs defined: %u, max: %u\n",
336 						nb_hard_outputs,
337 						cap->num_buffers_hard_out);
338 				return TEST_FAILED;
339 			}
340 			if (intr_enabled && !(cap->capability_flags &
341 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
342 				printf(
343 					"Dequeue interrupts are not supported!\n");
344 				return TEST_FAILED;
345 			}
346 
347 			return TEST_SUCCESS;
348 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
349 			const struct rte_bbdev_op_cap_turbo_enc *cap =
350 					&op_cap->cap.turbo_enc;
351 
352 			if (!flags_match(test_vector.turbo_enc.op_flags,
353 					cap->capability_flags))
354 				return TEST_FAILED;
355 			if (nb_inputs > cap->num_buffers_src) {
356 				printf("Too many inputs defined: %u, max: %u\n",
357 					nb_inputs, cap->num_buffers_src);
358 				return TEST_FAILED;
359 			}
360 			if (nb_hard_outputs > cap->num_buffers_dst) {
361 				printf(
362 					"Too many hard outputs defined: %u, max: %u\n",
363 					nb_hard_outputs, cap->num_buffers_dst);
364 				return TEST_FAILED;
365 			}
366 			if (intr_enabled && !(cap->capability_flags &
367 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
368 				printf(
369 					"Dequeue interrupts are not supported!\n");
370 				return TEST_FAILED;
371 			}
372 
373 			return TEST_SUCCESS;
374 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
375 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
376 					&op_cap->cap.ldpc_enc;
377 
378 			if (!flags_match(test_vector.ldpc_enc.op_flags,
379 					cap->capability_flags)){
380 				printf("Flag Mismatch\n");
381 				return TEST_FAILED;
382 			}
383 			if (nb_inputs > cap->num_buffers_src) {
384 				printf("Too many inputs defined: %u, max: %u\n",
385 					nb_inputs, cap->num_buffers_src);
386 				return TEST_FAILED;
387 			}
388 			if (nb_hard_outputs > cap->num_buffers_dst) {
389 				printf(
390 					"Too many hard outputs defined: %u, max: %u\n",
391 					nb_hard_outputs, cap->num_buffers_dst);
392 				return TEST_FAILED;
393 			}
394 			if (intr_enabled && !(cap->capability_flags &
395 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
396 				printf(
397 					"Dequeue interrupts are not supported!\n");
398 				return TEST_FAILED;
399 			}
400 
401 			return TEST_SUCCESS;
402 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
403 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
404 					&op_cap->cap.ldpc_dec;
405 
406 			if (!flags_match(test_vector.ldpc_dec.op_flags,
407 					cap->capability_flags)){
408 				printf("Flag Mismatch\n");
409 				return TEST_FAILED;
410 			}
411 			if (nb_inputs > cap->num_buffers_src) {
412 				printf("Too many inputs defined: %u, max: %u\n",
413 					nb_inputs, cap->num_buffers_src);
414 				return TEST_FAILED;
415 			}
416 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
417 				printf(
418 					"Too many hard outputs defined: %u, max: %u\n",
419 					nb_hard_outputs,
420 					cap->num_buffers_hard_out);
421 				return TEST_FAILED;
422 			}
423 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
424 				printf(
425 					"Too many HARQ inputs defined: %u, max: %u\n",
426 					nb_harq_inputs,
427 					cap->num_buffers_hard_out);
428 				return TEST_FAILED;
429 			}
430 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
431 				printf(
432 					"Too many HARQ outputs defined: %u, max: %u\n",
433 					nb_harq_outputs,
434 					cap->num_buffers_hard_out);
435 				return TEST_FAILED;
436 			}
437 			if (intr_enabled && !(cap->capability_flags &
438 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
439 				printf(
440 					"Dequeue interrupts are not supported!\n");
441 				return TEST_FAILED;
442 			}
443 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
444 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
445 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
446 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
447 					))) {
448 				printf("Skip loop-back with interrupt\n");
449 				return TEST_FAILED;
450 			}
451 			return TEST_SUCCESS;
452 		} else if (op_cap->type == RTE_BBDEV_OP_FFT) {
453 			const struct rte_bbdev_op_cap_fft *cap = &op_cap->cap.fft;
454 
455 			if (!flags_match(test_vector.fft.op_flags, cap->capability_flags)) {
456 				printf("Flag Mismatch\n");
457 				return TEST_FAILED;
458 			}
459 			if (nb_inputs > cap->num_buffers_src) {
460 				printf("Too many inputs defined: %u, max: %u\n",
461 					nb_inputs, cap->num_buffers_src);
462 				return TEST_FAILED;
463 			}
464 			return TEST_SUCCESS;
465 		}
466 	}
467 
468 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
469 		return TEST_SUCCESS; /* Special case for NULL device */
470 
471 	return TEST_FAILED;
472 }
473 
474 /* Calculate the optimal (2^n - 1) mempool size that is not smaller than val */
475 static unsigned int
476 optimal_mempool_size(unsigned int val)
477 {
478 	return rte_align32pow2(val + 1) - 1;
479 }
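/*
 * Illustrative example: optimal_mempool_size(600) yields 1023, since
 * rte_align32pow2(601) is 1024 and a (2^n - 1) element count is the
 * memory-optimal mempool size per the rte_mempool guidelines.
 */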
480 
481 /* allocates mbuf mempool for inputs and outputs */
482 static struct rte_mempool *
483 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
484 		int socket_id, unsigned int mbuf_pool_size,
485 		const char *op_type_str)
486 {
487 	unsigned int i;
488 	uint32_t max_seg_sz = 0;
489 	char pool_name[RTE_MEMPOOL_NAMESIZE];
490 
491 	/* find max input segment size */
492 	for (i = 0; i < entries->nb_segments; ++i)
493 		if (entries->segments[i].length > max_seg_sz)
494 			max_seg_sz = entries->segments[i].length;
495 
496 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
497 			dev_id);
498 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
499 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
500 					+ FILLER_HEADROOM,
501 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
502 }
503 
504 static int
505 create_mempools(struct active_device *ad, int socket_id,
506 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
507 {
508 	struct rte_mempool *mp;
509 	unsigned int ops_pool_size, mbuf_pool_size = 0;
510 	char pool_name[RTE_MEMPOOL_NAMESIZE];
511 	const char *op_type_str;
512 	enum rte_bbdev_op_type op_type = org_op_type;
513 
514 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
515 	struct op_data_entries *hard_out =
516 			&test_vector.entries[DATA_HARD_OUTPUT];
517 	struct op_data_entries *soft_out =
518 			&test_vector.entries[DATA_SOFT_OUTPUT];
519 	struct op_data_entries *harq_in =
520 			&test_vector.entries[DATA_HARQ_INPUT];
521 	struct op_data_entries *harq_out =
522 			&test_vector.entries[DATA_HARQ_OUTPUT];
523 
524 	/* allocate ops mempool */
525 	ops_pool_size = optimal_mempool_size(RTE_MAX(
526 			/* Ops used plus 1 reference op */
527 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
528 			/* Minimal cache size plus 1 reference op */
529 			(unsigned int)(1.5 * rte_lcore_count() *
530 					OPS_CACHE_SIZE + 1)),
531 			OPS_POOL_SIZE_MIN));
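	/*
	 * Illustrative sizing with hypothetical values: 4 queues, num_ops = 2047
	 * and 4 lcores gives max(4 * 2047 + 1, 1.5 * 4 * 256 + 1, 511) = 8189,
	 * which optimal_mempool_size() rounds up to 8191.
	 */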
532 
533 	if (org_op_type == RTE_BBDEV_OP_NONE)
534 		op_type = RTE_BBDEV_OP_TURBO_ENC;
535 
536 	op_type_str = rte_bbdev_op_type_str(op_type);
537 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
538 
539 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
540 			ad->dev_id);
541 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
542 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
543 	TEST_ASSERT_NOT_NULL(mp,
544 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
545 			ops_pool_size,
546 			ad->dev_id,
547 			socket_id);
548 	ad->ops_mempool = mp;
549 
550 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
551 	if (org_op_type == RTE_BBDEV_OP_NONE)
552 		return TEST_SUCCESS;
553 
554 	/* Inputs */
555 	if (in->nb_segments > 0) {
556 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
557 				in->nb_segments);
558 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
559 				mbuf_pool_size, "in");
560 		TEST_ASSERT_NOT_NULL(mp,
561 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
562 				mbuf_pool_size,
563 				ad->dev_id,
564 				socket_id);
565 		ad->in_mbuf_pool = mp;
566 	}
567 
568 	/* Hard outputs */
569 	if (hard_out->nb_segments > 0) {
570 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
571 				hard_out->nb_segments);
572 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
573 				mbuf_pool_size,
574 				"hard_out");
575 		TEST_ASSERT_NOT_NULL(mp,
576 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
577 				mbuf_pool_size,
578 				ad->dev_id,
579 				socket_id);
580 		ad->hard_out_mbuf_pool = mp;
581 	}
582 
583 	/* Soft outputs */
584 	if (soft_out->nb_segments > 0) {
585 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
586 				soft_out->nb_segments);
587 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
588 				mbuf_pool_size,
589 				"soft_out");
590 		TEST_ASSERT_NOT_NULL(mp,
591 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
592 				mbuf_pool_size,
593 				ad->dev_id,
594 				socket_id);
595 		ad->soft_out_mbuf_pool = mp;
596 	}
597 
598 	/* HARQ inputs */
599 	if (harq_in->nb_segments > 0) {
600 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
601 				harq_in->nb_segments);
602 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
603 				mbuf_pool_size,
604 				"harq_in");
605 		TEST_ASSERT_NOT_NULL(mp,
606 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
607 				mbuf_pool_size,
608 				ad->dev_id,
609 				socket_id);
610 		ad->harq_in_mbuf_pool = mp;
611 	}
612 
613 	/* HARQ outputs */
614 	if (harq_out->nb_segments > 0) {
615 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
616 				harq_out->nb_segments);
617 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
618 				mbuf_pool_size,
619 				"harq_out");
620 		TEST_ASSERT_NOT_NULL(mp,
621 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
622 				mbuf_pool_size,
623 				ad->dev_id,
624 				socket_id);
625 		ad->harq_out_mbuf_pool = mp;
626 	}
627 
628 	return TEST_SUCCESS;
629 }
630 
631 static int
632 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
633 		struct test_bbdev_vector *vector)
634 {
635 	int ret;
636 	unsigned int queue_id;
637 	struct rte_bbdev_queue_conf qconf;
638 	struct active_device *ad = &active_devs[nb_active_devs];
639 	unsigned int nb_queues;
640 	enum rte_bbdev_op_type op_type = vector->op_type;
641 
642 /* Configure FPGA LTE FEC with PF & VF values
643  * if the '-i' flag is set and an FPGA device is used
644  */
645 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
646 	if ((get_init_device() == true) &&
647 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
648 		struct rte_fpga_lte_fec_conf conf;
649 		unsigned int i;
650 
651 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
652 				info->drv.driver_name);
653 
654 		/* clear default configuration before initialization */
655 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
656 
657 		/* Set PF mode :
658 		 * true if PF is used for data plane
659 		 * false for VFs
660 		 */
661 		conf.pf_mode_en = true;
662 
663 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
664 			/* Number of UL queues per VF (fpga supports 8 VFs) */
665 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
666 			/* Number of DL queues per VF (fpga supports 8 VFs) */
667 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
668 		}
669 
670 		/* UL bandwidth. Needed for schedule algorithm */
671 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
672 		/* DL bandwidth */
673 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
674 
675 		/* UL & DL load balance factor (128) */
676 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
677 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
678 
679 		/* FLR timeout value */
680 		conf.flr_time_out = FLR_4G_TIMEOUT;
681 
682 		/* setup FPGA PF with configuration information */
683 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
684 		TEST_ASSERT_SUCCESS(ret,
685 				"Failed to configure 4G FPGA PF for bbdev %s",
686 				info->dev_name);
687 	}
688 #endif
689 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
690 	if ((get_init_device() == true) &&
691 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
692 		struct rte_fpga_5gnr_fec_conf conf;
693 		unsigned int i;
694 
695 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
696 				info->drv.driver_name);
697 
698 		/* clear default configuration before initialization */
699 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
700 
701 		/* Set PF mode :
702 		 * true if PF is used for data plane
703 		 * false for VFs
704 		 */
705 		conf.pf_mode_en = true;
706 
707 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
708 			/* Number of UL queues per VF (fpga supports 8 VFs) */
709 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
710 			/* Number of DL queues per VF (fpga supports 8 VFs) */
711 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
712 		}
713 
714 		/* UL bandwidth. Needed for schedule algorithm */
715 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
716 		/* DL bandwidth */
717 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
718 
719 		/* UL & DL load balance factor (128) */
720 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
721 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
722 
723 		/* setup FPGA PF with configuration information */
724 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
725 		TEST_ASSERT_SUCCESS(ret,
726 				"Failed to configure 5G FPGA PF for bbdev %s",
727 				info->dev_name);
728 	}
729 #endif
730 #ifdef RTE_BASEBAND_ACC
731 	if ((get_init_device() == true) &&
732 			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
733 		struct rte_acc_conf conf;
734 		unsigned int i;
735 
736 		printf("Configure ACC100/ACC101 FEC Driver %s with default values\n",
737 				info->drv.driver_name);
738 
739 		/* clear default configuration before initialization */
740 		memset(&conf, 0, sizeof(struct rte_acc_conf));
741 
742 		/* Always set in PF mode for built-in configuration */
743 		conf.pf_mode_en = true;
744 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
745 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
746 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
747 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
748 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
749 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
750 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
751 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
752 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
753 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
754 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
755 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
756 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
757 		}
758 
759 		conf.input_pos_llr_1_bit = true;
760 		conf.output_pos_llr_1_bit = true;
761 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
762 
763 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
764 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
765 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
766 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
767 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
768 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
769 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
770 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
771 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
772 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
773 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
774 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
775 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
776 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
777 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
778 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
779 
780 		/* setup PF with configuration information */
781 		ret = rte_acc_configure(info->dev_name, &conf);
782 		TEST_ASSERT_SUCCESS(ret,
783 				"Failed to configure ACC100 PF for bbdev %s",
784 				info->dev_name);
785 	}
786 	if ((get_init_device() == true) &&
787 		(!strcmp(info->drv.driver_name, ACC200PF_DRIVER_NAME))) {
788 		struct rte_acc_conf conf;
789 		unsigned int i;
790 
791 		printf("Configure ACC200 FEC Driver %s with default values\n",
792 				info->drv.driver_name);
793 
794 		/* clear default configuration before initialization */
795 		memset(&conf, 0, sizeof(struct rte_acc_conf));
796 
797 		/* Always set in PF mode for built-in configuration */
798 		conf.pf_mode_en = true;
799 		for (i = 0; i < RTE_ACC_NUM_VFS; ++i) {
800 			conf.arb_dl_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
801 			conf.arb_dl_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
802 			conf.arb_dl_4g[i].round_robin_weight = ACC200_QMGR_RR;
803 			conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR;
804 			conf.arb_ul_4g[i].gbr_threshold2 = ACC200_QOS_GBR;
805 			conf.arb_ul_4g[i].round_robin_weight = ACC200_QMGR_RR;
806 			conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
807 			conf.arb_dl_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
808 			conf.arb_dl_5g[i].round_robin_weight = ACC200_QMGR_RR;
809 			conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR;
810 			conf.arb_ul_5g[i].gbr_threshold2 = ACC200_QOS_GBR;
811 			conf.arb_ul_5g[i].round_robin_weight = ACC200_QMGR_RR;
812 			conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR;
813 			conf.arb_fft[i].gbr_threshold2 = ACC200_QOS_GBR;
814 			conf.arb_fft[i].round_robin_weight = ACC200_QMGR_RR;
815 		}
816 
817 		conf.input_pos_llr_1_bit = true;
818 		conf.output_pos_llr_1_bit = true;
819 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
820 
821 		conf.q_ul_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
822 		conf.q_ul_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
823 		conf.q_ul_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
824 		conf.q_ul_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
825 		conf.q_dl_4g.num_qgroups = ACC200_QMGR_NUM_QGS;
826 		conf.q_dl_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
827 		conf.q_dl_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
828 		conf.q_dl_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
829 		conf.q_ul_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
830 		conf.q_ul_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
831 		conf.q_ul_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
832 		conf.q_ul_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
833 		conf.q_dl_5g.num_qgroups = ACC200_QMGR_NUM_QGS;
834 		conf.q_dl_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
835 		conf.q_dl_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
836 		conf.q_dl_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
837 		conf.q_fft.num_qgroups = ACC200_QMGR_NUM_QGS;
838 		conf.q_fft.first_qgroup_index = ACC200_QMGR_INVALID_IDX;
839 		conf.q_fft.num_aqs_per_groups = ACC200_QMGR_NUM_AQS;
840 		conf.q_fft.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH;
841 
842 		/* setup PF with configuration information */
843 		ret = rte_acc_configure(info->dev_name, &conf);
844 		TEST_ASSERT_SUCCESS(ret,
845 				"Failed to configure ACC200 PF for bbdev %s",
846 				info->dev_name);
847 	}
848 #endif
849 	/* Refresh the device info now that the device is configured */
850 	rte_bbdev_info_get(dev_id, info);
851 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
852 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
853 
854 	/* setup device */
855 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
856 	if (ret < 0) {
857 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
858 				dev_id, nb_queues, info->socket_id, ret);
859 		return TEST_FAILED;
860 	}
861 
862 	/* configure interrupts if needed */
863 	if (intr_enabled) {
864 		ret = rte_bbdev_intr_enable(dev_id);
865 		if (ret < 0) {
866 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
867 					ret);
868 			return TEST_FAILED;
869 		}
870 	}
871 
872 	/* setup device queues */
873 	qconf.socket = info->socket_id;
874 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
875 	qconf.priority = 0;
876 	qconf.deferred_start = 0;
877 	qconf.op_type = op_type;
878 
879 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
880 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
881 		if (ret != 0) {
882 			printf(
883 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
884 					queue_id, qconf.priority, dev_id);
885 			qconf.priority++;
886 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
887 					&qconf);
888 		}
889 		if (ret != 0) {
890 			printf("All queues on dev %u allocated: %u\n",
891 					dev_id, queue_id);
892 			break;
893 		}
894 		ad->queue_ids[queue_id] = queue_id;
895 	}
896 	TEST_ASSERT(queue_id != 0,
897 			"ERROR Failed to configure any queues on dev %u",
898 			dev_id);
899 	ad->nb_queues = queue_id;
900 
901 	set_avail_op(ad, op_type);
902 
903 	return TEST_SUCCESS;
904 }
905 
906 static int
907 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
908 		struct test_bbdev_vector *vector)
909 {
910 	int ret;
911 
912 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
913 	active_devs[nb_active_devs].dev_id = dev_id;
914 
915 	ret = add_bbdev_dev(dev_id, info, vector);
916 	if (ret == TEST_SUCCESS)
917 		++nb_active_devs;
918 	return ret;
919 }
920 
921 static uint8_t
922 populate_active_devices(void)
923 {
924 	int ret;
925 	uint8_t dev_id;
926 	uint8_t nb_devs_added = 0;
927 	struct rte_bbdev_info info;
928 
929 	RTE_BBDEV_FOREACH(dev_id) {
930 		rte_bbdev_info_get(dev_id, &info);
931 
932 		if (check_dev_cap(&info)) {
933 			printf(
934 				"Device %d (%s) does not support specified capabilities\n",
935 					dev_id, info.dev_name);
936 			continue;
937 		}
938 
939 		ret = add_active_device(dev_id, &info, &test_vector);
940 		if (ret != 0) {
941 			printf("Adding active bbdev %s skipped\n",
942 					info.dev_name);
943 			continue;
944 		}
945 		nb_devs_added++;
946 	}
947 
948 	return nb_devs_added;
949 }
950 
951 static int
952 read_test_vector(void)
953 {
954 	int ret;
955 
956 	memset(&test_vector, 0, sizeof(test_vector));
957 	printf("Test vector file = %s\n", get_vector_filename());
958 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
959 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
960 			get_vector_filename());
961 
962 	return TEST_SUCCESS;
963 }
964 
965 static int
966 testsuite_setup(void)
967 {
968 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
969 
970 	if (populate_active_devices() == 0) {
971 		printf("No suitable devices found!\n");
972 		return TEST_SKIPPED;
973 	}
974 
975 	return TEST_SUCCESS;
976 }
977 
978 static int
979 interrupt_testsuite_setup(void)
980 {
981 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
982 
983 	/* Enable interrupts */
984 	intr_enabled = true;
985 
986 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
987 	if (populate_active_devices() == 0 ||
988 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
989 		intr_enabled = false;
990 		printf("No suitable devices found!\n");
991 		return TEST_SKIPPED;
992 	}
993 
994 	return TEST_SUCCESS;
995 }
996 
997 static void
998 testsuite_teardown(void)
999 {
1000 	uint8_t dev_id;
1001 
1002 	/* Unconfigure devices */
1003 	RTE_BBDEV_FOREACH(dev_id)
1004 		rte_bbdev_close(dev_id);
1005 
1006 	/* Clear active devices structs. */
1007 	memset(active_devs, 0, sizeof(active_devs));
1008 	nb_active_devs = 0;
1009 
1010 	/* Disable interrupts */
1011 	intr_enabled = false;
1012 }
1013 
1014 static int
1015 ut_setup(void)
1016 {
1017 	uint8_t i, dev_id;
1018 
1019 	for (i = 0; i < nb_active_devs; i++) {
1020 		dev_id = active_devs[i].dev_id;
1021 		/* reset bbdev stats */
1022 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
1023 				"Failed to reset stats of bbdev %u", dev_id);
1024 		/* start the device */
1025 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
1026 				"Failed to start bbdev %u", dev_id);
1027 	}
1028 
1029 	return TEST_SUCCESS;
1030 }
1031 
1032 static void
1033 ut_teardown(void)
1034 {
1035 	uint8_t i, dev_id;
1036 	struct rte_bbdev_stats stats;
1037 
1038 	for (i = 0; i < nb_active_devs; i++) {
1039 		dev_id = active_devs[i].dev_id;
1040 		/* read stats and print */
1041 		rte_bbdev_stats_get(dev_id, &stats);
1042 		/* Stop the device */
1043 		rte_bbdev_stop(dev_id);
1044 	}
1045 }
1046 
1047 static int
1048 init_op_data_objs(struct rte_bbdev_op_data *bufs,
1049 		struct op_data_entries *ref_entries,
1050 		struct rte_mempool *mbuf_pool, const uint16_t n,
1051 		enum op_data_type op_type, uint16_t min_alignment)
1052 {
1053 	int ret;
1054 	unsigned int i, j;
1055 	bool large_input = false;
1056 
1057 	for (i = 0; i < n; ++i) {
1058 		char *data;
1059 		struct op_data_buf *seg = &ref_entries->segments[0];
1060 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
1061 		TEST_ASSERT_NOT_NULL(m_head,
1062 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1063 				op_type, n * ref_entries->nb_segments,
1064 				mbuf_pool->size);
1065 
1066 		if ((seg->length + RTE_PKTMBUF_HEADROOM) > RTE_BBDEV_LDPC_E_MAX_MBUF) {
1067 			/*
1068 			 * Special case when DPDK mbuf cannot handle
1069 			 * the required input size
1070 			 */
1071 			printf("Warning: Larger input size than DPDK mbuf %u\n",
1072 					seg->length);
1073 			large_input = true;
1074 		}
1075 		bufs[i].data = m_head;
1076 		bufs[i].offset = 0;
1077 		bufs[i].length = 0;
1078 
1079 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1080 			if (large_input) {
1081 				/* Allocate a fake oversized mbuf */
1082 				data = rte_malloc(NULL, seg->length, 0);
1083 				TEST_ASSERT_NOT_NULL(data,
1084 					"rte malloc failed with %u bytes",
1085 					seg->length);
1086 				memcpy(data, seg->addr, seg->length);
1087 				m_head->buf_addr = data;
1088 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1089 				m_head->data_off = 0;
1090 				m_head->data_len = seg->length;
1091 			} else {
1092 				data = rte_pktmbuf_append(m_head, seg->length);
1093 				TEST_ASSERT_NOT_NULL(data,
1094 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1095 					seg->length, op_type);
1096 
1097 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1098 						data, min_alignment),
1099 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1100 					data, min_alignment);
1101 				rte_memcpy(data, seg->addr, seg->length);
1102 			}
1103 
1104 			bufs[i].length += seg->length;
1105 
1106 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1107 				struct rte_mbuf *m_tail =
1108 						rte_pktmbuf_alloc(mbuf_pool);
1109 				TEST_ASSERT_NOT_NULL(m_tail,
1110 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1111 						op_type,
1112 						n * ref_entries->nb_segments,
1113 						mbuf_pool->size);
1114 				seg += 1;
1115 
1116 				data = rte_pktmbuf_append(m_tail, seg->length);
1117 				TEST_ASSERT_NOT_NULL(data,
1118 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1119 						seg->length, op_type);
1120 
1121 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1122 						min_alignment),
1123 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1124 						data, min_alignment);
1125 				rte_memcpy(data, seg->addr, seg->length);
1126 				bufs[i].length += seg->length;
1127 
1128 				ret = rte_pktmbuf_chain(m_head, m_tail);
1129 				TEST_ASSERT_SUCCESS(ret,
1130 						"Couldn't chain mbufs from %d data type mbuf pool",
1131 						op_type);
1132 			}
1133 		} else {
1134 			if (((op_type == DATA_HARD_OUTPUT) || (op_type == DATA_SOFT_OUTPUT))
1135 					&& ((seg->length + RTE_PKTMBUF_HEADROOM)
1136 					> RTE_BBDEV_LDPC_E_MAX_MBUF)) {
1137 				/* Allocate a fake oversized mbuf + margin */
1138 				data = rte_malloc(NULL, seg->length + 1024, 0);
1139 				TEST_ASSERT_NOT_NULL(data,
1140 					"rte malloc failed with %u bytes",
1141 					seg->length + 1024);
1142 				m_head->buf_addr = data;
1143 				rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data));
1144 				m_head->data_off = 0;
1145 				m_head->data_len = seg->length;
1146 			} else {
1147 				/* allocate chained-mbuf for output buffer */
1148 				for (j = 1; j < ref_entries->nb_segments; ++j) {
1149 					struct rte_mbuf *m_tail =
1150 						rte_pktmbuf_alloc(mbuf_pool);
1151 					TEST_ASSERT_NOT_NULL(m_tail,
1152 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1153 						op_type,
1154 						n * ref_entries->nb_segments,
1155 						mbuf_pool->size);
1156 
1157 					ret = rte_pktmbuf_chain(m_head, m_tail);
1158 					TEST_ASSERT_SUCCESS(ret,
1159 						"Couldn't chain mbufs from %d data type mbuf pool",
1160 						op_type);
1161 				}
1162 			}
1163 			bufs[i].length += seg->length;
1164 		}
1165 	}
1166 
1167 	return 0;
1168 }
1169 
1170 static int
1171 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1172 		const int socket)
1173 {
1174 	int i;
1175 
1176 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1177 	if (*buffers == NULL) {
1178 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1179 				socket);
1180 		/* try to allocate memory on other detected sockets */
1181 		for (i = 0; i < socket; i++) {
1182 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1183 			if (*buffers != NULL)
1184 				break;
1185 		}
1186 	}
1187 
1188 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1189 }
1190 
1191 static void
1192 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1193 		const uint16_t n, const int8_t max_llr_modulus)
1194 {
1195 	uint16_t i, byte_idx;
1196 
1197 	for (i = 0; i < n; ++i) {
1198 		struct rte_mbuf *m = input_ops[i].data;
1199 		while (m != NULL) {
1200 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1201 					input_ops[i].offset);
1202 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1203 					++byte_idx)
1204 				llr[byte_idx] = round((double)max_llr_modulus *
1205 						llr[byte_idx] / INT8_MAX);
1206 
1207 			m = m->next;
1208 		}
1209 	}
1210 }
1211 
1212 /*
1213  * Filler LLRs may have to be inserted into the HARQ input
1214  * when the device expects them in its internal HARQ memory
1215  */
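/*
 * Illustrative example (hypothetical values, not from any test vector):
 * for basegraph 1 with z_c = 384 and n_filler = 112 the filler LLRs are
 * inserted at parity_offset = 20 * 384 - 112 = 7568 and set to the
 * maximum pre-scaling LLR magnitude.
 */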
1216 static void
1217 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1218 		const uint16_t n, struct test_op_params *op_params)
1219 {
1220 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1221 
1222 	if (input_ops == NULL)
1223 		return;
1224 	/* No need to add filler if not required by device */
1225 	if (!(ldpc_cap_flags &
1226 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1227 		return;
1228 	/* No need to add filler for loopback operation */
1229 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1230 		return;
1231 
1232 	uint16_t i, j, parity_offset;
1233 	for (i = 0; i < n; ++i) {
1234 		struct rte_mbuf *m = input_ops[i].data;
1235 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1236 				input_ops[i].offset);
1237 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1238 				* dec.z_c - dec.n_filler;
1239 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1240 		m->data_len = new_hin_size;
1241 		input_ops[i].length = new_hin_size;
1242 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1243 				j--)
1244 			llr[j] = llr[j - dec.n_filler];
1245 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1246 		for (j = 0; j < dec.n_filler; j++)
1247 			llr[parity_offset + j] = llr_max_pre_scaling;
1248 	}
1249 }
1250 
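/*
 * Scale the vector LLRs to the device fixed-point format (llr_size total
 * bits, llr_decimals fractional bits) and saturate to that range. The
 * handled cases below (4, 2 and 0 decimals) scale by 2^(llr_decimals - 1),
 * assuming the reference vectors carry one fractional LLR bit.
 */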
1251 static void
1252 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1253 		const uint16_t n, const int8_t llr_size,
1254 		const int8_t llr_decimals)
1255 {
1256 	if (input_ops == NULL)
1257 		return;
1258 
1259 	uint16_t i, byte_idx;
1260 
1261 	int16_t llr_max, llr_min, llr_tmp;
1262 	llr_max = (1 << (llr_size - 1)) - 1;
1263 	llr_min = -llr_max;
1264 	for (i = 0; i < n; ++i) {
1265 		struct rte_mbuf *m = input_ops[i].data;
1266 		while (m != NULL) {
1267 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1268 					input_ops[i].offset);
1269 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1270 					++byte_idx) {
1271 
1272 				llr_tmp = llr[byte_idx];
1273 				if (llr_decimals == 4)
1274 					llr_tmp *= 8;
1275 				else if (llr_decimals == 2)
1276 					llr_tmp *= 2;
1277 				else if (llr_decimals == 0)
1278 					llr_tmp /= 2;
1279 				llr_tmp = RTE_MIN(llr_max,
1280 						RTE_MAX(llr_min, llr_tmp));
1281 				llr[byte_idx] = (int8_t) llr_tmp;
1282 			}
1283 
1284 			m = m->next;
1285 		}
1286 	}
1287 }
1288 
1289 
1290 
1291 static int
1292 fill_queue_buffers(struct test_op_params *op_params,
1293 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1294 		struct rte_mempool *soft_out_mp,
1295 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1296 		uint16_t queue_id,
1297 		const struct rte_bbdev_op_cap *capabilities,
1298 		uint16_t min_alignment, const int socket_id)
1299 {
1300 	int ret;
1301 	enum op_data_type type;
1302 	const uint16_t n = op_params->num_to_process;
1303 
1304 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1305 		in_mp,
1306 		soft_out_mp,
1307 		hard_out_mp,
1308 		harq_in_mp,
1309 		harq_out_mp,
1310 	};
1311 
1312 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1313 		&op_params->q_bufs[socket_id][queue_id].inputs,
1314 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1315 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1316 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1317 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1318 	};
1319 
1320 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1321 		struct op_data_entries *ref_entries =
1322 				&test_vector.entries[type];
1323 		if (ref_entries->nb_segments == 0)
1324 			continue;
1325 
1326 		ret = allocate_buffers_on_socket(queue_ops[type],
1327 				n * sizeof(struct rte_bbdev_op_data),
1328 				socket_id);
1329 		TEST_ASSERT_SUCCESS(ret,
1330 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1331 
1332 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1333 				mbuf_pools[type], n, type, min_alignment);
1334 		TEST_ASSERT_SUCCESS(ret,
1335 				"Couldn't init rte_bbdev_op_data structs");
1336 	}
1337 
1338 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1339 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1340 			capabilities->cap.turbo_dec.max_llr_modulus);
1341 
1342 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1343 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1344 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1345 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1346 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1347 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1348 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1349 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1350 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1351 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1352 		if (!loopback && !llr_comp)
1353 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1354 					ldpc_llr_size, ldpc_llr_decimals);
1355 		if (!loopback && !harq_comp)
1356 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1357 					ldpc_llr_size, ldpc_llr_decimals);
1358 		if (!loopback)
1359 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1360 					op_params);
1361 	}
1362 
1363 	return 0;
1364 }
1365 
1366 static void
1367 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1368 {
1369 	unsigned int i, j;
1370 
1371 	rte_mempool_free(ad->ops_mempool);
1372 	rte_mempool_free(ad->in_mbuf_pool);
1373 	rte_mempool_free(ad->hard_out_mbuf_pool);
1374 	rte_mempool_free(ad->soft_out_mbuf_pool);
1375 	rte_mempool_free(ad->harq_in_mbuf_pool);
1376 	rte_mempool_free(ad->harq_out_mbuf_pool);
1377 
1378 	for (i = 0; i < rte_lcore_count(); ++i) {
1379 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1380 			rte_free(op_params->q_bufs[j][i].inputs);
1381 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1382 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1383 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1384 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1385 		}
1386 	}
1387 }
1388 
1389 static void
1390 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1391 		unsigned int start_idx,
1392 		struct rte_bbdev_op_data *inputs,
1393 		struct rte_bbdev_op_data *hard_outputs,
1394 		struct rte_bbdev_op_data *soft_outputs,
1395 		struct rte_bbdev_dec_op *ref_op)
1396 {
1397 	unsigned int i;
1398 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1399 
1400 	for (i = 0; i < n; ++i) {
1401 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1402 			ops[i]->turbo_dec.tb_params.ea =
1403 					turbo_dec->tb_params.ea;
1404 			ops[i]->turbo_dec.tb_params.eb =
1405 					turbo_dec->tb_params.eb;
1406 			ops[i]->turbo_dec.tb_params.k_pos =
1407 					turbo_dec->tb_params.k_pos;
1408 			ops[i]->turbo_dec.tb_params.k_neg =
1409 					turbo_dec->tb_params.k_neg;
1410 			ops[i]->turbo_dec.tb_params.c =
1411 					turbo_dec->tb_params.c;
1412 			ops[i]->turbo_dec.tb_params.c_neg =
1413 					turbo_dec->tb_params.c_neg;
1414 			ops[i]->turbo_dec.tb_params.cab =
1415 					turbo_dec->tb_params.cab;
1416 			ops[i]->turbo_dec.tb_params.r =
1417 					turbo_dec->tb_params.r;
1418 		} else {
1419 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1420 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1421 		}
1422 
1423 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1424 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1425 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1426 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1427 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1428 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1429 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1430 
1431 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1432 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1433 		if (soft_outputs != NULL)
1434 			ops[i]->turbo_dec.soft_output =
1435 				soft_outputs[start_idx + i];
1436 	}
1437 }
1438 
1439 static void
1440 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1441 		unsigned int start_idx,
1442 		struct rte_bbdev_op_data *inputs,
1443 		struct rte_bbdev_op_data *outputs,
1444 		struct rte_bbdev_enc_op *ref_op)
1445 {
1446 	unsigned int i;
1447 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1448 	for (i = 0; i < n; ++i) {
1449 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1450 			ops[i]->turbo_enc.tb_params.ea =
1451 					turbo_enc->tb_params.ea;
1452 			ops[i]->turbo_enc.tb_params.eb =
1453 					turbo_enc->tb_params.eb;
1454 			ops[i]->turbo_enc.tb_params.k_pos =
1455 					turbo_enc->tb_params.k_pos;
1456 			ops[i]->turbo_enc.tb_params.k_neg =
1457 					turbo_enc->tb_params.k_neg;
1458 			ops[i]->turbo_enc.tb_params.c =
1459 					turbo_enc->tb_params.c;
1460 			ops[i]->turbo_enc.tb_params.c_neg =
1461 					turbo_enc->tb_params.c_neg;
1462 			ops[i]->turbo_enc.tb_params.cab =
1463 					turbo_enc->tb_params.cab;
1464 			ops[i]->turbo_enc.tb_params.ncb_pos =
1465 					turbo_enc->tb_params.ncb_pos;
1466 			ops[i]->turbo_enc.tb_params.ncb_neg =
1467 					turbo_enc->tb_params.ncb_neg;
1468 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1469 		} else {
1470 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1471 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1472 			ops[i]->turbo_enc.cb_params.ncb =
1473 					turbo_enc->cb_params.ncb;
1474 		}
1475 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1476 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1477 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1478 
1479 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1480 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1481 	}
1482 }
1483 
1484 
1485 /* Returns a random number drawn from a normal distribution
1486  * with mean 0 and variance 1,
1487  * using the Marsaglia polar method.
1488  */
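/*
 * Sketch of the method: draw u, v uniform in (-1, 1) and keep them when
 * S = u * u + v * v lies in (0, 1); then u * sqrt(-2 * log(S) / S) and
 * v * sqrt(-2 * log(S) / S) are independent N(0, 1) samples.
 */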
1489 static double
1490 randn(int n)
1491 {
1492 	double S, Z, U1, U2, u, v, fac;
1493 
1494 	do {
1495 		U1 = (double)rand() / RAND_MAX;
1496 		U2 = (double)rand() / RAND_MAX;
1497 		u = 2. * U1 - 1.;
1498 		v = 2. * U2 - 1.;
1499 		S = u * u + v * v;
1500 	} while (S >= 1 || S == 0);
1501 	fac = sqrt(-2. * log(S) / S);
1502 	Z = (n % 2) ? u * fac : v * fac;
1503 	return Z;
1504 }
1505 
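/*
 * max* (Jacobian logarithm): max*(A, B) = log(exp(A) + exp(B))
 * = max(A, B) + log1p(exp(-|A - B|)), approximated by max(A, B)
 * alone when |A - B| is large.
 */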
1506 static inline double
1507 maxstar(double A, double B)
1508 {
1509 	if (fabs(A - B) > 5)
1510 		return RTE_MAX(A, B);
1511 	else
1512 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1513 }
1514 
1515 /*
1516  * Generate Qm LLRS for Qm==8
1517  * Modulation, AWGN and LLR estimation from max log development
1518  */
1519 static void
1520 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1521 {
1522 	int qm = 8;
1523 	int qam = 256;
1524 	int m, k;
1525 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1526 	/* 5.1.4 of TS38.211 */
1527 	const double symbols_I[256] = {
1528 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1529 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1530 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1531 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1532 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1533 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1534 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1535 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1536 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1537 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1538 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1539 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1540 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1541 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1542 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1543 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1544 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1545 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1546 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1547 			-13, -13, -15, -15, -13, -13, -15, -15};
1548 	const double symbols_Q[256] = {
1549 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1550 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1551 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1552 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1553 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1554 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1555 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1556 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1557 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1558 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1559 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1560 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1561 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1562 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1563 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1564 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1565 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1566 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1567 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1568 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1569 	/* Average constellation point energy */
1570 	N0 *= 170.0;
1571 	for (k = 0; k < qm; k++)
1572 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1573 	/* 5.1.4 of TS38.211 */
1574 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1575 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1576 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1577 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1578 	/* AWGN channel */
1579 	I += sqrt(N0 / 2) * randn(0);
1580 	Q += sqrt(N0 / 2) * randn(1);
1581 	/*
1582 	 * Calculate the log of the probability that each of
1583 	 * the constellation points was transmitted
1584 	 */
1585 	for (m = 0; m < qam; m++)
1586 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1587 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1588 	/* Calculate an LLR for each of the qm bits in the set */
1589 	for (k = 0; k < qm; k++) {
1590 		p0 = -999999;
1591 		p1 = -999999;
1592 		/* For each constellation point */
1593 		for (m = 0; m < qam; m++) {
1594 			if ((m >> (qm - k - 1)) & 1)
1595 				p1 = maxstar(p1, log_syml_prob[m]);
1596 			else
1597 				p0 = maxstar(p0, log_syml_prob[m]);
1598 		}
1599 		/* Calculate the LLR */
1600 		llr_ = p0 - p1;
1601 		llr_ *= (1 << ldpc_llr_decimals);
1602 		llr_ = round(llr_);
1603 		if (llr_ > llr_max)
1604 			llr_ = llr_max;
1605 		if (llr_ < -llr_max)
1606 			llr_ = -llr_max;
1607 		llrs[qm * i + k] = (int8_t) llr_;
1608 	}
1609 }
1610 
1611 
1612 /*
1613  * Generate Qm LLRs for Qm==6
1614  * Modulation, AWGN and LLR estimation using the max-log approximation
1615  */
1616 static void
1617 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1618 {
1619 	int qm = 6;
1620 	int qam = 64;
1621 	int m, k;
1622 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1623 	/* 5.1.4 of TS38.211 */
1624 	const double symbols_I[64] = {
1625 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1626 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1627 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1628 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1629 			-5, -5, -7, -7, -5, -5, -7, -7};
1630 	const double symbols_Q[64] = {
1631 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1632 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1633 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1634 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1635 			-3, -1, -3, -1, -5, -7, -5, -7};
1636 	/* Average constellation point energy */
1637 	N0 *= 42.0;
1638 	for (k = 0; k < qm; k++)
1639 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1640 	/* 5.1.5 of TS38.211 (64QAM) */
1641 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1642 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1643 	/* AWGN channel */
1644 	I += sqrt(N0 / 2) * randn(0);
1645 	Q += sqrt(N0 / 2) * randn(1);
1646 	/*
1647 	 * Calculate the log of the probability that each of
1648 	 * the constellation points was transmitted
1649 	 */
1650 	for (m = 0; m < qam; m++)
1651 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1652 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1653 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1654 	for (k = 0; k < qm; k++) {
1655 		p0 = -999999;
1656 		p1 = -999999;
1657 		/* For each constellation point */
1658 		for (m = 0; m < qam; m++) {
1659 			if ((m >> (qm - k - 1)) & 1)
1660 				p1 = maxstar(p1, log_syml_prob[m]);
1661 			else
1662 				p0 = maxstar(p0, log_syml_prob[m]);
1663 		}
1664 		/* Calculate the LLR */
1665 		llr_ = p0 - p1;
1666 		llr_ *= (1 << ldpc_llr_decimals);
1667 		llr_ = round(llr_);
1668 		if (llr_ > llr_max)
1669 			llr_ = llr_max;
1670 		if (llr_ < -llr_max)
1671 			llr_ = -llr_max;
1672 		llrs[qm * i + k] = (int8_t) llr_;
1673 	}
1674 }
1675 
1676 /*
1677  * Generate Qm LLRs for Qm == 4
1678  * Modulation, AWGN channel and LLR estimation based on the max-log approach
1679  */
1680 static void
1681 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1682 {
1683 	int qm = 4;
1684 	int qam = 16;
1685 	int m, k;
1686 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1687 	/* 5.1.4 of TS38.211 */
1688 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1689 			-1, -1, -3, -3, -1, -1, -3, -3};
1690 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1691 			1, 3, 1, 3, -1, -3, -1, -3};
1692 	/* Average constellation point energy */
1693 	N0 *= 10.0;
1694 	for (k = 0; k < qm; k++)
1695 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1696 	/* 5.1.4 of TS38.211 */
1697 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1698 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1699 	/* AWGN channel */
1700 	I += sqrt(N0 / 2) * randn(0);
1701 	Q += sqrt(N0 / 2) * randn(1);
1702 	/*
1703 	 * Calculate the log of the probability that each of
1704 	 * the constellation points was transmitted
1705 	 */
1706 	for (m = 0; m < qam; m++)
1707 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1708 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1709 	/* Calculate an LLR for each of the Qm bits in the set */
1710 	for (k = 0; k < qm; k++) {
1711 		p0 = -999999;
1712 		p1 = -999999;
1713 		/* For each constellation point */
1714 		for (m = 0; m < qam; m++) {
1715 			if ((m >> (qm - k - 1)) & 1)
1716 				p1 = maxstar(p1, log_syml_prob[m]);
1717 			else
1718 				p0 = maxstar(p0, log_syml_prob[m]);
1719 		}
1720 		/* Calculate the LLR */
1721 		llr_ = p0 - p1;
1722 		llr_ *= (1 << ldpc_llr_decimals);
1723 		llr_ = round(llr_);
1724 		if (llr_ > llr_max)
1725 			llr_ = llr_max;
1726 		if (llr_ < -llr_max)
1727 			llr_ = -llr_max;
1728 		llrs[qm * i + k] = (int8_t) llr_;
1729 	}
1730 }
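
/*
 * Worked example of the 16QAM mapping above (illustrative values only):
 * bits b0..b3 = 0,0,0,0 give I = 1 * (2 - 1) = 1 and Q = 1, i.e. the
 * (1, 1) constellation point, while b0..b3 = 1,1,1,1 give
 * I = -1 * (2 + 1) = -3 and Q = -3, i.e. the (-3, -3) point.
 */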
1731 
1732 static void
1733 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1734 {
1735 	double b, b1, n;
1736 	double coeff = 2.0 * sqrt(N0);
1737 
1738 	/* Skip rare quasi-null LLRs from the vector so they do not get saturated */
1739 	if (llrs[j] < 8 && llrs[j] > -8)
1740 		return;
1741 
1742 	/* Note: the sign is deliberately kept unchanged here */
1743 	n = randn(j % 2);
1744 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1745 			+ coeff * n) / N0;
1746 	b = b1 * (1 << ldpc_llr_decimals);
1747 	b = round(b);
1748 	if (b > llr_max)
1749 		b = llr_max;
1750 	if (b < -llr_max)
1751 		b = -llr_max;
1752 	llrs[j] = (int8_t) b;
1753 }
1754 
1755 /* Generate LLRs for a given SNR */
1756 static void
1757 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1758 		struct rte_bbdev_dec_op *ref_op)
1759 {
1760 	struct rte_mbuf *m;
1761 	uint16_t qm;
1762 	uint32_t i, j, e, range;
1763 	double N0, llr_max;
1764 
1765 	e = ref_op->ldpc_dec.cb_params.e;
1766 	qm = ref_op->ldpc_dec.q_m;
1767 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1768 	range = e / qm;
1769 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1770 
1771 	for (i = 0; i < n; ++i) {
1772 		m = inputs[i].data;
1773 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1774 		if (qm == 8) {
1775 			for (j = 0; j < range; ++j)
1776 				gen_qm8_llr(llrs, j, N0, llr_max);
1777 		} else if (qm == 6) {
1778 			for (j = 0; j < range; ++j)
1779 				gen_qm6_llr(llrs, j, N0, llr_max);
1780 		} else if (qm == 4) {
1781 			for (j = 0; j < range; ++j)
1782 				gen_qm4_llr(llrs, j, N0, llr_max);
1783 		} else {
1784 			for (j = 0; j < e; ++j)
1785 				gen_qm2_llr(llrs, j, N0, llr_max);
1786 		}
1787 	}
1788 }
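
/*
 * For reference: get_snr() is expressed in dB, so N0 = 10^(-SNR/10), e.g.
 * SNR = 10 dB gives N0 = 0.1. The per-Qm generators then scale N0 by the
 * average constellation energy (10, 42 or 170), which effectively treats
 * the configured SNR as Es/N0 per modulated symbol.
 */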
1789 
1790 static void
1791 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1792 		unsigned int start_idx,
1793 		struct rte_bbdev_op_data *inputs,
1794 		struct rte_bbdev_op_data *hard_outputs,
1795 		struct rte_bbdev_op_data *soft_outputs,
1796 		struct rte_bbdev_op_data *harq_inputs,
1797 		struct rte_bbdev_op_data *harq_outputs,
1798 		struct rte_bbdev_dec_op *ref_op)
1799 {
1800 	unsigned int i;
1801 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1802 
1803 	for (i = 0; i < n; ++i) {
1804 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1805 			ops[i]->ldpc_dec.tb_params.ea =
1806 					ldpc_dec->tb_params.ea;
1807 			ops[i]->ldpc_dec.tb_params.eb =
1808 					ldpc_dec->tb_params.eb;
1809 			ops[i]->ldpc_dec.tb_params.c =
1810 					ldpc_dec->tb_params.c;
1811 			ops[i]->ldpc_dec.tb_params.cab =
1812 					ldpc_dec->tb_params.cab;
1813 			ops[i]->ldpc_dec.tb_params.r =
1814 					ldpc_dec->tb_params.r;
1815 		} else {
1816 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1817 		}
1818 
1819 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1820 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1821 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1822 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1823 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1824 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1825 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1826 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1827 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1828 
1829 		if (hard_outputs != NULL)
1830 			ops[i]->ldpc_dec.hard_output =
1831 					hard_outputs[start_idx + i];
1832 		if (inputs != NULL)
1833 			ops[i]->ldpc_dec.input =
1834 					inputs[start_idx + i];
1835 		if (soft_outputs != NULL)
1836 			ops[i]->ldpc_dec.soft_output =
1837 					soft_outputs[start_idx + i];
1838 		if (harq_inputs != NULL)
1839 			ops[i]->ldpc_dec.harq_combined_input =
1840 					harq_inputs[start_idx + i];
1841 		if (harq_outputs != NULL)
1842 			ops[i]->ldpc_dec.harq_combined_output =
1843 					harq_outputs[start_idx + i];
1844 	}
1845 }
1846 
1847 
1848 static void
1849 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1850 		unsigned int start_idx,
1851 		struct rte_bbdev_op_data *inputs,
1852 		struct rte_bbdev_op_data *outputs,
1853 		struct rte_bbdev_enc_op *ref_op)
1854 {
1855 	unsigned int i;
1856 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1857 	for (i = 0; i < n; ++i) {
1858 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1859 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1860 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1861 			ops[i]->ldpc_enc.tb_params.cab =
1862 					ldpc_enc->tb_params.cab;
1863 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1864 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1865 		} else {
1866 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1867 		}
1868 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1869 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1870 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1871 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1872 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1873 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1874 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1875 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1876 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1877 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1878 	}
1879 }
1880 
1881 static void
1882 copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n,
1883 		unsigned int start_idx, struct rte_bbdev_op_data *inputs,
1884 		struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *pwrouts,
1885 		struct rte_bbdev_fft_op *ref_op)
1886 {
1887 	unsigned int i, j;
1888 	struct rte_bbdev_op_fft *fft = &ref_op->fft;
1889 	for (i = 0; i < n; i++) {
1890 		ops[i]->fft.input_sequence_size = fft->input_sequence_size;
1891 		ops[i]->fft.input_leading_padding = fft->input_leading_padding;
1892 		ops[i]->fft.output_sequence_size = fft->output_sequence_size;
1893 		ops[i]->fft.output_leading_depadding =
1894 				fft->output_leading_depadding;
1895 		for (j = 0; j < RTE_BBDEV_MAX_CS_2; j++)
1896 			ops[i]->fft.window_index[j] = fft->window_index[j];
1897 		ops[i]->fft.cs_bitmap = fft->cs_bitmap;
1898 		ops[i]->fft.num_antennas_log2 = fft->num_antennas_log2;
1899 		ops[i]->fft.idft_log2 = fft->idft_log2;
1900 		ops[i]->fft.dft_log2 = fft->dft_log2;
1901 		ops[i]->fft.cs_time_adjustment = fft->cs_time_adjustment;
1902 		ops[i]->fft.idft_shift = fft->idft_shift;
1903 		ops[i]->fft.dft_shift = fft->dft_shift;
1904 		ops[i]->fft.ncs_reciprocal = fft->ncs_reciprocal;
1905 		ops[i]->fft.power_shift = fft->power_shift;
1906 		ops[i]->fft.fp16_exp_adjust = fft->fp16_exp_adjust;
1907 		ops[i]->fft.base_output = outputs[start_idx + i];
1908 		ops[i]->fft.base_input = inputs[start_idx + i];
1909 		if (pwrouts != NULL)
1910 			ops[i]->fft.power_meas_output = pwrouts[start_idx + i];
1911 		ops[i]->fft.op_flags = fft->op_flags;
1912 	}
1913 }
1914 
1915 static int
1916 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1917 		unsigned int order_idx, const int expected_status)
1918 {
1919 	int status = op->status;
1920 	/* ignore parity mismatch false alarms for long iterations */
1921 	if (get_iter_max() >= 10) {
1922 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1923 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1924 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1925 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1926 		}
1927 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1928 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1929 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1930 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1931 		}
1932 	}
1933 
1934 	TEST_ASSERT(status == expected_status,
1935 			"op_status (%d) != expected_status (%d)",
1936 			op->status, expected_status);
1937 
1938 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1939 			"Ordering error, expected %p, got %p",
1940 			(void *)(uintptr_t)order_idx, op->opaque_data);
1941 
1942 	return TEST_SUCCESS;
1943 }
1944 
1945 static int
1946 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1947 		unsigned int order_idx, const int expected_status)
1948 {
1949 	TEST_ASSERT(op->status == expected_status,
1950 			"op_status (%d) != expected_status (%d)",
1951 			op->status, expected_status);
1952 
1953 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1954 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1955 				"Ordering error, expected %p, got %p",
1956 				(void *)(uintptr_t)order_idx, op->opaque_data);
1957 
1958 	return TEST_SUCCESS;
1959 }
1960 
1961 static int
1962 check_fft_status_and_ordering(struct rte_bbdev_fft_op *op,
1963 		unsigned int order_idx, const int expected_status)
1964 {
1965 	TEST_ASSERT(op->status == expected_status,
1966 			"op_status (%d) != expected_status (%d)",
1967 			op->status, expected_status);
1968 
1969 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1970 			"Ordering error, expected %p, got %p",
1971 			(void *)(uintptr_t)order_idx, op->opaque_data);
1972 
1973 	return TEST_SUCCESS;
1974 }
1975 
1976 static inline int
1977 validate_op_chain(struct rte_bbdev_op_data *op,
1978 		struct op_data_entries *orig_op)
1979 {
1980 	uint8_t i;
1981 	struct rte_mbuf *m = op->data;
1982 	uint8_t nb_dst_segments = orig_op->nb_segments;
1983 	uint32_t total_data_size = 0;
1984 
1985 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1986 			"Number of segments differs between original (%u) and filled (%u) op",
1987 			nb_dst_segments, m->nb_segs);
1988 
1989 	/* Validate each mbuf segment length */
1990 	for (i = 0; i < nb_dst_segments; ++i) {
1991 		/* Apply offset to the first mbuf segment */
1992 		uint16_t offset = (i == 0) ? op->offset : 0;
1993 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1994 		total_data_size += orig_op->segments[i].length;
1995 
1996 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1997 				"Length of segment differs between original (%u) and filled (%u) op",
1998 				orig_op->segments[i].length, data_len);
1999 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
2000 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
2001 				data_len,
2002 				"Output buffers (CB=%u) are not equal", i);
2003 		m = m->next;
2004 	}
2005 
2006 	/* Validate total mbuf pkt length */
2007 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2008 	TEST_ASSERT(total_data_size == pkt_len,
2009 			"Length of data differs between original (%u) and filled (%u) op",
2010 			total_data_size, pkt_len);
2011 
2012 	return TEST_SUCCESS;
2013 }
2014 
2015 /*
2016  * Compute k0 for a given configuration, used for the HARQ output length
2017  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2.
2018  */
2019 static inline uint16_t
2020 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
2021 {
2022 	if (rv_index == 0)
2023 		return 0;
2024 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
2025 	if (n_cb == n) {
2026 		if (rv_index == 1)
2027 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
2028 		else if (rv_index == 2)
2029 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
2030 		else
2031 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
2032 	}
2033 	/* LBRM case - includes a division by N */
2034 	if (rv_index == 1)
2035 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
2036 				/ n) * z_c;
2037 	else if (rv_index == 2)
2038 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
2039 				/ n) * z_c;
2040 	else
2041 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
2042 				/ n) * z_c;
2043 }
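
/*
 * Worked example (illustrative values only): with BG1, z_c = 128 and
 * n_cb equal to N = 66 * 128 = 8448, rv_index 2 gives k0 = 33 * 128 = 4224.
 * In the LBRM branch, e.g. n_cb = 6336 with the same BG1/z_c, rv_index 1
 * gives ((17 * 6336) / 8448) * 128 = 12 * 128 = 1536 (integer division).
 */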
2044 
2045 /* HARQ output length including the Filler bits */
2046 static inline uint16_t
2047 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
2048 {
2049 	uint16_t k0 = 0;
2050 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
2051 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
2052 	/* Compute RM out size and number of rows */
2053 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2054 			* ops_ld->z_c - ops_ld->n_filler;
2055 	uint16_t deRmOutSize = RTE_MIN(
2056 			k0 + ops_ld->cb_params.e +
2057 			((k0 > parity_offset) ?
2058 					0 : ops_ld->n_filler),
2059 					ops_ld->n_cb);
2060 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
2061 			/ ops_ld->z_c);
2062 	uint16_t harq_output_len = numRows * ops_ld->z_c;
2063 	return harq_output_len;
2064 }
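
/*
 * Worked example (illustrative values only): basegraph = 1, z_c = 64,
 * n_filler = 0, rv_index = 0, n_cb = 4224 and cb_params.e = 1000 give
 * k0 = 0, parity_offset = 20 * 64 = 1280, deRmOutSize = min(1000, 4224)
 * = 1000, numRows = ceil(1000 / 64) = 16 and a HARQ output length of
 * 16 * 64 = 1024.
 */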
2065 
2066 static inline int
2067 validate_op_harq_chain(struct rte_bbdev_op_data *op,
2068 		struct op_data_entries *orig_op,
2069 		struct rte_bbdev_op_ldpc_dec *ops_ld)
2070 {
2071 	uint8_t i;
2072 	uint32_t j, jj, k;
2073 	struct rte_mbuf *m = op->data;
2074 	uint8_t nb_dst_segments = orig_op->nb_segments;
2075 	uint32_t total_data_size = 0;
2076 	int8_t *harq_orig, *harq_out, abs_harq_origin;
2077 	uint32_t byte_error = 0, cum_error = 0, error;
2078 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
2079 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
2080 	uint16_t parity_offset;
2081 
2082 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2083 			"Number of segments differs between original (%u) and filled (%u) op",
2084 			nb_dst_segments, m->nb_segs);
2085 
2086 	/* Validate each mbuf segment length */
2087 	for (i = 0; i < nb_dst_segments; ++i) {
2088 		/* Apply offset to the first mbuf segment */
2089 		uint16_t offset = (i == 0) ? op->offset : 0;
2090 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2091 		total_data_size += orig_op->segments[i].length;
2092 
2093 		TEST_ASSERT(orig_op->segments[i].length <
2094 				(uint32_t)(data_len + 64),
2095 				"Length of segment differs between original (%u) and filled (%u) op",
2096 				orig_op->segments[i].length, data_len);
2097 		harq_orig = (int8_t *) orig_op->segments[i].addr;
2098 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2099 
2100 		if (!(ldpc_cap_flags &
2101 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
2102 				) || (ops_ld->op_flags &
2103 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2104 			data_len -= ops_ld->z_c;
2105 			parity_offset = data_len;
2106 		} else {
2107 			/* Compute RM out size and number of rows */
2108 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
2109 					* ops_ld->z_c - ops_ld->n_filler;
2110 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
2111 					ops_ld->n_filler;
2112 			if (data_len > deRmOutSize)
2113 				data_len = deRmOutSize;
2114 			if (data_len > orig_op->segments[i].length)
2115 				data_len = orig_op->segments[i].length;
2116 		}
2117 		/*
2118 		 * HARQ output can have minor differences
2119 		 * due to integer representation and related scaling
2120 		 */
2121 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2122 			if (j == parity_offset) {
2123 				/* Special Handling of the filler bits */
2124 				for (k = 0; k < ops_ld->n_filler; k++) {
2125 					if (harq_out[jj] !=
2126 							llr_max_pre_scaling) {
2127 						printf("HARQ Filler issue %d: %d %d\n",
2128 							jj, harq_out[jj],
2129 							llr_max_pre_scaling);
2130 						byte_error++;
2131 					}
2132 					jj++;
2133 				}
2134 			}
2135 			if (!(ops_ld->op_flags &
2136 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
2137 				if (ldpc_llr_decimals > 1)
2138 					harq_out[jj] = (harq_out[jj] + 1)
2139 						>> (ldpc_llr_decimals - 1);
2140 				/* Saturated to S7 */
2141 				if (harq_orig[j] > llr_max)
2142 					harq_orig[j] = llr_max;
2143 				if (harq_orig[j] < -llr_max)
2144 					harq_orig[j] = -llr_max;
2145 			}
2146 			if (harq_orig[j] != harq_out[jj]) {
2147 				error = (harq_orig[j] > harq_out[jj]) ?
2148 						harq_orig[j] - harq_out[jj] :
2149 						harq_out[jj] - harq_orig[j];
2150 				abs_harq_origin = harq_orig[j] > 0 ?
2151 							harq_orig[j] :
2152 							-harq_orig[j];
2153 				/* Residual quantization error */
2154 				if ((error > 8 && (abs_harq_origin <
2155 						(llr_max - 16))) ||
2156 						(error > 16)) {
2157 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2158 							j, harq_orig[j],
2159 							harq_out[jj], error);
2160 					byte_error++;
2161 					cum_error += error;
2162 				}
2163 			}
2164 		}
2165 		m = m->next;
2166 	}
2167 
2168 	if (byte_error)
2169 		TEST_ASSERT(byte_error <= 1,
2170 				"HARQ output mismatch (%d) %d",
2171 				byte_error, cum_error);
2172 
2173 	/* Validate total mbuf pkt length */
2174 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2175 	TEST_ASSERT(total_data_size < pkt_len + 64,
2176 			"Length of data differs between original (%u) and filled (%u) op",
2177 			total_data_size, pkt_len);
2178 
2179 	return TEST_SUCCESS;
2180 }
2181 
2182 
2183 static inline int
2184 validate_op_so_chain(struct rte_bbdev_op_data *op,
2185 		struct op_data_entries *orig_op)
2186 {
2187 	struct rte_mbuf *m = op->data;
2188 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2189 	uint32_t j, jj;
2190 	int8_t *so_orig, *so_out;
2191 	uint32_t byte_error = 0, error, margin_error = 0;
2192 
2193 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2194 			"Number of segments differs between original (%u) and filled (%u) op",
2195 			nb_dst_segments, m->nb_segs);
2196 
2197 	/* Validate each mbuf segment length. */
2198 	for (i = 0; i < nb_dst_segments; ++i) {
2199 		/* Apply offset to the first mbuf segment. */
2200 		uint16_t offset = (i == 0) ? op->offset : 0;
2201 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
2202 
2203 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2204 				"Length of segment differs between original (%u) and filled (%u) op",
2205 				orig_op->segments[i].length, data_len);
2206 		so_orig = (int8_t *) orig_op->segments[i].addr;
2207 		so_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
2208 		margin_error += data_len / 8; /* Allow a small fraction of byte errors. */
2209 
2210 		/* SO output can have minor differences due to algorithm variations. */
2211 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
2212 			if (so_orig[j] != so_out[jj]) {
2213 				error = (so_orig[j] > so_out[jj]) ? so_orig[j] - so_out[jj] :
2214 						so_out[jj] - so_orig[j];
2215 				/* Residual quantization error. */
2216 				if (error > 32) {
2217 					printf("Warning: Soft mismatch %d: exp %d act %d => %d\n",
2218 							j, so_orig[j], so_out[jj], error);
2219 					byte_error++;
2220 				}
2221 			}
2222 		}
2223 		m = m->next;
2224 	}
2225 
2226 	if (byte_error > margin_error)
2227 		TEST_ASSERT(byte_error <= 1, "Soft output mismatch (%d) %d",
2228 				byte_error, margin_error);
2229 
2230 	return TEST_SUCCESS;
2231 }
2232 
2233 static int
2234 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2235 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2236 {
2237 	unsigned int i;
2238 	int ret;
2239 	struct op_data_entries *hard_data_orig =
2240 			&test_vector.entries[DATA_HARD_OUTPUT];
2241 	struct op_data_entries *soft_data_orig =
2242 			&test_vector.entries[DATA_SOFT_OUTPUT];
2243 	struct rte_bbdev_op_turbo_dec *ops_td;
2244 	struct rte_bbdev_op_data *hard_output;
2245 	struct rte_bbdev_op_data *soft_output;
2246 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2247 
2248 	for (i = 0; i < n; ++i) {
2249 		ops_td = &ops[i]->turbo_dec;
2250 		hard_output = &ops_td->hard_output;
2251 		soft_output = &ops_td->soft_output;
2252 
2253 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2254 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2255 					"Returned iter_count (%d) > expected iter_count (%d)",
2256 					ops_td->iter_count, ref_td->iter_count);
2257 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2258 		TEST_ASSERT_SUCCESS(ret,
2259 				"Checking status and ordering for decoder failed");
2260 
2261 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2262 				hard_data_orig),
2263 				"Hard output buffers (CB=%u) are not equal",
2264 				i);
2265 
2266 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2267 			TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output,
2268 					soft_data_orig),
2269 					"Soft output buffers (CB=%u) are not equal",
2270 					i);
2271 	}
2272 
2273 	return TEST_SUCCESS;
2274 }
2275 
2276 /* Check the number of code block errors */
2277 static int
2278 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2279 {
2280 	unsigned int i;
2281 	struct op_data_entries *hard_data_orig =
2282 			&test_vector.entries[DATA_HARD_OUTPUT];
2283 	struct rte_bbdev_op_ldpc_dec *ops_td;
2284 	struct rte_bbdev_op_data *hard_output;
2285 	int errors = 0;
2286 	struct rte_mbuf *m;
2287 
2288 	for (i = 0; i < n; ++i) {
2289 		ops_td = &ops[i]->ldpc_dec;
2290 		hard_output = &ops_td->hard_output;
2291 		m = hard_output->data;
2292 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2293 				hard_data_orig->segments[0].addr,
2294 				hard_data_orig->segments[0].length))
2295 			errors++;
2296 	}
2297 	return errors;
2298 }
2299 
2300 static int
2301 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2302 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2303 {
2304 	unsigned int i;
2305 	int ret;
2306 	struct op_data_entries *hard_data_orig =
2307 			&test_vector.entries[DATA_HARD_OUTPUT];
2308 	struct op_data_entries *soft_data_orig =
2309 			&test_vector.entries[DATA_SOFT_OUTPUT];
2310 	struct op_data_entries *harq_data_orig =
2311 				&test_vector.entries[DATA_HARQ_OUTPUT];
2312 	struct rte_bbdev_op_ldpc_dec *ops_td;
2313 	struct rte_bbdev_op_data *hard_output;
2314 	struct rte_bbdev_op_data *harq_output;
2315 	struct rte_bbdev_op_data *soft_output;
2316 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2317 
2318 	for (i = 0; i < n; ++i) {
2319 		ops_td = &ops[i]->ldpc_dec;
2320 		hard_output = &ops_td->hard_output;
2321 		harq_output = &ops_td->harq_combined_output;
2322 		soft_output = &ops_td->soft_output;
2323 
2324 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2325 		TEST_ASSERT_SUCCESS(ret,
2326 				"Checking status and ordering for decoder failed");
2327 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2328 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2329 					"Returned iter_count (%d) > expected iter_count (%d)",
2330 					ops_td->iter_count, ref_td->iter_count);
2331 		/*
2332 		 * We can ignore output data when the decoding failed to
2333 		 * converge or for loop-back cases
2334 		 */
2335 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2336 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2337 				) && (
2338 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2339 						)) == 0)
2340 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2341 					hard_data_orig),
2342 					"Hard output buffers (CB=%u) are not equal",
2343 					i);
2344 
2345 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2346 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2347 					soft_data_orig),
2348 					"Soft output buffers (CB=%u) are not equal",
2349 					i);
2350 		if (ref_op->ldpc_dec.op_flags &
2351 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2352 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2353 					harq_data_orig, ops_td),
2354 					"HARQ output buffers (CB=%u) are not equal",
2355 					i);
2356 		}
2357 		if (ref_op->ldpc_dec.op_flags &
2358 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2359 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2360 					harq_data_orig, ops_td),
2361 					"HARQ output buffers (CB=%u) are not equal",
2362 					i);
2363 
2364 	}
2365 
2366 	return TEST_SUCCESS;
2367 }
2368 
2369 
2370 static int
2371 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2372 		struct rte_bbdev_enc_op *ref_op)
2373 {
2374 	unsigned int i;
2375 	int ret;
2376 	struct op_data_entries *hard_data_orig =
2377 			&test_vector.entries[DATA_HARD_OUTPUT];
2378 
2379 	for (i = 0; i < n; ++i) {
2380 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2381 		TEST_ASSERT_SUCCESS(ret,
2382 				"Checking status and ordering for encoder failed");
2383 		TEST_ASSERT_SUCCESS(validate_op_chain(
2384 				&ops[i]->turbo_enc.output,
2385 				hard_data_orig),
2386 				"Output buffers (CB=%u) are not equal",
2387 				i);
2388 	}
2389 
2390 	return TEST_SUCCESS;
2391 }
2392 
2393 static int
2394 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2395 		struct rte_bbdev_enc_op *ref_op)
2396 {
2397 	unsigned int i;
2398 	int ret;
2399 	struct op_data_entries *hard_data_orig =
2400 			&test_vector.entries[DATA_HARD_OUTPUT];
2401 
2402 	for (i = 0; i < n; ++i) {
2403 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2404 		TEST_ASSERT_SUCCESS(ret,
2405 				"Checking status and ordering for encoder failed");
2406 		TEST_ASSERT_SUCCESS(validate_op_chain(
2407 				&ops[i]->ldpc_enc.output,
2408 				hard_data_orig),
2409 				"Output buffers (CB=%u) are not equal",
2410 				i);
2411 	}
2412 
2413 	return TEST_SUCCESS;
2414 }
2415 
2416 
2417 static inline int
2418 validate_op_fft_chain(struct rte_bbdev_op_data *op, struct op_data_entries *orig_op)
2419 {
2420 	struct rte_mbuf *m = op->data;
2421 	uint8_t i, nb_dst_segments = orig_op->nb_segments;
2422 	int16_t delt, abs_delt, threshold = 3;
2423 	uint32_t j, data_len_iq, error_num;
2424 	int16_t *ref_out, *op_out;
2425 
2426 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
2427 			"Number of segments differs between original (%u) and filled (%u) FFT op",
2428 			nb_dst_segments, m->nb_segs);
2429 
2430 	/* Due to mbuf size limitations, the FFT data length is taken from op->length rather than from the mbuf segment. */
2431 	for (i = 0; i < nb_dst_segments; ++i) {
2432 		uint16_t offset = (i == 0) ? op->offset : 0;
2433 		uint32_t data_len = op->length;
2434 
2435 		TEST_ASSERT(orig_op->segments[i].length == data_len,
2436 				"Length of segment differs between original (%u) and filled (%u) FFT op",
2437 				orig_op->segments[i].length, data_len);
2438 		/* Divided by 2 to get the number of 16bits data. */
2439 		data_len_iq = data_len >> 1;
2440 		ref_out = (int16_t *)(orig_op->segments[i].addr);
2441 		op_out = rte_pktmbuf_mtod_offset(m, int16_t *, offset);
2442 		error_num = 0;
2443 		for (j = 0; j < data_len_iq; j++) {
2444 			delt = ref_out[j] - op_out[j];
2445 			abs_delt = delt > 0 ? delt : -delt;
2446 			error_num += (abs_delt > threshold ? 1 : 0);
2447 		}
2448 		if (error_num > 0) {
2449 			rte_memdump(stdout, "Buffer A", ref_out, data_len);
2450 			rte_memdump(stdout, "Buffer B", op_out, data_len);
2451 			TEST_ASSERT(error_num == 0,
2452 				"FFT output does not match: total (%u) errors (%u)",
2453 				data_len_iq, error_num);
2454 		}
2455 
2456 		m = m->next;
2457 	}
2458 
2459 	return TEST_SUCCESS;
2460 }
2461 
2462 static int
2463 validate_fft_op(struct rte_bbdev_fft_op **ops, const uint16_t n,
2464 		struct rte_bbdev_fft_op *ref_op)
2465 {
2466 	unsigned int i;
2467 	int ret;
2468 	struct op_data_entries *fft_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
2469 	struct op_data_entries *fft_pwr_orig = &test_vector.entries[DATA_SOFT_OUTPUT];
2470 
2471 	for (i = 0; i < n; ++i) {
2472 		ret = check_fft_status_and_ordering(ops[i], i, ref_op->status);
2473 		TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for FFT failed");
2474 		TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2475 				&ops[i]->fft.base_output, fft_data_orig),
2476 				"FFT output buffers (op=%u) do not match", i);
2477 		if (check_bit(ops[i]->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS))
2478 			TEST_ASSERT_SUCCESS(validate_op_fft_chain(
2479 				&ops[i]->fft.power_meas_output, fft_pwr_orig),
2480 				"FFT power output buffers (op=%u) do not match", i);
2481 	}
2482 
2483 	return TEST_SUCCESS;
2484 }
2485 
2486 static void
2487 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2488 {
2489 	unsigned int i;
2490 	struct op_data_entries *entry;
2491 
2492 	op->turbo_dec = test_vector.turbo_dec;
2493 	entry = &test_vector.entries[DATA_INPUT];
2494 	for (i = 0; i < entry->nb_segments; ++i)
2495 		op->turbo_dec.input.length +=
2496 				entry->segments[i].length;
2497 }
2498 
2499 static void
2500 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2501 {
2502 	unsigned int i;
2503 	struct op_data_entries *entry;
2504 
2505 	op->ldpc_dec = test_vector.ldpc_dec;
2506 	entry = &test_vector.entries[DATA_INPUT];
2507 	for (i = 0; i < entry->nb_segments; ++i)
2508 		op->ldpc_dec.input.length +=
2509 				entry->segments[i].length;
2510 	if (test_vector.ldpc_dec.op_flags &
2511 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2512 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2513 		for (i = 0; i < entry->nb_segments; ++i)
2514 			op->ldpc_dec.harq_combined_input.length +=
2515 				entry->segments[i].length;
2516 	}
2517 }
2518 
2519 static void
2520 create_reference_fft_op(struct rte_bbdev_fft_op *op)
2521 {
2522 	unsigned int i;
2523 	struct op_data_entries *entry;
2524 	op->fft = test_vector.fft;
2525 	entry = &test_vector.entries[DATA_INPUT];
2526 	for (i = 0; i < entry->nb_segments; ++i)
2527 		op->fft.base_input.length += entry->segments[i].length;
2528 }
2529 
2530 static void
2531 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2532 {
2533 	unsigned int i;
2534 	struct op_data_entries *entry;
2535 
2536 	op->turbo_enc = test_vector.turbo_enc;
2537 	entry = &test_vector.entries[DATA_INPUT];
2538 	for (i = 0; i < entry->nb_segments; ++i)
2539 		op->turbo_enc.input.length +=
2540 				entry->segments[i].length;
2541 }
2542 
2543 static void
2544 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2545 {
2546 	unsigned int i;
2547 	struct op_data_entries *entry;
2548 
2549 	op->ldpc_enc = test_vector.ldpc_enc;
2550 	entry = &test_vector.entries[DATA_INPUT];
2551 	for (i = 0; i < entry->nb_segments; ++i)
2552 		op->ldpc_enc.input.length +=
2553 				entry->segments[i].length;
2554 }
2555 
2556 static uint32_t
2557 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2558 {
2559 	uint8_t i;
2560 	uint32_t c, r, tb_size = 0;
2561 
2562 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2563 		tb_size = op->turbo_dec.tb_params.k_neg;
2564 	} else {
2565 		c = op->turbo_dec.tb_params.c;
2566 		r = op->turbo_dec.tb_params.r;
2567 		for (i = 0; i < c-r; i++)
2568 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2569 				op->turbo_dec.tb_params.k_neg :
2570 				op->turbo_dec.tb_params.k_pos;
2571 	}
2572 	return tb_size;
2573 }
2574 
2575 static uint32_t
2576 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2577 {
2578 	uint8_t i;
2579 	uint32_t c, r, tb_size = 0;
2580 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2581 
2582 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2583 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2584 	} else {
2585 		c = op->ldpc_dec.tb_params.c;
2586 		r = op->ldpc_dec.tb_params.r;
2587 		for (i = 0; i < c-r; i++)
2588 			tb_size += sys_cols * op->ldpc_dec.z_c
2589 					- op->ldpc_dec.n_filler;
2590 	}
2591 	return tb_size;
2592 }
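
/*
 * Worked example (illustrative values only): for a single code block with
 * basegraph = 1 (22 systematic columns), z_c = 384 and n_filler = 0, the
 * transport block size computed above is 22 * 384 = 8448 bits.
 */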
2593 
2594 static uint32_t
2595 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2596 {
2597 	uint8_t i;
2598 	uint32_t c, r, tb_size = 0;
2599 
2600 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2601 		tb_size = op->turbo_enc.tb_params.k_neg;
2602 	} else {
2603 		c = op->turbo_enc.tb_params.c;
2604 		r = op->turbo_enc.tb_params.r;
2605 		for (i = 0; i < c-r; i++)
2606 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2607 				op->turbo_enc.tb_params.k_neg :
2608 				op->turbo_enc.tb_params.k_pos;
2609 	}
2610 	return tb_size;
2611 }
2612 
2613 static uint32_t
2614 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2615 {
2616 	uint8_t i;
2617 	uint32_t c, r, tb_size = 0;
2618 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2619 
2620 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2621 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2622 	} else {
2623 		c = op->ldpc_enc.tb_params.c;
2624 		r = op->ldpc_enc.tb_params.r;
2625 		for (i = 0; i < c-r; i++)
2626 			tb_size += sys_cols * op->ldpc_enc.z_c
2627 					- op->ldpc_enc.n_filler;
2628 	}
2629 	return tb_size;
2630 }
2631 
2632 static uint32_t
2633 calc_fft_size(struct rte_bbdev_fft_op *op)
2634 {
2635 	uint32_t output_size;
2636 	int num_cs = 0, i;
2637 	for (i = 0; i < 12; i++)
2638 		if (check_bit(op->fft.cs_bitmap, 1 << i))
2639 			num_cs++;
2640 	output_size = (num_cs * op->fft.output_sequence_size * 4) << op->fft.num_antennas_log2;
2641 	return output_size;
2642 }
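
/*
 * Worked example (illustrative values only): with two cyclic shifts set in
 * cs_bitmap, output_sequence_size = 1024 and num_antennas_log2 = 2, the
 * size computed above is (2 * 1024 * 4) << 2 = 32768 bytes.
 */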
2643 
2644 static int
2645 init_test_op_params(struct test_op_params *op_params,
2646 		enum rte_bbdev_op_type op_type, const int expected_status,
2647 		const int vector_mask, struct rte_mempool *ops_mp,
2648 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2649 {
2650 	int ret = 0;
2651 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2652 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2653 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2654 				&op_params->ref_dec_op, 1);
2655 	else if (op_type == RTE_BBDEV_OP_FFT)
2656 		ret = rte_bbdev_fft_op_alloc_bulk(ops_mp,
2657 				&op_params->ref_fft_op, 1);
2658 	else
2659 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2660 				&op_params->ref_enc_op, 1);
2661 
2662 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2663 
2664 	op_params->mp = ops_mp;
2665 	op_params->burst_sz = burst_sz;
2666 	op_params->num_to_process = num_to_process;
2667 	op_params->num_lcores = num_lcores;
2668 	op_params->vector_mask = vector_mask;
2669 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2670 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2671 		op_params->ref_dec_op->status = expected_status;
2672 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2673 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2674 		op_params->ref_enc_op->status = expected_status;
2675 	else if (op_type == RTE_BBDEV_OP_FFT)
2676 		op_params->ref_fft_op->status = expected_status;
2677 	return 0;
2678 }
2679 
2680 static int
2681 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2682 		struct test_op_params *op_params)
2683 {
2684 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2685 	unsigned int i;
2686 	struct active_device *ad;
2687 	unsigned int burst_sz = get_burst_sz();
2688 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2689 	const struct rte_bbdev_op_cap *capabilities = NULL;
2690 
2691 	ad = &active_devs[dev_id];
2692 
2693 	/* Check if device supports op_type */
2694 	if (!is_avail_op(ad, test_vector.op_type))
2695 		return TEST_SUCCESS;
2696 
2697 	struct rte_bbdev_info info;
2698 	rte_bbdev_info_get(ad->dev_id, &info);
2699 	socket_id = GET_SOCKET(info.socket_id);
2700 
2701 	f_ret = create_mempools(ad, socket_id, op_type,
2702 			get_num_ops());
2703 	if (f_ret != TEST_SUCCESS) {
2704 		printf("Couldn't create mempools\n");
2705 		goto fail;
2706 	}
2707 	if (op_type == RTE_BBDEV_OP_NONE)
2708 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2709 
2710 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2711 			test_vector.expected_status,
2712 			test_vector.mask,
2713 			ad->ops_mempool,
2714 			burst_sz,
2715 			get_num_ops(),
2716 			get_num_lcores());
2717 	if (f_ret != TEST_SUCCESS) {
2718 		printf("Couldn't init test op params\n");
2719 		goto fail;
2720 	}
2721 
2722 
2723 	/* Find capabilities */
2724 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2725 	do {
2726 		if (cap->type == test_vector.op_type) {
2727 			capabilities = cap;
2728 			break;
2729 		}
2730 		cap++;
2731 	} while (cap->type != RTE_BBDEV_OP_NONE);
2732 	TEST_ASSERT_NOT_NULL(capabilities,
2733 			"Couldn't find capabilities");
2734 
2735 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2736 		create_reference_dec_op(op_params->ref_dec_op);
2737 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2738 		create_reference_enc_op(op_params->ref_enc_op);
2739 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2740 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2741 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2742 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2743 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
2744 		create_reference_fft_op(op_params->ref_fft_op);
2745 
2746 	for (i = 0; i < ad->nb_queues; ++i) {
2747 		f_ret = fill_queue_buffers(op_params,
2748 				ad->in_mbuf_pool,
2749 				ad->hard_out_mbuf_pool,
2750 				ad->soft_out_mbuf_pool,
2751 				ad->harq_in_mbuf_pool,
2752 				ad->harq_out_mbuf_pool,
2753 				ad->queue_ids[i],
2754 				capabilities,
2755 				info.drv.min_alignment,
2756 				socket_id);
2757 		if (f_ret != TEST_SUCCESS) {
2758 			printf("Couldn't init queue buffers\n");
2759 			goto fail;
2760 		}
2761 	}
2762 
2763 	/* Run test case function */
2764 	t_ret = test_case_func(ad, op_params);
2765 
2766 	/* Free active device resources and return */
2767 	free_buffers(ad, op_params);
2768 	return t_ret;
2769 
2770 fail:
2771 	free_buffers(ad, op_params);
2772 	return TEST_FAILED;
2773 }
2774 
2775 /* Run given test function per active device per supported op type
2776  * per burst size.
2777  */
2778 static int
2779 run_test_case(test_case_function *test_case_func)
2780 {
2781 	int ret = 0;
2782 	uint8_t dev;
2783 
2784 	/* Alloc op_params */
2785 	struct test_op_params *op_params = rte_zmalloc(NULL,
2786 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2787 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2788 			RTE_ALIGN(sizeof(struct test_op_params),
2789 				RTE_CACHE_LINE_SIZE));
2790 
2791 	/* For each device run test case function */
2792 	for (dev = 0; dev < nb_active_devs; ++dev)
2793 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2794 
2795 	rte_free(op_params);
2796 
2797 	return ret;
2798 }
2799 
2800 
2801 /* Push back the HARQ output from DDR to host */
2802 static void
2803 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2804 		struct rte_bbdev_dec_op **ops,
2805 		const uint16_t n)
2806 {
2807 	uint16_t j;
2808 	int save_status, ret;
2809 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2810 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2811 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2812 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2813 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2814 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2815 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2816 	for (j = 0; j < n; ++j) {
2817 		if ((loopback && mem_out) || hc_out) {
2818 			save_status = ops[j]->status;
2819 			ops[j]->ldpc_dec.op_flags =
2820 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2821 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2822 			if (h_comp)
2823 				ops[j]->ldpc_dec.op_flags +=
2824 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2825 			ops[j]->ldpc_dec.harq_combined_input.offset =
2826 					harq_offset;
2827 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2828 			harq_offset += HARQ_INCR;
2829 			if (!loopback)
2830 				ops[j]->ldpc_dec.harq_combined_input.length =
2831 				ops[j]->ldpc_dec.harq_combined_output.length;
2832 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2833 					&ops[j], 1);
2834 			ret = 0;
2835 			while (ret == 0)
2836 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2837 						dev_id, queue_id,
2838 						&ops_deq[j], 1);
2839 			ops[j]->ldpc_dec.op_flags = flags;
2840 			ops[j]->status = save_status;
2841 		}
2842 	}
2843 }
2844 
2845 /*
2846  * Preload the HARQ memory input into HW DDR and adjust the HARQ offsets
2847  * used by the subsequent decode operations.
2848  */
2849 static void
2850 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2851 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2852 		bool preload)
2853 {
2854 	uint16_t j;
2855 	int deq;
2856 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2857 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2858 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2859 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2860 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2861 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2862 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2863 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2864 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2865 	if ((mem_in || hc_in) && preload) {
2866 		for (j = 0; j < n; ++j) {
2867 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2868 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2869 			ops[j]->ldpc_dec.op_flags =
2870 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2871 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2872 			if (h_comp)
2873 				ops[j]->ldpc_dec.op_flags +=
2874 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2875 			ops[j]->ldpc_dec.harq_combined_output.offset =
2876 					harq_offset;
2877 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2878 			harq_offset += HARQ_INCR;
2879 		}
2880 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2881 		deq = 0;
2882 		while (deq != n)
2883 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2884 					dev_id, queue_id, &ops_deq[deq],
2885 					n - deq);
2886 		/* Restore the operations */
2887 		for (j = 0; j < n; ++j) {
2888 			ops[j]->ldpc_dec.op_flags = flags;
2889 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2890 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2891 		}
2892 	}
2893 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2894 	for (j = 0; j < n; ++j) {
2895 		/* Adjust HARQ offset when we reach external DDR */
2896 		if (mem_in || hc_in)
2897 			ops[j]->ldpc_dec.harq_combined_input.offset
2898 				= harq_offset;
2899 		if (mem_out || hc_out)
2900 			ops[j]->ldpc_dec.harq_combined_output.offset
2901 				= harq_offset;
2902 		harq_offset += HARQ_INCR;
2903 	}
2904 }
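
/*
 * HARQ external DDR layout assumed by preload_harq_ddr() and
 * retrieve_harq_ddr(): operation j of queue q is placed at offset
 * q * HARQ_INCR * MAX_OPS + j * HARQ_INCR, i.e. HARQ_INCR (32768) bytes
 * are reserved per code block.
 */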
2905 
2906 static void
2907 dequeue_event_callback(uint16_t dev_id,
2908 		enum rte_bbdev_event_type event, void *cb_arg,
2909 		void *ret_param)
2910 {
2911 	int ret;
2912 	uint16_t i;
2913 	uint64_t total_time;
2914 	uint16_t deq, burst_sz, num_ops;
2915 	uint16_t queue_id = *(uint16_t *) ret_param;
2916 	struct rte_bbdev_info info;
2917 	double tb_len_bits;
2918 	struct thread_params *tp = cb_arg;
2919 
2920 	/* Find matching thread params using queue_id */
2921 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2922 		if (tp->queue_id == queue_id)
2923 			break;
2924 
2925 	if (i == MAX_QUEUES) {
2926 		printf("%s: Queue_id from interrupt details was not found!\n",
2927 				__func__);
2928 		return;
2929 	}
2930 
2931 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2932 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2933 		printf(
2934 			"Dequeue interrupt handler called for incorrect event!\n");
2935 		return;
2936 	}
2937 
2938 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
2939 	num_ops = tp->op_params->num_to_process;
2940 
2941 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2942 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2943 				&tp->dec_ops[
2944 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2945 				burst_sz);
2946 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2947 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2948 				&tp->dec_ops[
2949 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2950 				burst_sz);
2951 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2952 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2953 				&tp->enc_ops[
2954 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2955 				burst_sz);
2956 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
2957 		deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
2958 				&tp->fft_ops[
2959 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2960 				burst_sz);
2961 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2962 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2963 				&tp->enc_ops[
2964 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2965 				burst_sz);
2966 
2967 	if (deq < burst_sz) {
2968 		printf(
2969 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2970 			burst_sz, deq);
2971 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2972 		return;
2973 	}
2974 
2975 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
2976 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2977 		return;
2978 	}
2979 
2980 	total_time = rte_rdtsc_precise() - tp->start_time;
2981 
2982 	rte_bbdev_info_get(dev_id, &info);
2983 
2984 	ret = TEST_SUCCESS;
2985 
2986 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2987 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2988 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2989 				tp->op_params->vector_mask);
2990 		/* get the max of iter_count for all dequeued ops */
2991 		for (i = 0; i < num_ops; ++i)
2992 			tp->iter_count = RTE_MAX(
2993 					tp->dec_ops[i]->turbo_dec.iter_count,
2994 					tp->iter_count);
2995 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2996 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2997 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2998 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2999 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3000 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
3001 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3002 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
3003 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
3004 	} else if (test_vector.op_type == RTE_BBDEV_OP_FFT) {
3005 		struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
3006 		ret = validate_fft_op(tp->fft_ops, num_ops, ref_op);
3007 		rte_bbdev_fft_op_free_bulk(tp->fft_ops, deq);
3008 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
3009 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3010 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
3011 				tp->op_params->vector_mask);
3012 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
3013 	}
3014 
3015 	if (ret) {
3016 		printf("Buffers validation failed\n");
3017 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3018 	}
3019 
3020 	switch (test_vector.op_type) {
3021 	case RTE_BBDEV_OP_TURBO_DEC:
3022 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
3023 		break;
3024 	case RTE_BBDEV_OP_TURBO_ENC:
3025 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
3026 		break;
3027 	case RTE_BBDEV_OP_LDPC_DEC:
3028 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
3029 		break;
3030 	case RTE_BBDEV_OP_FFT:
3031 		tb_len_bits = calc_fft_size(tp->op_params->ref_fft_op);
3032 		break;
3033 	case RTE_BBDEV_OP_LDPC_ENC:
3034 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
3035 		break;
3036 	case RTE_BBDEV_OP_NONE:
3037 		tb_len_bits = 0.0;
3038 		break;
3039 	default:
3040 		printf("Unknown op type: %d\n", test_vector.op_type);
3041 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
3042 		return;
3043 	}
3044 
3045 	tp->ops_per_sec += ((double)num_ops) /
3046 			((double)total_time / (double)rte_get_tsc_hz());
3047 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
3048 			((double)total_time / (double)rte_get_tsc_hz());
3049 
3050 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
3051 }
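
/*
 * Worked example (illustrative values only): num_ops = 512 operations of
 * tb_len_bits = 8448 bits completed in 1 ms accumulate
 * 512 / 0.001 = 512000 ops/s and (512 * 8448 / 1e6) / 0.001 ~= 4325 Mbps.
 */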
3052 
3053 static int
3054 throughput_intr_lcore_ldpc_dec(void *arg)
3055 {
3056 	struct thread_params *tp = arg;
3057 	unsigned int enqueued;
3058 	const uint16_t queue_id = tp->queue_id;
3059 	const uint16_t burst_sz = tp->op_params->burst_sz;
3060 	const uint16_t num_to_process = tp->op_params->num_to_process;
3061 	struct rte_bbdev_dec_op *ops[num_to_process];
3062 	struct test_buffers *bufs = NULL;
3063 	struct rte_bbdev_info info;
3064 	int ret, i, j;
3065 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3066 	uint16_t num_to_enq, enq;
3067 
3068 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3069 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3070 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3071 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3072 
3073 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3074 			"BURST_SIZE should be <= %u", MAX_BURST);
3075 
3076 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3077 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3078 			tp->dev_id, queue_id);
3079 
3080 	rte_bbdev_info_get(tp->dev_id, &info);
3081 
3082 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3083 			"NUM_OPS cannot exceed %u for this device",
3084 			info.drv.queue_size_lim);
3085 
3086 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3087 
3088 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3089 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3090 
3091 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3092 
3093 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3094 				num_to_process);
3095 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3096 			num_to_process);
3097 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3098 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
3099 				bufs->hard_outputs, bufs->soft_outputs,
3100 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3101 
3102 	/* Set counter to validate the ordering */
3103 	for (j = 0; j < num_to_process; ++j)
3104 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3105 
3106 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3107 		for (i = 0; i < num_to_process; ++i) {
3108 			if (!loopback)
3109 				rte_pktmbuf_reset(
3110 					ops[i]->ldpc_dec.hard_output.data);
3111 			if (hc_out || loopback)
3112 				mbuf_reset(
3113 				ops[i]->ldpc_dec.harq_combined_output.data);
3114 		}
3115 
3116 		tp->start_time = rte_rdtsc_precise();
3117 		for (enqueued = 0; enqueued < num_to_process;) {
3118 			num_to_enq = burst_sz;
3119 
3120 			if (unlikely(num_to_process - enqueued < num_to_enq))
3121 				num_to_enq = num_to_process - enqueued;
3122 
3123 			enq = 0;
3124 			do {
3125 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
3126 						tp->dev_id,
3127 						queue_id, &ops[enqueued],
3128 						num_to_enq);
3129 			} while (unlikely(num_to_enq != enq));
3130 			enqueued += enq;
3131 
3132 			/* Write the current number of enqueued descriptors to
3133 			 * the thread's burst_sz. This ensures that the proper
3134 			 * number of descriptors is dequeued in the callback
3135 			 * function - needed for the last batch when the number
3136 			 * of operations is not a multiple of the
3137 			 * burst size.
3138 			 */
3139 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3140 
3141 			/* Wait until processing of previous batch is
3142 			 * completed
3143 			 */
3144 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3145 		}
3146 		if (j != TEST_REPETITIONS - 1)
3147 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3148 	}
3149 
3150 	return TEST_SUCCESS;
3151 }
3152 
3153 static int
3154 throughput_intr_lcore_dec(void *arg)
3155 {
3156 	struct thread_params *tp = arg;
3157 	unsigned int enqueued;
3158 	const uint16_t queue_id = tp->queue_id;
3159 	const uint16_t burst_sz = tp->op_params->burst_sz;
3160 	const uint16_t num_to_process = tp->op_params->num_to_process;
3161 	struct rte_bbdev_dec_op *ops[num_to_process];
3162 	struct test_buffers *bufs = NULL;
3163 	struct rte_bbdev_info info;
3164 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3165 	int ret, i, j;
3166 	uint16_t num_to_enq, enq;
3167 
3168 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3169 			"BURST_SIZE should be <= %u", MAX_BURST);
3170 
3171 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3172 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3173 			tp->dev_id, queue_id);
3174 
3175 	rte_bbdev_info_get(tp->dev_id, &info);
3176 
3177 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3178 			"NUM_OPS cannot exceed %u for this device",
3179 			info.drv.queue_size_lim);
3180 
3181 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3182 
3183 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3184 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3185 
3186 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3187 
3188 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
3189 				num_to_process);
3190 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process);
3191 	ref_op->turbo_dec.iter_max = get_iter_max();
3192 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3193 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
3194 				bufs->hard_outputs, bufs->soft_outputs,
3195 				tp->op_params->ref_dec_op);
3196 
3197 	/* Set counter to validate the ordering. */
3198 	for (j = 0; j < num_to_process; ++j)
3199 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3200 
3201 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3202 		for (i = 0; i < num_to_process; ++i) {
3203 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
3204 			if (ops[i]->turbo_dec.soft_output.data != NULL)
3205 				rte_pktmbuf_reset(ops[i]->turbo_dec.soft_output.data);
3206 		}
3207 
3208 
3209 		tp->start_time = rte_rdtsc_precise();
3210 		for (enqueued = 0; enqueued < num_to_process;) {
3211 			num_to_enq = burst_sz;
3212 
3213 			if (unlikely(num_to_process - enqueued < num_to_enq))
3214 				num_to_enq = num_to_process - enqueued;
3215 
3216 			enq = 0;
3217 			do {
3218 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3219 						queue_id, &ops[enqueued],
3220 						num_to_enq);
3221 			} while (unlikely(num_to_enq != enq));
3222 			enqueued += enq;
3223 
3224 			/* Write the current number of enqueued descriptors to
3225 			 * the thread's burst_sz. This ensures that the proper
3226 			 * number of descriptors is dequeued in the callback
3227 			 * function - needed for the last batch when the number
3228 			 * of operations is not a multiple of the
3229 			 * burst size.
3230 			 */
3231 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3232 
3233 			/* Wait until processing of previous batch is
3234 			 * completed
3235 			 */
3236 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3237 		}
3238 		if (j != TEST_REPETITIONS - 1)
3239 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3240 	}
3241 
3242 	return TEST_SUCCESS;
3243 }
3244 
3245 static int
3246 throughput_intr_lcore_enc(void *arg)
3247 {
3248 	struct thread_params *tp = arg;
3249 	unsigned int enqueued;
3250 	const uint16_t queue_id = tp->queue_id;
3251 	const uint16_t burst_sz = tp->op_params->burst_sz;
3252 	const uint16_t num_to_process = tp->op_params->num_to_process;
3253 	struct rte_bbdev_enc_op *ops[num_to_process];
3254 	struct test_buffers *bufs = NULL;
3255 	struct rte_bbdev_info info;
3256 	int ret, i, j;
3257 	uint16_t num_to_enq, enq;
3258 
3259 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3260 			"BURST_SIZE should be <= %u", MAX_BURST);
3261 
3262 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3263 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3264 			tp->dev_id, queue_id);
3265 
3266 	rte_bbdev_info_get(tp->dev_id, &info);
3267 
3268 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3269 			"NUM_OPS cannot exceed %u for this device",
3270 			info.drv.queue_size_lim);
3271 
3272 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3273 
3274 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3275 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3276 
3277 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3278 
3279 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3280 			num_to_process);
3281 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3282 			num_to_process);
3283 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3284 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
3285 				bufs->hard_outputs, tp->op_params->ref_enc_op);
3286 
3287 	/* Set counter to validate the ordering */
3288 	for (j = 0; j < num_to_process; ++j)
3289 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3290 
3291 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3292 		for (i = 0; i < num_to_process; ++i)
3293 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3294 
3295 		tp->start_time = rte_rdtsc_precise();
3296 		for (enqueued = 0; enqueued < num_to_process;) {
3297 			num_to_enq = burst_sz;
3298 
3299 			if (unlikely(num_to_process - enqueued < num_to_enq))
3300 				num_to_enq = num_to_process - enqueued;
3301 
3302 			enq = 0;
3303 			do {
3304 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3305 						queue_id, &ops[enqueued],
3306 						num_to_enq);
3307 			} while (unlikely(enq != num_to_enq));
3308 			enqueued += enq;
3309 
3310 			/* Write to the thread's burst_sz the current number of
3311 			 * enqueued descriptors. It ensures that the proper
3312 			 * number of descriptors will be dequeued in the callback
3313 			 * function - needed for the last batch when the
3314 			 * number of operations is not a multiple of the
3315 			 * burst size.
3316 			 */
3317 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3318 
3319 			/* Wait until processing of previous batch is
3320 			 * completed
3321 			 */
3322 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3323 		}
3324 		if (j != TEST_REPETITIONS - 1)
3325 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3326 	}
3327 
3328 	return TEST_SUCCESS;
3329 }
3330 
3331 
3332 static int
3333 throughput_intr_lcore_ldpc_enc(void *arg)
3334 {
3335 	struct thread_params *tp = arg;
3336 	unsigned int enqueued;
3337 	const uint16_t queue_id = tp->queue_id;
3338 	const uint16_t burst_sz = tp->op_params->burst_sz;
3339 	const uint16_t num_to_process = tp->op_params->num_to_process;
3340 	struct rte_bbdev_enc_op *ops[num_to_process];
3341 	struct test_buffers *bufs = NULL;
3342 	struct rte_bbdev_info info;
3343 	int ret, i, j;
3344 	uint16_t num_to_enq, enq;
3345 
3346 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3347 			"BURST_SIZE should be <= %u", MAX_BURST);
3348 
3349 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3350 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3351 			tp->dev_id, queue_id);
3352 
3353 	rte_bbdev_info_get(tp->dev_id, &info);
3354 
3355 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3356 			"NUM_OPS cannot exceed %u for this device",
3357 			info.drv.queue_size_lim);
3358 
3359 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3360 
3361 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3362 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3363 
3364 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3365 
3366 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3367 			num_to_process);
3368 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3369 			num_to_process);
3370 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3371 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3372 				bufs->inputs, bufs->hard_outputs,
3373 				tp->op_params->ref_enc_op);
3374 
3375 	/* Set counter to validate the ordering */
3376 	for (j = 0; j < num_to_process; ++j)
3377 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3378 
3379 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3380 		for (i = 0; i < num_to_process; ++i)
3381 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3382 
3383 		tp->start_time = rte_rdtsc_precise();
3384 		for (enqueued = 0; enqueued < num_to_process;) {
3385 			num_to_enq = burst_sz;
3386 
3387 			if (unlikely(num_to_process - enqueued < num_to_enq))
3388 				num_to_enq = num_to_process - enqueued;
3389 
3390 			enq = 0;
3391 			do {
3392 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3393 						tp->dev_id,
3394 						queue_id, &ops[enqueued],
3395 						num_to_enq);
3396 			} while (unlikely(enq != num_to_enq));
3397 			enqueued += enq;
3398 
3399 			/* Write to the thread's burst_sz the current number of
3400 			 * enqueued descriptors. It ensures that the proper
3401 			 * number of descriptors will be dequeued in the callback
3402 			 * function - needed for the last batch when the
3403 			 * number of operations is not a multiple of the
3404 			 * burst size.
3405 			 */
3406 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3407 
3408 			/* Wait until processing of previous batch is
3409 			 * completed
3410 			 */
3411 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3412 		}
3413 		if (j != TEST_REPETITIONS - 1)
3414 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3415 	}
3416 
3417 	return TEST_SUCCESS;
3418 }
3419 
3420 
3421 static int
3422 throughput_intr_lcore_fft(void *arg)
3423 {
3424 	struct thread_params *tp = arg;
3425 	unsigned int enqueued;
3426 	const uint16_t queue_id = tp->queue_id;
3427 	const uint16_t burst_sz = tp->op_params->burst_sz;
3428 	const uint16_t num_to_process = tp->op_params->num_to_process;
3429 	struct rte_bbdev_fft_op *ops[num_to_process];
3430 	struct test_buffers *bufs = NULL;
3431 	struct rte_bbdev_info info;
3432 	int ret, i, j;
3433 	uint16_t num_to_enq, enq;
3434 
3435 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3436 			"BURST_SIZE should be <= %u", MAX_BURST);
3437 
3438 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3439 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3440 			tp->dev_id, queue_id);
3441 
3442 	rte_bbdev_info_get(tp->dev_id, &info);
3443 
3444 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3445 			"NUM_OPS cannot exceed %u for this device",
3446 			info.drv.queue_size_lim);
3447 
3448 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3449 
3450 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3451 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3452 
3453 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3454 
3455 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops,
3456 			num_to_process);
3457 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3458 			num_to_process);
3459 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3460 		copy_reference_fft_op(ops, num_to_process, 0, bufs->inputs,
3461 				bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_fft_op);
3462 
3463 	/* Set counter to validate the ordering */
3464 	for (j = 0; j < num_to_process; ++j)
3465 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3466 
3467 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3468 		for (i = 0; i < num_to_process; ++i)
3469 			rte_pktmbuf_reset(ops[i]->fft.base_output.data);
3470 
3471 		tp->start_time = rte_rdtsc_precise();
3472 		for (enqueued = 0; enqueued < num_to_process;) {
3473 			num_to_enq = burst_sz;
3474 
3475 			if (unlikely(num_to_process - enqueued < num_to_enq))
3476 				num_to_enq = num_to_process - enqueued;
3477 
3478 			enq = 0;
3479 			do {
3480 				enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
3481 						queue_id, &ops[enqueued],
3482 						num_to_enq);
3483 			} while (unlikely(enq != num_to_enq));
3484 			enqueued += enq;
3485 
3486 			/* Write to the thread's burst_sz the current number of
3487 			 * enqueued descriptors. It ensures that the proper
3488 			 * number of descriptors will be dequeued in the callback
3489 			 * function - needed for the last batch when the
3490 			 * number of operations is not a multiple of the
3491 			 * burst size.
3492 			 */
3493 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3494 
3495 			/* Wait until processing of previous batch is
3496 			 * completed
3497 			 */
3498 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3499 		}
3500 		if (j != TEST_REPETITIONS - 1)
3501 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3502 	}
3503 
3504 	return TEST_SUCCESS;
3505 }
3506 
3507 static int
3508 throughput_pmd_lcore_dec(void *arg)
3509 {
3510 	struct thread_params *tp = arg;
3511 	uint16_t enq, deq;
3512 	uint64_t total_time = 0, start_time;
3513 	const uint16_t queue_id = tp->queue_id;
3514 	const uint16_t burst_sz = tp->op_params->burst_sz;
3515 	const uint16_t num_ops = tp->op_params->num_to_process;
3516 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3517 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3518 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3519 	struct test_buffers *bufs = NULL;
3520 	int i, j, ret;
3521 	struct rte_bbdev_info info;
3522 	uint16_t num_to_enq;
3523 	bool so_enable;
3524 
3525 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3526 			"BURST_SIZE should be <= %u", MAX_BURST);
3527 
3528 	rte_bbdev_info_get(tp->dev_id, &info);
3529 
3530 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3531 			"NUM_OPS cannot exceed %u for this device",
3532 			info.drv.queue_size_lim);
3533 
3534 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3535 
3536 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3537 
3538 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3539 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3540 	ref_op->turbo_dec.iter_max = get_iter_max();
3541 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3542 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3543 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3544 
3545 	so_enable = check_bit(ops_enq[0]->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT);
3546 
3547 	/* Set counter to validate the ordering */
3548 	for (j = 0; j < num_ops; ++j)
3549 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3550 
3551 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3552 
3553 		for (j = 0; j < num_ops; ++j)
3554 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3555 		if (so_enable)
3556 			for (j = 0; j < num_ops; ++j)
3557 				mbuf_reset(ops_enq[j]->turbo_dec.soft_output.data);
3558 
3559 		start_time = rte_rdtsc_precise();
3560 
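		/* Enqueue and dequeue are interleaved so the queue stays
		 * saturated; ops still in flight are drained after the loop.
		 */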
3561 		for (enq = 0, deq = 0; enq < num_ops;) {
3562 			num_to_enq = burst_sz;
3563 
3564 			if (unlikely(num_ops - enq < num_to_enq))
3565 				num_to_enq = num_ops - enq;
3566 
3567 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3568 					queue_id, &ops_enq[enq], num_to_enq);
3569 
3570 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3571 					queue_id, &ops_deq[deq], enq - deq);
3572 		}
3573 
3574 		/* dequeue the remaining */
3575 		while (deq < enq) {
3576 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3577 					queue_id, &ops_deq[deq], enq - deq);
3578 		}
3579 
3580 		total_time += rte_rdtsc_precise() - start_time;
3581 	}
3582 
3583 	tp->iter_count = 0;
3584 	/* get the max of iter_count for all dequeued ops */
3585 	for (i = 0; i < num_ops; ++i) {
3586 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3587 				tp->iter_count);
3588 	}
3589 
3590 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3591 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3592 				tp->op_params->vector_mask);
3593 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3594 	}
3595 
3596 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3597 
3598 	double tb_len_bits = calc_dec_TB_size(ref_op);
3599 
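	/* Ops/s = processed ops / elapsed seconds;
	 * Mbps = processed TB bits / elapsed seconds / 1e6.
	 */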
3600 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3601 			((double)total_time / (double)rte_get_tsc_hz());
3602 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3603 			1000000.0) / ((double)total_time /
3604 			(double)rte_get_tsc_hz());
3605 
3606 	return TEST_SUCCESS;
3607 }
3608 
3609 static int
3610 bler_pmd_lcore_ldpc_dec(void *arg)
3611 {
3612 	struct thread_params *tp = arg;
3613 	uint16_t enq, deq;
3614 	uint64_t total_time = 0, start_time;
3615 	const uint16_t queue_id = tp->queue_id;
3616 	const uint16_t burst_sz = tp->op_params->burst_sz;
3617 	const uint16_t num_ops = tp->op_params->num_to_process;
3618 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3619 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3620 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3621 	struct test_buffers *bufs = NULL;
3622 	int i, j, ret;
3623 	float parity_bler = 0;
3624 	struct rte_bbdev_info info;
3625 	uint16_t num_to_enq;
3626 	bool extDdr = check_bit(ldpc_cap_flags,
3627 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3628 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3629 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3630 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3631 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3632 
3633 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3634 			"BURST_SIZE should be <= %u", MAX_BURST);
3635 
3636 	rte_bbdev_info_get(tp->dev_id, &info);
3637 
3638 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3639 			"NUM_OPS cannot exceed %u for this device",
3640 			info.drv.queue_size_lim);
3641 
3642 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3643 
3644 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3645 
3646 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3647 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3648 
3649 	/* For BLER tests we need to enable early termination */
3650 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3651 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3652 		ref_op->ldpc_dec.op_flags |=
3653 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3654 	ref_op->ldpc_dec.iter_max = get_iter_max();
3655 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3656 
3657 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3658 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3659 				bufs->hard_outputs, bufs->soft_outputs,
3660 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
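	/* Regenerate the input LLRs so the BLER measurement runs on noisy
	 * data at the configured SNR rather than on the raw vector input.
	 */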
3661 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3662 
3663 	/* Set counter to validate the ordering */
3664 	for (j = 0; j < num_ops; ++j)
3665 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3666 
3667 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3668 		for (j = 0; j < num_ops; ++j) {
3669 			if (!loopback)
3670 				mbuf_reset(
3671 				ops_enq[j]->ldpc_dec.hard_output.data);
3672 			if (hc_out || loopback)
3673 				mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data);
3674 		}
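		/* When HARQ data resides in external DDR, preload it before
		 * the timed section starts.
		 */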
3675 		if (extDdr)
3676 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3677 					num_ops, true);
3678 		start_time = rte_rdtsc_precise();
3679 
3680 		for (enq = 0, deq = 0; enq < num_ops;) {
3681 			num_to_enq = burst_sz;
3682 
3683 			if (unlikely(num_ops - enq < num_to_enq))
3684 				num_to_enq = num_ops - enq;
3685 
3686 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3687 					queue_id, &ops_enq[enq], num_to_enq);
3688 
3689 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3690 					queue_id, &ops_deq[deq], enq - deq);
3691 		}
3692 
3693 		/* dequeue the remaining */
3694 		while (deq < enq) {
3695 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3696 					queue_id, &ops_deq[deq], enq - deq);
3697 		}
3698 
3699 		total_time += rte_rdtsc_precise() - start_time;
3700 	}
3701 
3702 	tp->iter_count = 0;
3703 	tp->iter_average = 0;
3704 	/* get the max of iter_count for all dequeued ops */
3705 	for (i = 0; i < num_ops; ++i) {
3706 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3707 				tp->iter_count);
3708 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3709 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3710 			parity_bler += 1.0;
3711 	}
3712 
3713 	parity_bler /= num_ops; /* BLER based on the device syndrome-error status */
3714 	tp->iter_average /= num_ops;
3715 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3716 
3717 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3718 			&& tp->bler == 0
3719 			&& parity_bler == 0
3720 			&& !hc_out) {
3721 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3722 				tp->op_params->vector_mask);
3723 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3724 	}
3725 
3726 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3727 
3728 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3729 	tp->ops_per_sec = ((double)num_ops * 1) /
3730 			((double)total_time / (double)rte_get_tsc_hz());
3731 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3732 			1000000.0) / ((double)total_time /
3733 			(double)rte_get_tsc_hz());
3734 
3735 	return TEST_SUCCESS;
3736 }
3737 
3738 static int
3739 throughput_pmd_lcore_ldpc_dec(void *arg)
3740 {
3741 	struct thread_params *tp = arg;
3742 	uint16_t enq, deq;
3743 	uint64_t total_time = 0, start_time;
3744 	const uint16_t queue_id = tp->queue_id;
3745 	const uint16_t burst_sz = tp->op_params->burst_sz;
3746 	const uint16_t num_ops = tp->op_params->num_to_process;
3747 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3748 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3749 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3750 	struct test_buffers *bufs = NULL;
3751 	int i, j, ret;
3752 	struct rte_bbdev_info info;
3753 	uint16_t num_to_enq;
3754 	bool extDdr = check_bit(ldpc_cap_flags,
3755 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3756 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3757 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3758 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3759 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3760 
3761 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3762 			"BURST_SIZE should be <= %u", MAX_BURST);
3763 
3764 	rte_bbdev_info_get(tp->dev_id, &info);
3765 
3766 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3767 			"NUM_OPS cannot exceed %u for this device",
3768 			info.drv.queue_size_lim);
3769 
3770 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3771 
3772 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3773 
3774 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3775 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3776 
3777 	/* For throughput tests we need to disable early termination */
3778 	if (check_bit(ref_op->ldpc_dec.op_flags,
3779 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3780 		ref_op->ldpc_dec.op_flags &=
3781 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3782 	ref_op->ldpc_dec.iter_max = get_iter_max();
3783 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3784 
3785 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3786 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3787 				bufs->hard_outputs, bufs->soft_outputs,
3788 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3789 
3790 	/* Set counter to validate the ordering */
3791 	for (j = 0; j < num_ops; ++j)
3792 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3793 
3794 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3795 		for (j = 0; j < num_ops; ++j) {
3796 			if (!loopback)
3797 				mbuf_reset(
3798 				ops_enq[j]->ldpc_dec.hard_output.data);
3799 			if (hc_out || loopback)
3800 				mbuf_reset(
3801 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3802 		}
3803 		if (extDdr)
3804 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3805 					num_ops, true);
3806 		start_time = rte_rdtsc_precise();
3807 
3808 		for (enq = 0, deq = 0; enq < num_ops;) {
3809 			num_to_enq = burst_sz;
3810 
3811 			if (unlikely(num_ops - enq < num_to_enq))
3812 				num_to_enq = num_ops - enq;
3813 
3814 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3815 					queue_id, &ops_enq[enq], num_to_enq);
3816 
3817 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3818 					queue_id, &ops_deq[deq], enq - deq);
3819 		}
3820 
3821 		/* dequeue the remaining */
3822 		while (deq < enq) {
3823 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3824 					queue_id, &ops_deq[deq], enq - deq);
3825 		}
3826 
3827 		total_time += rte_rdtsc_precise() - start_time;
3828 	}
3829 
3830 	tp->iter_count = 0;
3831 	/* get the max of iter_count for all dequeued ops */
3832 	for (i = 0; i < num_ops; ++i) {
3833 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3834 				tp->iter_count);
3835 	}
3836 	if (extDdr) {
3837 		/* Read loopback is not thread safe */
3838 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3839 	}
3840 
3841 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3842 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3843 				tp->op_params->vector_mask);
3844 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3845 	}
3846 
3847 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3848 
3849 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3850 
3851 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3852 			((double)total_time / (double)rte_get_tsc_hz());
3853 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3854 			1000000.0) / ((double)total_time /
3855 			(double)rte_get_tsc_hz());
3856 
3857 	return TEST_SUCCESS;
3858 }
3859 
3860 static int
3861 throughput_pmd_lcore_enc(void *arg)
3862 {
3863 	struct thread_params *tp = arg;
3864 	uint16_t enq, deq;
3865 	uint64_t total_time = 0, start_time;
3866 	const uint16_t queue_id = tp->queue_id;
3867 	const uint16_t burst_sz = tp->op_params->burst_sz;
3868 	const uint16_t num_ops = tp->op_params->num_to_process;
3869 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3870 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3871 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3872 	struct test_buffers *bufs = NULL;
3873 	int i, j, ret;
3874 	struct rte_bbdev_info info;
3875 	uint16_t num_to_enq;
3876 
3877 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3878 			"BURST_SIZE should be <= %u", MAX_BURST);
3879 
3880 	rte_bbdev_info_get(tp->dev_id, &info);
3881 
3882 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3883 			"NUM_OPS cannot exceed %u for this device",
3884 			info.drv.queue_size_lim);
3885 
3886 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3887 
3888 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3889 
3890 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3891 			num_ops);
3892 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3893 			num_ops);
3894 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3895 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3896 				bufs->hard_outputs, ref_op);
3897 
3898 	/* Set counter to validate the ordering */
3899 	for (j = 0; j < num_ops; ++j)
3900 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3901 
3902 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3903 
3904 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3905 			for (j = 0; j < num_ops; ++j)
3906 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3907 
3908 		start_time = rte_rdtsc_precise();
3909 
3910 		for (enq = 0, deq = 0; enq < num_ops;) {
3911 			num_to_enq = burst_sz;
3912 
3913 			if (unlikely(num_ops - enq < num_to_enq))
3914 				num_to_enq = num_ops - enq;
3915 
3916 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3917 					queue_id, &ops_enq[enq], num_to_enq);
3918 
3919 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3920 					queue_id, &ops_deq[deq], enq - deq);
3921 		}
3922 
3923 		/* dequeue the remaining */
3924 		while (deq < enq) {
3925 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3926 					queue_id, &ops_deq[deq], enq - deq);
3927 		}
3928 
3929 		total_time += rte_rdtsc_precise() - start_time;
3930 	}
3931 
3932 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3933 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3934 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3935 	}
3936 
3937 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3938 
3939 	double tb_len_bits = calc_enc_TB_size(ref_op);
3940 
3941 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3942 			((double)total_time / (double)rte_get_tsc_hz());
3943 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3944 			/ 1000000.0) / ((double)total_time /
3945 			(double)rte_get_tsc_hz());
3946 
3947 	return TEST_SUCCESS;
3948 }
3949 
3950 static int
3951 throughput_pmd_lcore_ldpc_enc(void *arg)
3952 {
3953 	struct thread_params *tp = arg;
3954 	uint16_t enq, deq;
3955 	uint64_t total_time = 0, start_time;
3956 	const uint16_t queue_id = tp->queue_id;
3957 	const uint16_t burst_sz = tp->op_params->burst_sz;
3958 	const uint16_t num_ops = tp->op_params->num_to_process;
3959 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3960 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3961 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3962 	struct test_buffers *bufs = NULL;
3963 	int i, j, ret;
3964 	struct rte_bbdev_info info;
3965 	uint16_t num_to_enq;
3966 
3967 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3968 			"BURST_SIZE should be <= %u", MAX_BURST);
3969 
3970 	rte_bbdev_info_get(tp->dev_id, &info);
3971 
3972 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3973 			"NUM_OPS cannot exceed %u for this device",
3974 			info.drv.queue_size_lim);
3975 
3976 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3977 
3978 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3979 
3980 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3981 			num_ops);
3982 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3983 			num_ops);
3984 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3985 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3986 				bufs->hard_outputs, ref_op);
3987 
3988 	/* Set counter to validate the ordering */
3989 	for (j = 0; j < num_ops; ++j)
3990 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3991 
3992 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3993 
3994 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3995 			for (j = 0; j < num_ops; ++j)
3996 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3997 
3998 		start_time = rte_rdtsc_precise();
3999 
4000 		for (enq = 0, deq = 0; enq < num_ops;) {
4001 			num_to_enq = burst_sz;
4002 
4003 			if (unlikely(num_ops - enq < num_to_enq))
4004 				num_to_enq = num_ops - enq;
4005 
4006 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
4007 					queue_id, &ops_enq[enq], num_to_enq);
4008 
4009 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4010 					queue_id, &ops_deq[deq], enq - deq);
4011 		}
4012 
4013 		/* dequeue the remaining */
4014 		while (deq < enq) {
4015 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
4016 					queue_id, &ops_deq[deq], enq - deq);
4017 		}
4018 
4019 		total_time += rte_rdtsc_precise() - start_time;
4020 	}
4021 
4022 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4023 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
4024 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4025 	}
4026 
4027 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
4028 
4029 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
4030 
4031 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4032 			((double)total_time / (double)rte_get_tsc_hz());
4033 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
4034 			/ 1000000.0) / ((double)total_time /
4035 			(double)rte_get_tsc_hz());
4036 
4037 	return TEST_SUCCESS;
4038 }
4039 
4040 static int
4041 throughput_pmd_lcore_fft(void *arg)
4042 {
4043 	struct thread_params *tp = arg;
4044 	uint16_t enq, deq;
4045 	uint64_t total_time = 0, start_time;
4046 	const uint16_t queue_id = tp->queue_id;
4047 	const uint16_t burst_sz = tp->op_params->burst_sz;
4048 	const uint16_t num_ops = tp->op_params->num_to_process;
4049 	struct rte_bbdev_fft_op *ops_enq[num_ops];
4050 	struct rte_bbdev_fft_op *ops_deq[num_ops];
4051 	struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op;
4052 	struct test_buffers *bufs = NULL;
4053 	int i, j, ret;
4054 	struct rte_bbdev_info info;
4055 	uint16_t num_to_enq;
4056 
4057 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4058 			"BURST_SIZE should be <= %u", MAX_BURST);
4059 
4060 	rte_bbdev_info_get(tp->dev_id, &info);
4061 
4062 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
4063 			"NUM_OPS cannot exceed %u for this device",
4064 			info.drv.queue_size_lim);
4065 
4066 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4067 
4068 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4069 
4070 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
4071 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
4072 
4073 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4074 		copy_reference_fft_op(ops_enq, num_ops, 0, bufs->inputs,
4075 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
4076 
4077 	/* Set counter to validate the ordering */
4078 	for (j = 0; j < num_ops; ++j)
4079 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4080 
4081 	for (i = 0; i < TEST_REPETITIONS; ++i) {
4082 
4083 		for (j = 0; j < num_ops; ++j)
4084 			mbuf_reset(ops_enq[j]->fft.base_output.data);
4085 
4086 		start_time = rte_rdtsc_precise();
4087 
4088 		for (enq = 0, deq = 0; enq < num_ops;) {
4089 			num_to_enq = burst_sz;
4090 
4091 			if (unlikely(num_ops - enq < num_to_enq))
4092 				num_to_enq = num_ops - enq;
4093 
4094 			enq += rte_bbdev_enqueue_fft_ops(tp->dev_id,
4095 					queue_id, &ops_enq[enq], num_to_enq);
4096 
4097 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4098 					queue_id, &ops_deq[deq], enq - deq);
4099 		}
4100 
4101 		/* dequeue the remaining */
4102 		while (deq < enq) {
4103 			deq += rte_bbdev_dequeue_fft_ops(tp->dev_id,
4104 					queue_id, &ops_deq[deq], enq - deq);
4105 		}
4106 
4107 		total_time += rte_rdtsc_precise() - start_time;
4108 	}
4109 
4110 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4111 		ret = validate_fft_op(ops_deq, num_ops, ref_op);
4112 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4113 	}
4114 
4115 	rte_bbdev_fft_op_free_bulk(ops_enq, num_ops);
4116 
4117 	double tb_len_bits = calc_fft_size(ref_op);
4118 
4119 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
4120 			((double)total_time / (double)rte_get_tsc_hz());
4121 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
4122 			1000000.0) / ((double)total_time /
4123 			(double)rte_get_tsc_hz());
4124 
4125 	return TEST_SUCCESS;
4126 }
4127 
4128 static void
4129 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
4130 {
4131 	unsigned int iter = 0;
4132 	double total_mops = 0, total_mbps = 0;
4133 
4134 	for (iter = 0; iter < used_cores; iter++) {
4135 		printf(
4136 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
4137 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
4138 			t_params[iter].mbps);
4139 		total_mops += t_params[iter].ops_per_sec;
4140 		total_mbps += t_params[iter].mbps;
4141 	}
4142 	printf(
4143 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
4144 		used_cores, total_mops, total_mbps);
4145 }
4146 
4147 /* Aggregate the performance results over the number of cores used */
4148 static void
4149 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
4150 {
4151 	unsigned int core_idx = 0;
4152 	double total_mops = 0, total_mbps = 0;
4153 	uint8_t iter_count = 0;
4154 
4155 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4156 		printf(
4157 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
4158 			t_params[core_idx].lcore_id,
4159 			t_params[core_idx].ops_per_sec,
4160 			t_params[core_idx].mbps,
4161 			t_params[core_idx].iter_count);
4162 		total_mops += t_params[core_idx].ops_per_sec;
4163 		total_mbps += t_params[core_idx].mbps;
4164 		iter_count = RTE_MAX(iter_count,
4165 				t_params[core_idx].iter_count);
4166 	}
4167 	printf(
4168 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
4169 		used_cores, total_mops, total_mbps, iter_count);
4170 }
4171 
4172 /* Aggregate the performance results over the number of cores used */
4173 static void
4174 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
4175 {
4176 	unsigned int core_idx = 0;
4177 	double total_mbps = 0, total_bler = 0, total_iter = 0;
4178 	double snr = get_snr();
4179 
4180 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
4181 		printf("Core %u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
4182 				t_params[core_idx].lcore_id,
4183 				t_params[core_idx].bler * 100,
4184 				t_params[core_idx].iter_average,
4185 				t_params[core_idx].mbps,
4186 				get_vector_filename());
4187 		total_mbps += t_params[core_idx].mbps;
4188 		total_bler += t_params[core_idx].bler;
4189 		total_iter += t_params[core_idx].iter_average;
4190 	}
4191 	total_bler /= used_cores;
4192 	total_iter /= used_cores;
4193 
4194 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f (max %d) - Tp %.1f Mbps %s\n",
4195 			snr, total_bler * 100, total_iter, get_iter_max(),
4196 			total_mbps, get_vector_filename());
4197 }
4198 
4199 /*
4200  * Test function that determines BLER wireless performance
4201  */
4202 static int
4203 bler_test(struct active_device *ad,
4204 		struct test_op_params *op_params)
4205 {
4206 	int ret;
4207 	unsigned int lcore_id, used_cores = 0;
4208 	struct thread_params *t_params;
4209 	struct rte_bbdev_info info;
4210 	lcore_function_t *bler_function;
4211 	uint16_t num_lcores;
4212 	const char *op_type_str;
4213 
4214 	rte_bbdev_info_get(ad->dev_id, &info);
4215 
4216 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4217 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4218 			test_vector.op_type);
4219 
4220 	printf("+ ------------------------------------------------------- +\n");
4221 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4222 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4223 			op_params->num_to_process, op_params->num_lcores,
4224 			op_type_str,
4225 			intr_enabled ? "Interrupt mode" : "PMD mode",
4226 			(double)rte_get_tsc_hz() / 1000000000.0);
4227 
4228 	/* Set number of lcores */
4229 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4230 			? ad->nb_queues
4231 			: op_params->num_lcores;
4232 
4233 	/* Allocate memory for thread parameters structure */
4234 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4235 			RTE_CACHE_LINE_SIZE);
4236 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4237 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4238 				RTE_CACHE_LINE_SIZE));
4239 
4240 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
4241 			!check_bit(test_vector.ldpc_dec.op_flags,
4242 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
4243 			&& !check_bit(test_vector.ldpc_dec.op_flags,
4244 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
4245 		bler_function = bler_pmd_lcore_ldpc_dec;
4246 	else
4247 		return TEST_SKIPPED;
4248 
4249 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4250 
4251 	/* Main core is set at first entry */
4252 	t_params[0].dev_id = ad->dev_id;
4253 	t_params[0].lcore_id = rte_lcore_id();
4254 	t_params[0].op_params = op_params;
4255 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4256 	t_params[0].iter_count = 0;
4257 
4258 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4259 		if (used_cores >= num_lcores)
4260 			break;
4261 
4262 		t_params[used_cores].dev_id = ad->dev_id;
4263 		t_params[used_cores].lcore_id = lcore_id;
4264 		t_params[used_cores].op_params = op_params;
4265 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4266 		t_params[used_cores].iter_count = 0;
4267 
4268 		rte_eal_remote_launch(bler_function,
4269 				&t_params[used_cores++], lcore_id);
4270 	}
4271 
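	/* Release the waiting worker lcores, then run the main lcore's share. */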
4272 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4273 	ret = bler_function(&t_params[0]);
4274 
4275 	/* Main core is always used */
4276 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4277 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4278 
4279 	print_dec_bler(t_params, num_lcores);
4280 
4281 	/* Return if test failed */
4282 	if (ret) {
4283 		rte_free(t_params);
4284 		return ret;
4285 	}
4286 
4287 	/* BLER results were already printed above; release resources. */
4288 	rte_free(t_params);
4289 	return ret;
4290 }
4291 
4292 /*
4293  * Test function that determines how long an enqueue + dequeue of a burst
4294  * takes on available lcores.
4295  */
4296 static int
4297 throughput_test(struct active_device *ad,
4298 		struct test_op_params *op_params)
4299 {
4300 	int ret;
4301 	unsigned int lcore_id, used_cores = 0;
4302 	struct thread_params *t_params, *tp;
4303 	struct rte_bbdev_info info;
4304 	lcore_function_t *throughput_function;
4305 	uint16_t num_lcores;
4306 	const char *op_type_str;
4307 
4308 	rte_bbdev_info_get(ad->dev_id, &info);
4309 
4310 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
4311 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
4312 			test_vector.op_type);
4313 
4314 	printf("+ ------------------------------------------------------- +\n");
4315 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
4316 			info.dev_name, ad->nb_queues, op_params->burst_sz,
4317 			op_params->num_to_process, op_params->num_lcores,
4318 			op_type_str,
4319 			intr_enabled ? "Interrupt mode" : "PMD mode",
4320 			(double)rte_get_tsc_hz() / 1000000000.0);
4321 
4322 	/* Set number of lcores */
4323 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
4324 			? ad->nb_queues
4325 			: op_params->num_lcores;
4326 
4327 	/* Allocate memory for thread parameters structure */
4328 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
4329 			RTE_CACHE_LINE_SIZE);
4330 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
4331 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
4332 				RTE_CACHE_LINE_SIZE));
4333 
4334 	if (intr_enabled) {
4335 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4336 			throughput_function = throughput_intr_lcore_dec;
4337 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4338 			throughput_function = throughput_intr_lcore_ldpc_dec;
4339 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4340 			throughput_function = throughput_intr_lcore_enc;
4341 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4342 			throughput_function = throughput_intr_lcore_ldpc_enc;
4343 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4344 			throughput_function = throughput_intr_lcore_fft;
4345 		else
4346 			throughput_function = throughput_intr_lcore_enc;
4347 
4348 		/* Dequeue interrupt callback registration */
4349 		ret = rte_bbdev_callback_register(ad->dev_id,
4350 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
4351 				t_params);
4352 		if (ret < 0) {
4353 			rte_free(t_params);
4354 			return ret;
4355 		}
4356 	} else {
4357 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
4358 			throughput_function = throughput_pmd_lcore_dec;
4359 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4360 			throughput_function = throughput_pmd_lcore_ldpc_dec;
4361 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
4362 			throughput_function = throughput_pmd_lcore_enc;
4363 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4364 			throughput_function = throughput_pmd_lcore_ldpc_enc;
4365 		else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
4366 			throughput_function = throughput_pmd_lcore_fft;
4367 		else
4368 			throughput_function = throughput_pmd_lcore_enc;
4369 	}
4370 
4371 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
4372 
4373 	/* Main core is set at first entry */
4374 	t_params[0].dev_id = ad->dev_id;
4375 	t_params[0].lcore_id = rte_lcore_id();
4376 	t_params[0].op_params = op_params;
4377 	t_params[0].queue_id = ad->queue_ids[used_cores++];
4378 	t_params[0].iter_count = 0;
4379 
4380 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
4381 		if (used_cores >= num_lcores)
4382 			break;
4383 
4384 		t_params[used_cores].dev_id = ad->dev_id;
4385 		t_params[used_cores].lcore_id = lcore_id;
4386 		t_params[used_cores].op_params = op_params;
4387 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
4388 		t_params[used_cores].iter_count = 0;
4389 
4390 		rte_eal_remote_launch(throughput_function,
4391 				&t_params[used_cores++], lcore_id);
4392 	}
4393 
4394 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
4395 	ret = throughput_function(&t_params[0]);
4396 
4397 	/* Main core is always used */
4398 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
4399 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
4400 
4401 	/* Return if test failed */
4402 	if (ret) {
4403 		rte_free(t_params);
4404 		return ret;
4405 	}
4406 
4407 	/* Print throughput if interrupts are disabled and test passed */
4408 	if (!intr_enabled) {
4409 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4410 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4411 			print_dec_throughput(t_params, num_lcores);
4412 		else
4413 			print_enc_throughput(t_params, num_lcores);
4414 		rte_free(t_params);
4415 		return ret;
4416 	}
4417 
4418 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
4419 	 * all pending operations. Skip waiting for queues which reported an
4420 	 * error via the processing_status variable.
4421 	 * Wait for the main lcore operations first.
4422 	 */
4423 	tp = &t_params[0];
4424 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4425 		op_params->num_to_process) &&
4426 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4427 		TEST_FAILED))
4428 		rte_pause();
4429 
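	/* The per-queue figures were accumulated over TEST_REPETITIONS
	 * iterations; normalise them to a single repetition.
	 */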
4430 	tp->ops_per_sec /= TEST_REPETITIONS;
4431 	tp->mbps /= TEST_REPETITIONS;
4432 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4433 
4434 	/* Wait for worker lcores operations */
4435 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
4436 		tp = &t_params[used_cores];
4437 
4438 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
4439 			op_params->num_to_process) &&
4440 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
4441 			TEST_FAILED))
4442 			rte_pause();
4443 
4444 		tp->ops_per_sec /= TEST_REPETITIONS;
4445 		tp->mbps /= TEST_REPETITIONS;
4446 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
4447 	}
4448 
4449 	/* Print throughput if test passed */
4450 	if (!ret) {
4451 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
4452 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
4453 			print_dec_throughput(t_params, num_lcores);
4454 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
4455 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
4456 			print_enc_throughput(t_params, num_lcores);
4457 	}
4458 
4459 	rte_free(t_params);
4460 	return ret;
4461 }
4462 
4463 static int
4464 latency_test_dec(struct rte_mempool *mempool,
4465 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4466 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4467 		const uint16_t num_to_process, uint16_t burst_sz,
4468 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4469 {
4470 	int ret = TEST_SUCCESS;
4471 	uint16_t i, j, dequeued;
4472 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4473 	uint64_t start_time = 0, last_time = 0;
4474 
4475 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4476 		uint16_t enq = 0, deq = 0;
4477 		bool first_time = true;
4478 		last_time = 0;
4479 
4480 		if (unlikely(num_to_process - dequeued < burst_sz))
4481 			burst_sz = num_to_process - dequeued;
4482 
4483 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4484 		TEST_ASSERT_SUCCESS(ret,
4485 				"rte_bbdev_dec_op_alloc_bulk() failed");
4486 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4487 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4488 					bufs->inputs,
4489 					bufs->hard_outputs,
4490 					bufs->soft_outputs,
4491 					ref_op);
4492 
4493 		/* Set counter to validate the ordering */
4494 		for (j = 0; j < burst_sz; ++j)
4495 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4496 
4497 		start_time = rte_rdtsc_precise();
4498 
4499 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4500 				burst_sz);
4501 		TEST_ASSERT(enq == burst_sz,
4502 				"Error enqueueing burst, expected %u, got %u",
4503 				burst_sz, enq);
4504 
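		/* Latency is measured from the enqueue call until the first
		 * descriptors of the burst are dequeued.
		 */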
4505 		/* Dequeue */
4506 		do {
4507 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4508 					&ops_deq[deq], burst_sz - deq);
4509 			if (likely(first_time && (deq > 0))) {
4510 				last_time = rte_rdtsc_precise() - start_time;
4511 				first_time = false;
4512 			}
4513 		} while (unlikely(burst_sz != deq));
4514 
4515 		*max_time = RTE_MAX(*max_time, last_time);
4516 		*min_time = RTE_MIN(*min_time, last_time);
4517 		*total_time += last_time;
4518 
4519 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4520 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4521 					vector_mask);
4522 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4523 		}
4524 
4525 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4526 		dequeued += deq;
4527 	}
4528 
4529 	return i;
4530 }
4531 
4532 /* Test case for latency/validation for LDPC Decoder */
4533 static int
4534 latency_test_ldpc_dec(struct rte_mempool *mempool,
4535 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4536 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4537 		const uint16_t num_to_process, uint16_t burst_sz,
4538 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4539 		bool disable_et)
4540 {
4541 	int ret = TEST_SUCCESS;
4542 	uint16_t i, j, dequeued;
4543 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4544 	uint64_t start_time = 0, last_time = 0;
4545 	bool extDdr = ldpc_cap_flags &
4546 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4547 
4548 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4549 		uint16_t enq = 0, deq = 0;
4550 		bool first_time = true;
4551 		last_time = 0;
4552 
4553 		if (unlikely(num_to_process - dequeued < burst_sz))
4554 			burst_sz = num_to_process - dequeued;
4555 
4556 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4557 		TEST_ASSERT_SUCCESS(ret,
4558 				"rte_bbdev_dec_op_alloc_bulk() failed");
4559 
4560 		/* For latency tests we need to disable early termination */
4561 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4562 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4563 			ref_op->ldpc_dec.op_flags &=
4564 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4565 		ref_op->ldpc_dec.iter_max = get_iter_max();
4566 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4567 
4568 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4569 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4570 					bufs->inputs,
4571 					bufs->hard_outputs,
4572 					bufs->soft_outputs,
4573 					bufs->harq_inputs,
4574 					bufs->harq_outputs,
4575 					ref_op);
4576 
4577 		if (extDdr)
4578 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4579 					burst_sz, true);
4580 
4581 		/* Set counter to validate the ordering */
4582 		for (j = 0; j < burst_sz; ++j)
4583 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4584 
4585 		start_time = rte_rdtsc_precise();
4586 
4587 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4588 				&ops_enq[enq], burst_sz);
4589 		TEST_ASSERT(enq == burst_sz,
4590 				"Error enqueueing burst, expected %u, got %u",
4591 				burst_sz, enq);
4592 
4593 		/* Dequeue */
4594 		do {
4595 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4596 					&ops_deq[deq], burst_sz - deq);
4597 			if (likely(first_time && (deq > 0))) {
4598 				last_time = rte_rdtsc_precise() - start_time;
4599 				first_time = false;
4600 			}
4601 		} while (unlikely(burst_sz != deq));
4602 
4603 		*max_time = RTE_MAX(*max_time, last_time);
4604 		*min_time = RTE_MIN(*min_time, last_time);
4605 		*total_time += last_time;
4606 
4607 		if (extDdr)
4608 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4609 
4610 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4611 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4612 					vector_mask);
4613 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4614 		}
4615 
4616 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4617 		dequeued += deq;
4618 	}
4619 	return i;
4620 }
4621 
4622 static int
4623 latency_test_enc(struct rte_mempool *mempool,
4624 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4625 		uint16_t dev_id, uint16_t queue_id,
4626 		const uint16_t num_to_process, uint16_t burst_sz,
4627 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4628 {
4629 	int ret = TEST_SUCCESS;
4630 	uint16_t i, j, dequeued;
4631 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4632 	uint64_t start_time = 0, last_time = 0;
4633 
4634 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4635 		uint16_t enq = 0, deq = 0;
4636 		bool first_time = true;
4637 		last_time = 0;
4638 
4639 		if (unlikely(num_to_process - dequeued < burst_sz))
4640 			burst_sz = num_to_process - dequeued;
4641 
4642 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4643 		TEST_ASSERT_SUCCESS(ret,
4644 				"rte_bbdev_enc_op_alloc_bulk() failed");
4645 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4646 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4647 					bufs->inputs,
4648 					bufs->hard_outputs,
4649 					ref_op);
4650 
4651 		/* Set counter to validate the ordering */
4652 		for (j = 0; j < burst_sz; ++j)
4653 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4654 
4655 		start_time = rte_rdtsc_precise();
4656 
4657 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4658 				burst_sz);
4659 		TEST_ASSERT(enq == burst_sz,
4660 				"Error enqueueing burst, expected %u, got %u",
4661 				burst_sz, enq);
4662 
4663 		/* Dequeue */
4664 		do {
4665 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4666 					&ops_deq[deq], burst_sz - deq);
4667 			if (likely(first_time && (deq > 0))) {
4668 				last_time += rte_rdtsc_precise() - start_time;
4669 				first_time = false;
4670 			}
4671 		} while (unlikely(burst_sz != deq));
4672 
4673 		*max_time = RTE_MAX(*max_time, last_time);
4674 		*min_time = RTE_MIN(*min_time, last_time);
4675 		*total_time += last_time;
4676 
4677 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4678 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4679 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4680 		}
4681 
4682 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4683 		dequeued += deq;
4684 	}
4685 
4686 	return i;
4687 }
4688 
4689 static int
4690 latency_test_ldpc_enc(struct rte_mempool *mempool,
4691 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4692 		uint16_t dev_id, uint16_t queue_id,
4693 		const uint16_t num_to_process, uint16_t burst_sz,
4694 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4695 {
4696 	int ret = TEST_SUCCESS;
4697 	uint16_t i, j, dequeued;
4698 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4699 	uint64_t start_time = 0, last_time = 0;
4700 
4701 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4702 		uint16_t enq = 0, deq = 0;
4703 		bool first_time = true;
4704 		last_time = 0;
4705 
4706 		if (unlikely(num_to_process - dequeued < burst_sz))
4707 			burst_sz = num_to_process - dequeued;
4708 
4709 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4710 		TEST_ASSERT_SUCCESS(ret,
4711 				"rte_bbdev_enc_op_alloc_bulk() failed");
4712 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4713 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4714 					bufs->inputs,
4715 					bufs->hard_outputs,
4716 					ref_op);
4717 
4718 		/* Set counter to validate the ordering */
4719 		for (j = 0; j < burst_sz; ++j)
4720 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4721 
4722 		start_time = rte_rdtsc_precise();
4723 
4724 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4725 				&ops_enq[enq], burst_sz);
4726 		TEST_ASSERT(enq == burst_sz,
4727 				"Error enqueueing burst, expected %u, got %u",
4728 				burst_sz, enq);
4729 
4730 		/* Dequeue */
4731 		do {
4732 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4733 					&ops_deq[deq], burst_sz - deq);
4734 			if (likely(first_time && (deq > 0))) {
4735 				last_time += rte_rdtsc_precise() - start_time;
4736 				first_time = false;
4737 			}
4738 		} while (unlikely(burst_sz != deq));
4739 
4740 		*max_time = RTE_MAX(*max_time, last_time);
4741 		*min_time = RTE_MIN(*min_time, last_time);
4742 		*total_time += last_time;
4743 
4744 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4745 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4746 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4747 		}
4748 
4749 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4750 		dequeued += deq;
4751 	}
4752 
4753 	return i;
4754 }
4755 
4756 
4757 static int
4758 latency_test_fft(struct rte_mempool *mempool,
4759 		struct test_buffers *bufs, struct rte_bbdev_fft_op *ref_op,
4760 		uint16_t dev_id, uint16_t queue_id,
4761 		const uint16_t num_to_process, uint16_t burst_sz,
4762 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4763 {
4764 	int ret = TEST_SUCCESS;
4765 	uint16_t i, j, dequeued;
4766 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4767 	uint64_t start_time = 0, last_time = 0;
4768 
4769 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4770 		uint16_t enq = 0, deq = 0;
4771 		bool first_time = true;
4772 		last_time = 0;
4773 
4774 		if (unlikely(num_to_process - dequeued < burst_sz))
4775 			burst_sz = num_to_process - dequeued;
4776 
4777 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
4778 		TEST_ASSERT_SUCCESS(ret,
4779 				"rte_bbdev_fft_op_alloc_bulk() failed");
4780 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4781 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
4782 					bufs->inputs,
4783 					bufs->hard_outputs, bufs->soft_outputs,
4784 					ref_op);
4785 
4786 		/* Set counter to validate the ordering */
4787 		for (j = 0; j < burst_sz; ++j)
4788 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4789 
4790 		start_time = rte_rdtsc_precise();
4791 
4792 		enq = rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
4793 				&ops_enq[enq], burst_sz);
4794 		TEST_ASSERT(enq == burst_sz,
4795 				"Error enqueueing burst, expected %u, got %u",
4796 				burst_sz, enq);
4797 
4798 		/* Dequeue */
4799 		do {
4800 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
4801 					&ops_deq[deq], burst_sz - deq);
4802 			if (likely(first_time && (deq > 0))) {
4803 				last_time += rte_rdtsc_precise() - start_time;
4804 				first_time = false;
4805 			}
4806 		} while (unlikely(burst_sz != deq));
4807 
4808 		*max_time = RTE_MAX(*max_time, last_time);
4809 		*min_time = RTE_MIN(*min_time, last_time);
4810 		*total_time += last_time;
4811 
4812 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4813 			ret = validate_fft_op(ops_deq, burst_sz, ref_op);
4814 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4815 		}
4816 
4817 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
4818 		dequeued += deq;
4819 	}
4820 
4821 	return i;
4822 }
4823 
4824 /* Common function for running validation and latency test cases */
4825 static int
4826 validation_latency_test(struct active_device *ad,
4827 		struct test_op_params *op_params, bool latency_flag)
4828 {
4829 	int iter;
4830 	uint16_t burst_sz = op_params->burst_sz;
4831 	const uint16_t num_to_process = op_params->num_to_process;
4832 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4833 	const uint16_t queue_id = ad->queue_ids[0];
4834 	struct test_buffers *bufs = NULL;
4835 	struct rte_bbdev_info info;
4836 	uint64_t total_time, min_time, max_time;
4837 	const char *op_type_str;
4838 
4839 	total_time = max_time = 0;
4840 	min_time = UINT64_MAX;
4841 
4842 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4843 			"BURST_SIZE should be <= %u", MAX_BURST);
4844 
4845 	rte_bbdev_info_get(ad->dev_id, &info);
4846 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4847 
4848 	op_type_str = rte_bbdev_op_type_str(op_type);
4849 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4850 
4851 	printf("+ ------------------------------------------------------- +\n");
4852 	if (latency_flag)
4853 		printf("== test: latency\ndev:");
4854 	else
4855 		printf("== test: validation\ndev:");
4856 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4857 			info.dev_name, burst_sz, num_to_process, op_type_str);
4858 
4859 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4860 		iter = latency_test_dec(op_params->mp, bufs,
4861 				op_params->ref_dec_op, op_params->vector_mask,
4862 				ad->dev_id, queue_id, num_to_process,
4863 				burst_sz, &total_time, &min_time, &max_time);
4864 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4865 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4866 				op_params->ref_enc_op, ad->dev_id, queue_id,
4867 				num_to_process, burst_sz, &total_time,
4868 				&min_time, &max_time);
4869 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4870 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4871 				op_params->ref_dec_op, op_params->vector_mask,
4872 				ad->dev_id, queue_id, num_to_process,
4873 				burst_sz, &total_time, &min_time, &max_time,
4874 				latency_flag);
4875 	else if (op_type == RTE_BBDEV_OP_FFT)
4876 		iter = latency_test_fft(op_params->mp, bufs,
4877 				op_params->ref_fft_op,
4878 				ad->dev_id, queue_id,
4879 				num_to_process, burst_sz, &total_time,
4880 				&min_time, &max_time);
4881 	else /* RTE_BBDEV_OP_TURBO_ENC */
4882 		iter = latency_test_enc(op_params->mp, bufs,
4883 				op_params->ref_enc_op,
4884 				ad->dev_id, queue_id,
4885 				num_to_process, burst_sz, &total_time,
4886 				&min_time, &max_time);
4887 
4888 	if (iter <= 0)
4889 		return TEST_FAILED;
4890 
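	/* Report per-burst latency; cycles are converted to microseconds as cycles * 1e6 / rte_get_tsc_hz(). */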
4891 	printf("Operation latency:\n"
4892 			"\tavg: %lg cycles, %lg us\n"
4893 			"\tmin: %lg cycles, %lg us\n"
4894 			"\tmax: %lg cycles, %lg us\n",
4895 			(double)total_time / (double)iter,
4896 			(double)(total_time * 1000000) / (double)iter /
4897 			(double)rte_get_tsc_hz(), (double)min_time,
4898 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4899 			(double)max_time, (double)(max_time * 1000000) /
4900 			(double)rte_get_tsc_hz());
4901 
4902 	return TEST_SUCCESS;
4903 }
4904 
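/* Latency test entry point: runs the common routine with the latency flag set. */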
4905 static int
4906 latency_test(struct active_device *ad, struct test_op_params *op_params)
4907 {
4908 	return validation_latency_test(ad, op_params, true);
4909 }
4910 
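/* Validation test entry point: runs the common routine with the latency flag cleared. */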
4911 static int
4912 validation_test(struct active_device *ad, struct test_op_params *op_params)
4913 {
4914 	return validation_latency_test(ad, op_params, false);
4915 }
4916 
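/* Copy the per-queue statistics straight from the device's internal queue data. */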
4917 static int
4918 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4919 		struct rte_bbdev_stats *stats)
4920 {
4921 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4922 	struct rte_bbdev_stats *q_stats;
4923 
4924 	if (queue_id >= dev->data->num_queues)
4925 		return -1;
4926 
4927 	q_stats = &dev->data->queues[queue_id].queue_stats;
4928 
4929 	stats->enqueued_count = q_stats->enqueued_count;
4930 	stats->dequeued_count = q_stats->dequeued_count;
4931 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4932 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4933 	stats->enqueue_warn_count = q_stats->enqueue_warn_count;
4934 	stats->dequeue_warn_count = q_stats->dequeue_warn_count;
4935 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4936 
4937 	return 0;
4938 }
4939 
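/* Measure driver enqueue/dequeue offload cost for FFT ops; accelerator cycles are taken from the queue stats. */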
4940 static int
4941 offload_latency_test_fft(struct rte_mempool *mempool, struct test_buffers *bufs,
4942 		struct rte_bbdev_fft_op *ref_op, uint16_t dev_id,
4943 		uint16_t queue_id, const uint16_t num_to_process,
4944 		uint16_t burst_sz, struct test_time_stats *time_st)
4945 {
4946 	int i, dequeued, ret;
4947 	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4948 	uint64_t enq_start_time, deq_start_time;
4949 	uint64_t enq_sw_last_time, deq_last_time;
4950 	struct rte_bbdev_stats stats;
4951 
4952 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4953 		uint16_t enq = 0, deq = 0;
4954 
4955 		if (unlikely(num_to_process - dequeued < burst_sz))
4956 			burst_sz = num_to_process - dequeued;
4957 
4958 		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
4959 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
4960 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4961 			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
4962 					bufs->inputs,
4963 					bufs->hard_outputs, bufs->soft_outputs,
4964 					ref_op);
4965 
4966 		/* Start time meas for enqueue function offload latency */
4967 		enq_start_time = rte_rdtsc_precise();
4968 		do {
4969 			enq += rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
4970 					&ops_enq[enq], burst_sz - enq);
4971 		} while (unlikely(burst_sz != enq));
4972 
4973 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4974 		TEST_ASSERT_SUCCESS(ret,
4975 				"Failed to get stats for queue (%u) of device (%u)",
4976 				queue_id, dev_id);
4977 
4978 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4979 				stats.acc_offload_cycles;
4980 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4981 				enq_sw_last_time);
4982 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4983 				enq_sw_last_time);
4984 		time_st->enq_sw_total_time += enq_sw_last_time;
4985 
4986 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4987 				stats.acc_offload_cycles);
4988 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4989 				stats.acc_offload_cycles);
4990 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4991 
4992 		/* give time for device to process ops */
4993 		rte_delay_us(WAIT_OFFLOAD_US);
4994 
4995 		/* Start time meas for dequeue function offload latency */
4996 		deq_start_time = rte_rdtsc_precise();
4997 		/* Dequeue at least one operation */
4998 		do {
4999 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5000 					&ops_deq[deq], enq);
5001 		} while (unlikely(deq == 0));
5002 
5003 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5004 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5005 				deq_last_time);
5006 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5007 				deq_last_time);
5008 		time_st->deq_total_time += deq_last_time;
5009 
5010 		/* Dequeue remaining operations if needed */
5011 		while (burst_sz != deq)
5012 			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
5013 					&ops_deq[deq], burst_sz - deq);
5014 
5015 		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
5016 		dequeued += deq;
5017 	}
5018 
5019 	return i;
5020 }
5021 
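/* Measure driver enqueue/dequeue offload cost for Turbo decode ops. */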
5022 static int
5023 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
5024 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5025 		uint16_t queue_id, const uint16_t num_to_process,
5026 		uint16_t burst_sz, struct test_time_stats *time_st)
5027 {
5028 	int i, dequeued, ret;
5029 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5030 	uint64_t enq_start_time, deq_start_time;
5031 	uint64_t enq_sw_last_time, deq_last_time;
5032 	struct rte_bbdev_stats stats;
5033 
5034 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5035 		uint16_t enq = 0, deq = 0;
5036 
5037 		if (unlikely(num_to_process - dequeued < burst_sz))
5038 			burst_sz = num_to_process - dequeued;
5039 
5040 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5041 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5042 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5043 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
5044 					bufs->inputs,
5045 					bufs->hard_outputs,
5046 					bufs->soft_outputs,
5047 					ref_op);
5048 
5049 		/* Start time meas for enqueue function offload latency */
5050 		enq_start_time = rte_rdtsc_precise();
5051 		do {
5052 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
5053 					&ops_enq[enq], burst_sz - enq);
5054 		} while (unlikely(burst_sz != enq));
5055 
5056 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5057 		TEST_ASSERT_SUCCESS(ret,
5058 				"Failed to get stats for queue (%u) of device (%u)",
5059 				queue_id, dev_id);
5060 
5061 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
5062 				stats.acc_offload_cycles;
5063 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5064 				enq_sw_last_time);
5065 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5066 				enq_sw_last_time);
5067 		time_st->enq_sw_total_time += enq_sw_last_time;
5068 
5069 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5070 				stats.acc_offload_cycles);
5071 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5072 				stats.acc_offload_cycles);
5073 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5074 
5075 		/* give time for device to process ops */
5076 		rte_delay_us(WAIT_OFFLOAD_US);
5077 
5078 		/* Start time meas for dequeue function offload latency */
5079 		deq_start_time = rte_rdtsc_precise();
5080 		/* Dequeue at least one operation */
5081 		do {
5082 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5083 					&ops_deq[deq], enq);
5084 		} while (unlikely(deq == 0));
5085 
5086 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5087 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5088 				deq_last_time);
5089 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5090 				deq_last_time);
5091 		time_st->deq_total_time += deq_last_time;
5092 
5093 		/* Dequeue remaining operations if needed */
5094 		while (burst_sz != deq)
5095 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
5096 					&ops_deq[deq], burst_sz - deq);
5097 
5098 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5099 		dequeued += deq;
5100 	}
5101 
5102 	return i;
5103 }
5104 
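/* Measure driver enqueue/dequeue offload cost for LDPC decode ops, including optional external HARQ DDR handling. */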
5105 static int
5106 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
5107 		struct test_buffers *bufs,
5108 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
5109 		uint16_t queue_id, const uint16_t num_to_process,
5110 		uint16_t burst_sz, struct test_time_stats *time_st)
5111 {
5112 	int i, dequeued, ret;
5113 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5114 	uint64_t enq_start_time, deq_start_time;
5115 	uint64_t enq_sw_last_time, deq_last_time;
5116 	struct rte_bbdev_stats stats;
5117 	bool extDdr = ldpc_cap_flags &
5118 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
5119 
5120 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5121 		uint16_t enq = 0, deq = 0;
5122 
5123 		if (unlikely(num_to_process - dequeued < burst_sz))
5124 			burst_sz = num_to_process - dequeued;
5125 
5126 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
5127 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
5128 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5129 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
5130 					bufs->inputs,
5131 					bufs->hard_outputs,
5132 					bufs->soft_outputs,
5133 					bufs->harq_inputs,
5134 					bufs->harq_outputs,
5135 					ref_op);
5136 
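		/* Preload the HARQ input into external DDR before the timed enqueue. */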
5137 		if (extDdr)
5138 			preload_harq_ddr(dev_id, queue_id, ops_enq,
5139 					burst_sz, true);
5140 
5141 		/* Start time meas for enqueue function offload latency */
5142 		enq_start_time = rte_rdtsc_precise();
5143 		do {
5144 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
5145 					&ops_enq[enq], burst_sz - enq);
5146 		} while (unlikely(burst_sz != enq));
5147 
5148 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5149 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5150 		TEST_ASSERT_SUCCESS(ret,
5151 				"Failed to get stats for queue (%u) of device (%u)",
5152 				queue_id, dev_id);
5153 
5154 		enq_sw_last_time -= stats.acc_offload_cycles;
5155 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5156 				enq_sw_last_time);
5157 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5158 				enq_sw_last_time);
5159 		time_st->enq_sw_total_time += enq_sw_last_time;
5160 
5161 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5162 				stats.acc_offload_cycles);
5163 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5164 				stats.acc_offload_cycles);
5165 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5166 
5167 		/* give time for device to process ops */
5168 		rte_delay_us(WAIT_OFFLOAD_US);
5169 
5170 		/* Start time meas for dequeue function offload latency */
5171 		deq_start_time = rte_rdtsc_precise();
5172 		/* Dequeue at least one operation */
5173 		do {
5174 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5175 					&ops_deq[deq], enq);
5176 		} while (unlikely(deq == 0));
5177 
5178 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5179 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5180 				deq_last_time);
5181 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5182 				deq_last_time);
5183 		time_st->deq_total_time += deq_last_time;
5184 
5185 		/* Dequeue remaining operations if needed */
5186 		while (burst_sz != deq)
5187 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
5188 					&ops_deq[deq], burst_sz - deq);
5189 
5190 		if (extDdr) {
5191 			/* Read loopback is not thread safe */
5192 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
5193 		}
5194 
5195 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
5196 		dequeued += deq;
5197 	}
5198 
5199 	return i;
5200 }
5201 
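/* Measure driver enqueue/dequeue offload cost for Turbo encode ops. */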
5202 static int
5203 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
5204 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5205 		uint16_t queue_id, const uint16_t num_to_process,
5206 		uint16_t burst_sz, struct test_time_stats *time_st)
5207 {
5208 	int i, dequeued, ret;
5209 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5210 	uint64_t enq_start_time, deq_start_time;
5211 	uint64_t enq_sw_last_time, deq_last_time;
5212 	struct rte_bbdev_stats stats;
5213 
5214 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5215 		uint16_t enq = 0, deq = 0;
5216 
5217 		if (unlikely(num_to_process - dequeued < burst_sz))
5218 			burst_sz = num_to_process - dequeued;
5219 
5220 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5221 		TEST_ASSERT_SUCCESS(ret,
5222 				"rte_bbdev_enc_op_alloc_bulk() failed");
5223 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5224 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
5225 					bufs->inputs,
5226 					bufs->hard_outputs,
5227 					ref_op);
5228 
5229 		/* Start time meas for enqueue function offload latency */
5230 		enq_start_time = rte_rdtsc_precise();
5231 		do {
5232 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
5233 					&ops_enq[enq], burst_sz - enq);
5234 		} while (unlikely(burst_sz != enq));
5235 
5236 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5237 
5238 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5239 		TEST_ASSERT_SUCCESS(ret,
5240 				"Failed to get stats for queue (%u) of device (%u)",
5241 				queue_id, dev_id);
5242 		enq_sw_last_time -= stats.acc_offload_cycles;
5243 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5244 				enq_sw_last_time);
5245 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5246 				enq_sw_last_time);
5247 		time_st->enq_sw_total_time += enq_sw_last_time;
5248 
5249 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5250 				stats.acc_offload_cycles);
5251 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5252 				stats.acc_offload_cycles);
5253 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5254 
5255 		/* give time for device to process ops */
5256 		rte_delay_us(WAIT_OFFLOAD_US);
5257 
5258 		/* Start time meas for dequeue function offload latency */
5259 		deq_start_time = rte_rdtsc_precise();
5260 		/* Dequeue at least one operation */
5261 		do {
5262 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5263 					&ops_deq[deq], enq);
5264 		} while (unlikely(deq == 0));
5265 
5266 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5267 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5268 				deq_last_time);
5269 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5270 				deq_last_time);
5271 		time_st->deq_total_time += deq_last_time;
5272 
5273 		while (burst_sz != deq)
5274 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
5275 					&ops_deq[deq], burst_sz - deq);
5276 
5277 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5278 		dequeued += deq;
5279 	}
5280 
5281 	return i;
5282 }
5283 
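/* Measure driver enqueue/dequeue offload cost for LDPC encode ops. */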
5284 static int
5285 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
5286 		struct test_buffers *bufs,
5287 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
5288 		uint16_t queue_id, const uint16_t num_to_process,
5289 		uint16_t burst_sz, struct test_time_stats *time_st)
5290 {
5291 	int i, dequeued, ret;
5292 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
5293 	uint64_t enq_start_time, deq_start_time;
5294 	uint64_t enq_sw_last_time, deq_last_time;
5295 	struct rte_bbdev_stats stats;
5296 
5297 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
5298 		uint16_t enq = 0, deq = 0;
5299 
5300 		if (unlikely(num_to_process - dequeued < burst_sz))
5301 			burst_sz = num_to_process - dequeued;
5302 
5303 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
5304 		TEST_ASSERT_SUCCESS(ret,
5305 				"rte_bbdev_enc_op_alloc_bulk() failed");
5306 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
5307 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
5308 					bufs->inputs,
5309 					bufs->hard_outputs,
5310 					ref_op);
5311 
5312 		/* Start time meas for enqueue function offload latency */
5313 		enq_start_time = rte_rdtsc_precise();
5314 		do {
5315 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
5316 					&ops_enq[enq], burst_sz - enq);
5317 		} while (unlikely(burst_sz != enq));
5318 
5319 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
5320 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
5321 		TEST_ASSERT_SUCCESS(ret,
5322 				"Failed to get stats for queue (%u) of device (%u)",
5323 				queue_id, dev_id);
5324 
5325 		enq_sw_last_time -= stats.acc_offload_cycles;
5326 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
5327 				enq_sw_last_time);
5328 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
5329 				enq_sw_last_time);
5330 		time_st->enq_sw_total_time += enq_sw_last_time;
5331 
5332 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
5333 				stats.acc_offload_cycles);
5334 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
5335 				stats.acc_offload_cycles);
5336 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
5337 
5338 		/* give time for device to process ops */
5339 		rte_delay_us(WAIT_OFFLOAD_US);
5340 
5341 		/* Start time meas for dequeue function offload latency */
5342 		deq_start_time = rte_rdtsc_precise();
5343 		/* Dequeue at least one operation */
5344 		do {
5345 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5346 					&ops_deq[deq], enq);
5347 		} while (unlikely(deq == 0));
5348 
5349 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5350 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
5351 				deq_last_time);
5352 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
5353 				deq_last_time);
5354 		time_st->deq_total_time += deq_last_time;
5355 
5356 		while (burst_sz != deq)
5357 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
5358 					&ops_deq[deq], burst_sz - deq);
5359 
5360 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
5361 		dequeued += deq;
5362 	}
5363 
5364 	return i;
5365 }
5366 
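/* Offload cost test: dispatch the per-op-type measurement and report SW vs accelerator enqueue cost and dequeue cost. */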
5367 static int
5368 offload_cost_test(struct active_device *ad,
5369 		struct test_op_params *op_params)
5370 {
5371 	int iter;
5372 	uint16_t burst_sz = op_params->burst_sz;
5373 	const uint16_t num_to_process = op_params->num_to_process;
5374 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5375 	const uint16_t queue_id = ad->queue_ids[0];
5376 	struct test_buffers *bufs = NULL;
5377 	struct rte_bbdev_info info;
5378 	const char *op_type_str;
5379 	struct test_time_stats time_st;
5380 
5381 	memset(&time_st, 0, sizeof(struct test_time_stats));
5382 	time_st.enq_sw_min_time = UINT64_MAX;
5383 	time_st.enq_acc_min_time = UINT64_MAX;
5384 	time_st.deq_min_time = UINT64_MAX;
5385 
5386 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5387 			"BURST_SIZE should be <= %u", MAX_BURST);
5388 
5389 	rte_bbdev_info_get(ad->dev_id, &info);
5390 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
5391 
5392 	op_type_str = rte_bbdev_op_type_str(op_type);
5393 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5394 
5395 	printf("+ ------------------------------------------------------- +\n");
5396 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5397 			info.dev_name, burst_sz, num_to_process, op_type_str);
5398 
5399 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
5400 		iter = offload_latency_test_dec(op_params->mp, bufs,
5401 				op_params->ref_dec_op, ad->dev_id, queue_id,
5402 				num_to_process, burst_sz, &time_st);
5403 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
5404 		iter = offload_latency_test_enc(op_params->mp, bufs,
5405 				op_params->ref_enc_op, ad->dev_id, queue_id,
5406 				num_to_process, burst_sz, &time_st);
5407 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5408 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
5409 				op_params->ref_enc_op, ad->dev_id, queue_id,
5410 				num_to_process, burst_sz, &time_st);
5411 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5412 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
5413 			op_params->ref_dec_op, ad->dev_id, queue_id,
5414 			num_to_process, burst_sz, &time_st);
5415 	else if (op_type == RTE_BBDEV_OP_FFT)
5416 		iter = offload_latency_test_fft(op_params->mp, bufs,
5417 			op_params->ref_fft_op, ad->dev_id, queue_id,
5418 			num_to_process, burst_sz, &time_st);
5419 	else
5420 		iter = offload_latency_test_enc(op_params->mp, bufs,
5421 				op_params->ref_enc_op, ad->dev_id, queue_id,
5422 				num_to_process, burst_sz, &time_st);
5423 
5424 	if (iter <= 0)
5425 		return TEST_FAILED;
5426 
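	/* The driver (SW) enqueue cost excludes the accelerator cycles reported in the queue stats; all figures are converted from TSC cycles to microseconds. */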
5427 	printf("Enqueue driver offload cost latency:\n"
5428 			"\tavg: %lg cycles, %lg us\n"
5429 			"\tmin: %lg cycles, %lg us\n"
5430 			"\tmax: %lg cycles, %lg us\n"
5431 			"Enqueue accelerator offload cost latency:\n"
5432 			"\tavg: %lg cycles, %lg us\n"
5433 			"\tmin: %lg cycles, %lg us\n"
5434 			"\tmax: %lg cycles, %lg us\n",
5435 			(double)time_st.enq_sw_total_time / (double)iter,
5436 			(double)(time_st.enq_sw_total_time * 1000000) /
5437 			(double)iter / (double)rte_get_tsc_hz(),
5438 			(double)time_st.enq_sw_min_time,
5439 			(double)(time_st.enq_sw_min_time * 1000000) /
5440 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
5441 			(double)(time_st.enq_sw_max_time * 1000000) /
5442 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
5443 			(double)iter,
5444 			(double)(time_st.enq_acc_total_time * 1000000) /
5445 			(double)iter / (double)rte_get_tsc_hz(),
5446 			(double)time_st.enq_acc_min_time,
5447 			(double)(time_st.enq_acc_min_time * 1000000) /
5448 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
5449 			(double)(time_st.enq_acc_max_time * 1000000) /
5450 			rte_get_tsc_hz());
5451 
5452 	printf("Dequeue offload cost latency - one op:\n"
5453 			"\tavg: %lg cycles, %lg us\n"
5454 			"\tmin: %lg cycles, %lg us\n"
5455 			"\tmax: %lg cycles, %lg us\n",
5456 			(double)time_st.deq_total_time / (double)iter,
5457 			(double)(time_st.deq_total_time * 1000000) /
5458 			(double)iter / (double)rte_get_tsc_hz(),
5459 			(double)time_st.deq_min_time,
5460 			(double)(time_st.deq_min_time * 1000000) /
5461 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
5462 			(double)(time_st.deq_max_time * 1000000) /
5463 			rte_get_tsc_hz());
5464 
5465 	struct rte_bbdev_stats stats = {0};
5466 	TEST_ASSERT_SUCCESS(get_bbdev_queue_stats(ad->dev_id, queue_id, &stats), "Failed to get stats for queue (%u)", queue_id);
5467 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
5468 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
5469 				"Mismatch in enqueue count %10"PRIu64" %d",
5470 				stats.enqueued_count, num_to_process);
5471 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
5472 				"Mismatch in dequeue count %10"PRIu64" %d",
5473 				stats.dequeued_count, num_to_process);
5474 	}
5475 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
5476 			"Enqueue error count %10"PRIu64"",
5477 			stats.enqueue_err_count);
5478 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
5479 			"Dequeue error count %10"PRIu64"",
5480 			stats.dequeue_err_count);
5481 
5482 	return TEST_SUCCESS;
5483 }
5484 
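/* Measure the cost of a dequeue call on an empty queue for decode ops. */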
5485 static int
5486 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
5487 		const uint16_t num_to_process, uint16_t burst_sz,
5488 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5489 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5490 {
5491 	int i, deq_total;
5492 	struct rte_bbdev_dec_op *ops[MAX_BURST];
5493 	uint64_t deq_start_time, deq_last_time;
5494 
5495 	/* Test deq offload latency from an empty queue */
5496 
5497 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5498 			++i, deq_total += burst_sz) {
5499 		deq_start_time = rte_rdtsc_precise();
5500 
5501 		if (unlikely(num_to_process - deq_total < burst_sz))
5502 			burst_sz = num_to_process - deq_total;
5503 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
5504 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
5505 					burst_sz);
5506 		else
5507 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
5508 					burst_sz);
5509 
5510 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5511 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5512 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5513 		*deq_total_time += deq_last_time;
5514 	}
5515 
5516 	return i;
5517 }
5518 
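/* Measure the cost of a dequeue call on an empty queue for encode ops. */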
5519 static int
5520 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
5521 		const uint16_t num_to_process, uint16_t burst_sz,
5522 		uint64_t *deq_total_time, uint64_t *deq_min_time,
5523 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
5524 {
5525 	int i, deq_total;
5526 	struct rte_bbdev_enc_op *ops[MAX_BURST];
5527 	uint64_t deq_start_time, deq_last_time;
5528 
5529 	/* Test deq offload latency from an empty queue */
5530 	for (i = 0, deq_total = 0; deq_total < num_to_process;
5531 			++i, deq_total += burst_sz) {
5532 		deq_start_time = rte_rdtsc_precise();
5533 
5534 		if (unlikely(num_to_process - deq_total < burst_sz))
5535 			burst_sz = num_to_process - deq_total;
5536 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
5537 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
5538 					burst_sz);
5539 		else
5540 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
5541 					burst_sz);
5542 
5543 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
5544 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
5545 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
5546 		*deq_total_time += deq_last_time;
5547 	}
5548 
5549 	return i;
5550 }
5551 
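/* Empty-queue dequeue test: dispatch per op type and report min/avg/max cost. */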
5552 static int
5553 offload_latency_empty_q_test(struct active_device *ad,
5554 		struct test_op_params *op_params)
5555 {
5556 	int iter;
5557 	uint64_t deq_total_time, deq_min_time, deq_max_time;
5558 	uint16_t burst_sz = op_params->burst_sz;
5559 	const uint16_t num_to_process = op_params->num_to_process;
5560 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
5561 	const uint16_t queue_id = ad->queue_ids[0];
5562 	struct rte_bbdev_info info;
5563 	const char *op_type_str;
5564 
5565 	deq_total_time = deq_max_time = 0;
5566 	deq_min_time = UINT64_MAX;
5567 
5568 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
5569 			"BURST_SIZE should be <= %u", MAX_BURST);
5570 
5571 	rte_bbdev_info_get(ad->dev_id, &info);
5572 
5573 	op_type_str = rte_bbdev_op_type_str(op_type);
5574 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
5575 
5576 	printf("+ ------------------------------------------------------- +\n");
5577 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
5578 			info.dev_name, burst_sz, num_to_process, op_type_str);
5579 
5580 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
5581 			op_type == RTE_BBDEV_OP_LDPC_DEC)
5582 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
5583 				num_to_process, burst_sz, &deq_total_time,
5584 				&deq_min_time, &deq_max_time, op_type);
5585 	else
5586 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
5587 				num_to_process, burst_sz, &deq_total_time,
5588 				&deq_min_time, &deq_max_time, op_type);
5589 
5590 	if (iter <= 0)
5591 		return TEST_FAILED;
5592 
5593 	printf("Empty dequeue offload:\n"
5594 			"\tavg: %lg cycles, %lg us\n"
5595 			"\tmin: %lg cycles, %lg us\n"
5596 			"\tmax: %lg cycles, %lg us\n",
5597 			(double)deq_total_time / (double)iter,
5598 			(double)(deq_total_time * 1000000) / (double)iter /
5599 			(double)rte_get_tsc_hz(), (double)deq_min_time,
5600 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
5601 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
5602 			rte_get_tsc_hz());
5603 
5604 	return TEST_SUCCESS;
5605 }
5606 
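/* Thin wrappers binding each test routine to the unit test framework. */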
5607 static int
5608 bler_tc(void)
5609 {
5610 	return run_test_case(bler_test);
5611 }
5612 
5613 static int
5614 throughput_tc(void)
5615 {
5616 	return run_test_case(throughput_test);
5617 }
5618 
5619 static int
5620 offload_cost_tc(void)
5621 {
5622 	return run_test_case(offload_cost_test);
5623 }
5624 
5625 static int
5626 offload_latency_empty_q_tc(void)
5627 {
5628 	return run_test_case(offload_latency_empty_q_test);
5629 }
5630 
5631 static int
5632 latency_tc(void)
5633 {
5634 	return run_test_case(latency_test);
5635 }
5636 
5637 static int
5638 validation_tc(void)
5639 {
5640 	return run_test_case(validation_test);
5641 }
5642 
5643 static int
5644 interrupt_tc(void)
5645 {
5646 	return run_test_case(throughput_test);
5647 }
5648 
5649 static struct unit_test_suite bbdev_bler_testsuite = {
5650 	.suite_name = "BBdev BLER Tests",
5651 	.setup = testsuite_setup,
5652 	.teardown = testsuite_teardown,
5653 	.unit_test_cases = {
5654 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5655 		TEST_CASES_END() /**< NULL terminate unit test array */
5656 	}
5657 };
5658 
5659 static struct unit_test_suite bbdev_throughput_testsuite = {
5660 	.suite_name = "BBdev Throughput Tests",
5661 	.setup = testsuite_setup,
5662 	.teardown = testsuite_teardown,
5663 	.unit_test_cases = {
5664 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5665 		TEST_CASES_END() /**< NULL terminate unit test array */
5666 	}
5667 };
5668 
5669 static struct unit_test_suite bbdev_validation_testsuite = {
5670 	.suite_name = "BBdev Validation Tests",
5671 	.setup = testsuite_setup,
5672 	.teardown = testsuite_teardown,
5673 	.unit_test_cases = {
5674 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5675 		TEST_CASES_END() /**< NULL terminate unit test array */
5676 	}
5677 };
5678 
5679 static struct unit_test_suite bbdev_latency_testsuite = {
5680 	.suite_name = "BBdev Latency Tests",
5681 	.setup = testsuite_setup,
5682 	.teardown = testsuite_teardown,
5683 	.unit_test_cases = {
5684 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5685 		TEST_CASES_END() /**< NULL terminate unit test array */
5686 	}
5687 };
5688 
5689 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5690 	.suite_name = "BBdev Offload Cost Tests",
5691 	.setup = testsuite_setup,
5692 	.teardown = testsuite_teardown,
5693 	.unit_test_cases = {
5694 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5695 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5696 		TEST_CASES_END() /**< NULL terminate unit test array */
5697 	}
5698 };
5699 
5700 static struct unit_test_suite bbdev_interrupt_testsuite = {
5701 	.suite_name = "BBdev Interrupt Tests",
5702 	.setup = interrupt_testsuite_setup,
5703 	.teardown = testsuite_teardown,
5704 	.unit_test_cases = {
5705 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5706 		TEST_CASES_END() /**< NULL terminate unit test array */
5707 	}
5708 };
5709 
5710 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5711 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5712 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5713 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5714 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5715 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5716