1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <inttypes.h>
8 #include <math.h>
9 
10 #include <rte_eal.h>
11 #include <rte_common.h>
12 #include <rte_dev.h>
13 #include <rte_launch.h>
14 #include <rte_bbdev.h>
15 #include <rte_cycles.h>
16 #include <rte_lcore.h>
17 #include <rte_malloc.h>
18 #include <rte_random.h>
19 #include <rte_hexdump.h>
20 #include <rte_interrupts.h>
21 
22 #include "main.h"
23 #include "test_bbdev_vector.h"
24 
25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
26 
27 #define MAX_QUEUES RTE_MAX_LCORE
28 #define TEST_REPETITIONS 100
29 #define WAIT_OFFLOAD_US 1000
30 
31 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
32 #include <fpga_lte_fec.h>
33 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
34 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
35 #define VF_UL_4G_QUEUE_VALUE 4
36 #define VF_DL_4G_QUEUE_VALUE 4
37 #define UL_4G_BANDWIDTH 3
38 #define DL_4G_BANDWIDTH 3
39 #define UL_4G_LOAD_BALANCE 128
40 #define DL_4G_LOAD_BALANCE 128
41 #define FLR_4G_TIMEOUT 610
42 #endif
43 
44 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
45 #include <rte_pmd_fpga_5gnr_fec.h>
46 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
47 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
48 #define VF_UL_5G_QUEUE_VALUE 4
49 #define VF_DL_5G_QUEUE_VALUE 4
50 #define UL_5G_BANDWIDTH 3
51 #define DL_5G_BANDWIDTH 3
52 #define UL_5G_LOAD_BALANCE 128
53 #define DL_5G_LOAD_BALANCE 128
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67 
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70 
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74 
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for filler LLR insertion in the HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants for the K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
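
/*
 * Illustrative sketch only (k0_sketch() is a hypothetical helper, not
 * used by this test): how the constants above map to the k0 starting
 * position of 3GPP TS 38.212 Table 5.4.2.1-2 for LDPC rate matching,
 * given the circular buffer length n_cb, lifting size z_c, base graph
 * and redundancy version. Integer division provides the floor() of the
 * table expressions.
 */
static inline uint16_t
k0_sketch(uint16_t n_cb, uint16_t z_c, uint8_t basegraph, uint8_t rv_index)
{
	/* N_ZC is N / Zc, i.e. 66 for BG1 and 50 for BG2 */
	uint16_t n_zc = (basegraph == 1) ? N_ZC_1 : N_ZC_2;

	if (rv_index == 0)
		return 0;
	if (rv_index == 1)
		return ((basegraph == 1 ? K0_1_1 : K0_1_2) * n_cb
				/ (n_zc * z_c)) * z_c;
	if (rv_index == 2)
		return ((basegraph == 1 ? K0_2_1 : K0_2_2) * n_cb
				/ (n_zc * z_c)) * z_c;
	return ((basegraph == 1 ? K0_3_1 : K0_3_2) * n_cb
			/ (n_zc * z_c)) * z_c;
}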
89 
90 static struct test_bbdev_vector test_vector;
91 
92 /* Switch between PMD and Interrupt for throughput TC */
93 static bool intr_enabled;
94 
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100 
101 /* Represents tested active devices */
102 static struct active_device {
103 	const char *driver_name;
104 	uint8_t dev_id;
105 	uint16_t supported_ops;
106 	uint16_t queue_ids[MAX_QUEUES];
107 	uint16_t nb_queues;
108 	struct rte_mempool *ops_mempool;
109 	struct rte_mempool *in_mbuf_pool;
110 	struct rte_mempool *hard_out_mbuf_pool;
111 	struct rte_mempool *soft_out_mbuf_pool;
112 	struct rte_mempool *harq_in_mbuf_pool;
113 	struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115 
116 static uint8_t nb_active_devs;
117 
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120 	struct rte_bbdev_op_data *inputs;
121 	struct rte_bbdev_op_data *hard_outputs;
122 	struct rte_bbdev_op_data *soft_outputs;
123 	struct rte_bbdev_op_data *harq_inputs;
124 	struct rte_bbdev_op_data *harq_outputs;
125 };
126 
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129 	struct rte_mempool *mp;
130 	struct rte_bbdev_dec_op *ref_dec_op;
131 	struct rte_bbdev_enc_op *ref_enc_op;
132 	uint16_t burst_sz;
133 	uint16_t num_to_process;
134 	uint16_t num_lcores;
135 	int vector_mask;
136 	uint16_t sync;
137 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139 
140 /* Contains per lcore params */
141 struct thread_params {
142 	uint8_t dev_id;
143 	uint16_t queue_id;
144 	uint32_t lcore_id;
145 	uint64_t start_time;
146 	double ops_per_sec;
147 	double mbps;
148 	uint8_t iter_count;
149 	double iter_average;
150 	double bler;
151 	uint16_t nb_dequeued;
152 	int16_t processing_status;
153 	uint16_t burst_sz;
154 	struct test_op_params *op_params;
155 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158 
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162 	/* Stores software enqueue total working time */
163 	uint64_t enq_sw_total_time;
164 	/* Stores minimum value of software enqueue working time */
165 	uint64_t enq_sw_min_time;
166 	/* Stores maximum value of software enqueue working time */
167 	uint64_t enq_sw_max_time;
168 	/* Stores turbo enqueue total working time */
169 	uint64_t enq_acc_total_time;
170 	/* Stores minimum value of accelerator enqueue working time */
171 	uint64_t enq_acc_min_time;
172 	/* Stores maximum value of accelerator enqueue working time */
173 	uint64_t enq_acc_max_time;
174 	/* Stores dequeue total working time */
175 	uint64_t deq_total_time;
176 	/* Stores minimum value of dequeue working time */
177 	uint64_t deq_min_time;
178 	/* Stores maximum value of dequeue working time */
179 	uint64_t deq_max_time;
180 };
181 #endif
182 
183 typedef int (test_case_function)(struct active_device *ad,
184 		struct test_op_params *op_params);
185 
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189 	m->pkt_len = 0;
190 
191 	do {
192 		m->data_len = 0;
193 		m = m->next;
194 	} while (m != NULL);
195 }
196 
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201 	return bitmap & bitmask;
202 }
203 
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207 	ad->supported_ops |= (1 << op_type);
208 }
209 
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213 	return ad->supported_ops & (1 << op_type);
214 }
215 
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219 	return (flags_req & flags_present) == flags_req;
220 }
221 
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229 
230 /* Convert all the test vector op data entries to big-endian
231  * format. This is used when the device expects its input in
232  * big-endian format.
233  */
234 static inline void
235 convert_op_data_to_be(void)
236 {
237 	struct op_data_entries *op;
238 	enum op_data_type type;
239 	uint8_t nb_segs, *rem_data, temp;
240 	uint32_t *data, len;
241 	int complete, rem, i, j;
242 
243 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
244 		nb_segs = test_vector.entries[type].nb_segments;
245 		op = &test_vector.entries[type];
246 
247 		/* Invert byte endianness for all the segments */
248 		for (i = 0; i < nb_segs; ++i) {
249 			len = op->segments[i].length;
250 			data = op->segments[i].addr;
251 
252 			/* Swap complete u32 bytes */
253 			complete = len / 4;
254 			for (j = 0; j < complete; j++)
255 				data[j] = rte_bswap32(data[j]);
256 
257 			/* Swap any remaining bytes */
258 			rem = len % 4;
259 			rem_data = (uint8_t *)&data[j];
260 			for (j = 0; j < rem/2; j++) {
261 				temp = rem_data[j];
262 				rem_data[j] = rem_data[rem - j - 1];
263 				rem_data[rem - j - 1] = temp;
264 			}
265 		}
266 	}
267 }
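
/*
 * Worked example of the swap above (illustrative only): a 6-byte
 * segment { A, B, C, D, E, F } contains one complete 32-bit word plus
 * a 2-byte remainder and becomes { D, C, B, A, F, E }.
 */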
268 
269 static int
270 check_dev_cap(const struct rte_bbdev_info *dev_info)
271 {
272 	unsigned int i;
273 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
274 		nb_harq_inputs, nb_harq_outputs;
275 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
276 	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
277 
278 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
279 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
280 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
281 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
282 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
283 
284 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
285 		if (op_cap->type != test_vector.op_type)
286 			continue;
287 
288 		if (dev_data_endianness == RTE_BIG_ENDIAN)
289 			convert_op_data_to_be();
290 
291 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
292 			const struct rte_bbdev_op_cap_turbo_dec *cap =
293 					&op_cap->cap.turbo_dec;
294 			/* Ignore lack of soft output capability, just skip
295 			 * checking if soft output is valid.
296 			 */
297 			if ((test_vector.turbo_dec.op_flags &
298 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
299 					!(cap->capability_flags &
300 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
301 				printf(
302 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
303 					dev_info->dev_name);
304 				clear_soft_out_cap(
305 					&test_vector.turbo_dec.op_flags);
306 			}
307 
308 			if (!flags_match(test_vector.turbo_dec.op_flags,
309 					cap->capability_flags))
310 				return TEST_FAILED;
311 			if (nb_inputs > cap->num_buffers_src) {
312 				printf("Too many inputs defined: %u, max: %u\n",
313 					nb_inputs, cap->num_buffers_src);
314 				return TEST_FAILED;
315 			}
316 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
317 					(test_vector.turbo_dec.op_flags &
318 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319 				printf(
320 					"Too many soft outputs defined: %u, max: %u\n",
321 						nb_soft_outputs,
322 						cap->num_buffers_soft_out);
323 				return TEST_FAILED;
324 			}
325 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
326 				printf(
327 					"Too many hard outputs defined: %u, max: %u\n",
328 						nb_hard_outputs,
329 						cap->num_buffers_hard_out);
330 				return TEST_FAILED;
331 			}
332 			if (intr_enabled && !(cap->capability_flags &
333 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
334 				printf(
335 					"Dequeue interrupts are not supported!\n");
336 				return TEST_FAILED;
337 			}
338 
339 			return TEST_SUCCESS;
340 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
341 			const struct rte_bbdev_op_cap_turbo_enc *cap =
342 					&op_cap->cap.turbo_enc;
343 
344 			if (!flags_match(test_vector.turbo_enc.op_flags,
345 					cap->capability_flags))
346 				return TEST_FAILED;
347 			if (nb_inputs > cap->num_buffers_src) {
348 				printf("Too many inputs defined: %u, max: %u\n",
349 					nb_inputs, cap->num_buffers_src);
350 				return TEST_FAILED;
351 			}
352 			if (nb_hard_outputs > cap->num_buffers_dst) {
353 				printf(
354 					"Too many hard outputs defined: %u, max: %u\n",
355 					nb_hard_outputs, cap->num_buffers_dst);
356 				return TEST_FAILED;
357 			}
358 			if (intr_enabled && !(cap->capability_flags &
359 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
360 				printf(
361 					"Dequeue interrupts are not supported!\n");
362 				return TEST_FAILED;
363 			}
364 
365 			return TEST_SUCCESS;
366 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
367 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
368 					&op_cap->cap.ldpc_enc;
369 
370 			if (!flags_match(test_vector.ldpc_enc.op_flags,
371 					cap->capability_flags)){
372 				printf("Flag Mismatch\n");
373 				return TEST_FAILED;
374 			}
375 			if (nb_inputs > cap->num_buffers_src) {
376 				printf("Too many inputs defined: %u, max: %u\n",
377 					nb_inputs, cap->num_buffers_src);
378 				return TEST_FAILED;
379 			}
380 			if (nb_hard_outputs > cap->num_buffers_dst) {
381 				printf(
382 					"Too many hard outputs defined: %u, max: %u\n",
383 					nb_hard_outputs, cap->num_buffers_dst);
384 				return TEST_FAILED;
385 			}
386 			if (intr_enabled && !(cap->capability_flags &
387 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
388 				printf(
389 					"Dequeue interrupts are not supported!\n");
390 				return TEST_FAILED;
391 			}
392 
393 			return TEST_SUCCESS;
394 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
395 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
396 					&op_cap->cap.ldpc_dec;
397 
398 			if (!flags_match(test_vector.ldpc_dec.op_flags,
399 					cap->capability_flags)){
400 				printf("Flag Mismatch\n");
401 				return TEST_FAILED;
402 			}
403 			if (nb_inputs > cap->num_buffers_src) {
404 				printf("Too many inputs defined: %u, max: %u\n",
405 					nb_inputs, cap->num_buffers_src);
406 				return TEST_FAILED;
407 			}
408 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
409 				printf(
410 					"Too many hard outputs defined: %u, max: %u\n",
411 					nb_hard_outputs,
412 					cap->num_buffers_hard_out);
413 				return TEST_FAILED;
414 			}
415 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
416 				printf(
417 					"Too many HARQ inputs defined: %u, max: %u\n",
418 					nb_harq_inputs,
419 					cap->num_buffers_hard_out);
420 				return TEST_FAILED;
421 			}
422 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
423 				printf(
424 					"Too many HARQ outputs defined: %u, max: %u\n",
425 					nb_harq_outputs,
426 					cap->num_buffers_hard_out);
427 				return TEST_FAILED;
428 			}
429 			if (intr_enabled && !(cap->capability_flags &
430 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
431 				printf(
432 					"Dequeue interrupts are not supported!\n");
433 				return TEST_FAILED;
434 			}
435 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
436 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
437 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
438 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
439 					))) {
440 				printf("Skip loop-back with interrupt\n");
441 				return TEST_FAILED;
442 			}
443 			return TEST_SUCCESS;
444 		}
445 	}
446 
447 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
448 		return TEST_SUCCESS; /* Special case for NULL device */
449 
450 	return TEST_FAILED;
451 }
452 
453 /* Calculate an optimal mempool size (of the form 2^n - 1) not smaller than val */
454 static unsigned int
455 optimal_mempool_size(unsigned int val)
456 {
457 	return rte_align32pow2(val + 1) - 1;
458 }
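
/*
 * Example (illustrative): optimal_mempool_size(600) returns
 * rte_align32pow2(601) - 1 = 1023. Sizes of the form 2^n - 1 are the
 * memory-optimal mempool sizes because the ring backing the mempool is
 * itself rounded up to the next power of two.
 */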
459 
460 /* allocates mbuf mempool for inputs and outputs */
461 static struct rte_mempool *
462 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
463 		int socket_id, unsigned int mbuf_pool_size,
464 		const char *op_type_str)
465 {
466 	unsigned int i;
467 	uint32_t max_seg_sz = 0;
468 	char pool_name[RTE_MEMPOOL_NAMESIZE];
469 
470 	/* find max input segment size */
471 	for (i = 0; i < entries->nb_segments; ++i)
472 		if (entries->segments[i].length > max_seg_sz)
473 			max_seg_sz = entries->segments[i].length;
474 
475 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476 			dev_id);
477 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
478 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
479 					+ FILLER_HEADROOM,
480 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
481 }
482 
483 static int
484 create_mempools(struct active_device *ad, int socket_id,
485 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
486 {
487 	struct rte_mempool *mp;
488 	unsigned int ops_pool_size, mbuf_pool_size = 0;
489 	char pool_name[RTE_MEMPOOL_NAMESIZE];
490 	const char *op_type_str;
491 	enum rte_bbdev_op_type op_type = org_op_type;
492 
493 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
494 	struct op_data_entries *hard_out =
495 			&test_vector.entries[DATA_HARD_OUTPUT];
496 	struct op_data_entries *soft_out =
497 			&test_vector.entries[DATA_SOFT_OUTPUT];
498 	struct op_data_entries *harq_in =
499 			&test_vector.entries[DATA_HARQ_INPUT];
500 	struct op_data_entries *harq_out =
501 			&test_vector.entries[DATA_HARQ_OUTPUT];
502 
503 	/* allocate ops mempool */
504 	ops_pool_size = optimal_mempool_size(RTE_MAX(
505 			/* Ops used plus 1 reference op */
506 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
507 			/* Minimal cache size plus 1 reference op */
508 			(unsigned int)(1.5 * rte_lcore_count() *
509 					OPS_CACHE_SIZE + 1)),
510 			OPS_POOL_SIZE_MIN));
511 
512 	if (org_op_type == RTE_BBDEV_OP_NONE)
513 		op_type = RTE_BBDEV_OP_TURBO_ENC;
514 
515 	op_type_str = rte_bbdev_op_type_str(op_type);
516 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
517 
518 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
519 			ad->dev_id);
520 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
521 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
522 	TEST_ASSERT_NOT_NULL(mp,
523 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
524 			ops_pool_size,
525 			ad->dev_id,
526 			socket_id);
527 	ad->ops_mempool = mp;
528 
529 	/* Do not create input and output mbufs for the BaseBand Null Device */
530 	if (org_op_type == RTE_BBDEV_OP_NONE)
531 		return TEST_SUCCESS;
532 
533 	/* Inputs */
534 	if (in->nb_segments > 0) {
535 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
536 				in->nb_segments);
537 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
538 				mbuf_pool_size, "in");
539 		TEST_ASSERT_NOT_NULL(mp,
540 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
541 				mbuf_pool_size,
542 				ad->dev_id,
543 				socket_id);
544 		ad->in_mbuf_pool = mp;
545 	}
546 
547 	/* Hard outputs */
548 	if (hard_out->nb_segments > 0) {
549 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
550 				hard_out->nb_segments);
551 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
552 				mbuf_pool_size,
553 				"hard_out");
554 		TEST_ASSERT_NOT_NULL(mp,
555 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
556 				mbuf_pool_size,
557 				ad->dev_id,
558 				socket_id);
559 		ad->hard_out_mbuf_pool = mp;
560 	}
561 
562 	/* Soft outputs */
563 	if (soft_out->nb_segments > 0) {
564 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
565 				soft_out->nb_segments);
566 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
567 				mbuf_pool_size,
568 				"soft_out");
569 		TEST_ASSERT_NOT_NULL(mp,
570 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
571 				mbuf_pool_size,
572 				ad->dev_id,
573 				socket_id);
574 		ad->soft_out_mbuf_pool = mp;
575 	}
576 
577 	/* HARQ inputs */
578 	if (harq_in->nb_segments > 0) {
579 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
580 				harq_in->nb_segments);
581 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
582 				mbuf_pool_size,
583 				"harq_in");
584 		TEST_ASSERT_NOT_NULL(mp,
585 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
586 				mbuf_pool_size,
587 				ad->dev_id,
588 				socket_id);
589 		ad->harq_in_mbuf_pool = mp;
590 	}
591 
592 	/* HARQ outputs */
593 	if (harq_out->nb_segments > 0) {
594 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
595 				harq_out->nb_segments);
596 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
597 				mbuf_pool_size,
598 				"harq_out");
599 		TEST_ASSERT_NOT_NULL(mp,
600 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
601 				mbuf_pool_size,
602 				ad->dev_id,
603 				socket_id);
604 		ad->harq_out_mbuf_pool = mp;
605 	}
606 
607 	return TEST_SUCCESS;
608 }
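
/*
 * Sizing example for the ops pool above (illustrative, assuming 4
 * queues, num_ops = 2047 and 4 lcores): RTE_MAX(4 * 2047 + 1,
 * 1.5 * 4 * 256 + 1) = 8189 already exceeds OPS_POOL_SIZE_MIN, so
 * ops_pool_size = optimal_mempool_size(8189) = 16383.
 */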
609 
610 static int
611 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
612 		struct test_bbdev_vector *vector)
613 {
614 	int ret;
615 	unsigned int queue_id;
616 	struct rte_bbdev_queue_conf qconf;
617 	struct active_device *ad = &active_devs[nb_active_devs];
618 	unsigned int nb_queues;
619 	enum rte_bbdev_op_type op_type = vector->op_type;
620 
621 /* Configure fpga lte fec with PF & VF values
622  * if '-i' flag is set and using fpga device
623  */
624 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
625 	if ((get_init_device() == true) &&
626 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
627 		struct rte_fpga_lte_fec_conf conf;
628 		unsigned int i;
629 
630 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
631 				info->drv.driver_name);
632 
633 		/* clear default configuration before initialization */
634 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
635 
636 		/* Set PF mode :
637 		 * true if PF is used for data plane
638 		 * false for VFs
639 		 */
640 		conf.pf_mode_en = true;
641 
642 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
643 			/* Number of UL queues per VF (fpga supports 8 VFs) */
644 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
645 			/* Number of DL queues per VF (fpga supports 8 VFs) */
646 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
647 		}
648 
649 		/* UL bandwidth. Needed for schedule algorithm */
650 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
651 		/* DL bandwidth */
652 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
653 
654 		/* UL & DL load balance factor */
655 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
656 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
657 
658 		/* FLR timeout value */
659 		conf.flr_time_out = FLR_4G_TIMEOUT;
660 
661 		/* setup FPGA PF with configuration information */
662 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
663 		TEST_ASSERT_SUCCESS(ret,
664 				"Failed to configure 4G FPGA PF for bbdev %s",
665 				info->dev_name);
666 	}
667 #endif
668 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
669 	if ((get_init_device() == true) &&
670 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
671 		struct rte_fpga_5gnr_fec_conf conf;
672 		unsigned int i;
673 
674 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
675 				info->drv.driver_name);
676 
677 		/* clear default configuration before initialization */
678 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
679 
680 		/* Set PF mode :
681 		 * true if PF is used for data plane
682 		 * false for VFs
683 		 */
684 		conf.pf_mode_en = true;
685 
686 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
687 			/* Number of UL queues per VF (fpga supports 8 VFs) */
688 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
689 			/* Number of DL queues per VF (fpga supports 8 VFs) */
690 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
691 		}
692 
693 		/* UL bandwidth. Needed for schedule algorithm */
694 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
695 		/* DL bandwidth */
696 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
697 
698 		/* UL & DL load balance factor */
699 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
700 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
701 
702 		/* setup FPGA PF with configuration information */
703 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
704 		TEST_ASSERT_SUCCESS(ret,
705 				"Failed to configure 5G FPGA PF for bbdev %s",
706 				info->dev_name);
707 	}
708 #endif
709 #ifdef RTE_BASEBAND_ACC100
710 	if ((get_init_device() == true) &&
711 			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
712 		struct rte_acc100_conf conf;
713 		unsigned int i;
714 
715 		printf("Configure ACC100/ACC101 FEC Driver %s with default values\n",
716 				info->drv.driver_name);
717 
718 		/* clear default configuration before initialization */
719 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
720 
721 		/* Always set in PF mode for built-in configuration */
722 		conf.pf_mode_en = true;
723 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
724 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
725 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
726 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
727 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
728 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
729 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
730 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
731 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
732 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
733 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
734 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
735 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
736 		}
737 
738 		conf.input_pos_llr_1_bit = true;
739 		conf.output_pos_llr_1_bit = true;
740 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
741 
742 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
743 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
744 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
745 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
746 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
747 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
748 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
749 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
750 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
751 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
752 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
753 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
754 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
755 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
756 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
757 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
758 
759 		/* setup PF with configuration information */
760 		ret = rte_acc10x_configure(info->dev_name, &conf);
761 		TEST_ASSERT_SUCCESS(ret,
762 				"Failed to configure ACC100 PF for bbdev %s",
763 				info->dev_name);
764 	}
765 #endif
766 	/* Refresh the device info now that the device is configured */
767 	rte_bbdev_info_get(dev_id, info);
768 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
769 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
770 
771 	/* setup device */
772 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
773 	if (ret < 0) {
774 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
775 				dev_id, nb_queues, info->socket_id, ret);
776 		return TEST_FAILED;
777 	}
778 
779 	/* configure interrupts if needed */
780 	if (intr_enabled) {
781 		ret = rte_bbdev_intr_enable(dev_id);
782 		if (ret < 0) {
783 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
784 					ret);
785 			return TEST_FAILED;
786 		}
787 	}
788 
789 	/* setup device queues */
790 	qconf.socket = info->socket_id;
791 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
792 	qconf.priority = 0;
793 	qconf.deferred_start = 0;
794 	qconf.op_type = op_type;
795 
796 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
797 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
798 		if (ret != 0) {
799 			printf(
800 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
801 					queue_id, qconf.priority, dev_id);
802 			qconf.priority++;
803 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
804 					&qconf);
805 		}
806 		if (ret != 0) {
807 			printf("All queues on dev %u allocated: %u\n",
808 					dev_id, queue_id);
809 			break;
810 		}
811 		ad->queue_ids[queue_id] = queue_id;
812 	}
813 	TEST_ASSERT(queue_id != 0,
814 			"ERROR Failed to configure any queues on dev %u",
815 			dev_id);
816 	ad->nb_queues = queue_id;
817 
818 	set_avail_op(ad, op_type);
819 
820 	return TEST_SUCCESS;
821 }
822 
823 static int
824 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
825 		struct test_bbdev_vector *vector)
826 {
827 	int ret;
828 
829 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
830 	active_devs[nb_active_devs].dev_id = dev_id;
831 
832 	ret = add_bbdev_dev(dev_id, info, vector);
833 	if (ret == TEST_SUCCESS)
834 		++nb_active_devs;
835 	return ret;
836 }
837 
838 static uint8_t
839 populate_active_devices(void)
840 {
841 	int ret;
842 	uint8_t dev_id;
843 	uint8_t nb_devs_added = 0;
844 	struct rte_bbdev_info info;
845 
846 	RTE_BBDEV_FOREACH(dev_id) {
847 		rte_bbdev_info_get(dev_id, &info);
848 
849 		if (check_dev_cap(&info)) {
850 			printf(
851 				"Device %d (%s) does not support specified capabilities\n",
852 					dev_id, info.dev_name);
853 			continue;
854 		}
855 
856 		ret = add_active_device(dev_id, &info, &test_vector);
857 		if (ret != 0) {
858 			printf("Adding active bbdev %s skipped\n",
859 					info.dev_name);
860 			continue;
861 		}
862 		nb_devs_added++;
863 	}
864 
865 	return nb_devs_added;
866 }
867 
868 static int
869 read_test_vector(void)
870 {
871 	int ret;
872 
873 	memset(&test_vector, 0, sizeof(test_vector));
874 	printf("Test vector file = %s\n", get_vector_filename());
875 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
876 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
877 			get_vector_filename());
878 
879 	return TEST_SUCCESS;
880 }
881 
882 static int
883 testsuite_setup(void)
884 {
885 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
886 
887 	if (populate_active_devices() == 0) {
888 		printf("No suitable devices found!\n");
889 		return TEST_SKIPPED;
890 	}
891 
892 	return TEST_SUCCESS;
893 }
894 
895 static int
896 interrupt_testsuite_setup(void)
897 {
898 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
899 
900 	/* Enable interrupts */
901 	intr_enabled = true;
902 
903 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
904 	if (populate_active_devices() == 0 ||
905 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
906 		intr_enabled = false;
907 		printf("No suitable devices found!\n");
908 		return TEST_SKIPPED;
909 	}
910 
911 	return TEST_SUCCESS;
912 }
913 
914 static void
915 testsuite_teardown(void)
916 {
917 	uint8_t dev_id;
918 
919 	/* Unconfigure devices */
920 	RTE_BBDEV_FOREACH(dev_id)
921 		rte_bbdev_close(dev_id);
922 
923 	/* Clear active devices structs. */
924 	memset(active_devs, 0, sizeof(active_devs));
925 	nb_active_devs = 0;
926 
927 	/* Disable interrupts */
928 	intr_enabled = false;
929 }
930 
931 static int
932 ut_setup(void)
933 {
934 	uint8_t i, dev_id;
935 
936 	for (i = 0; i < nb_active_devs; i++) {
937 		dev_id = active_devs[i].dev_id;
938 		/* reset bbdev stats */
939 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
940 				"Failed to reset stats of bbdev %u", dev_id);
941 		/* start the device */
942 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
943 				"Failed to start bbdev %u", dev_id);
944 	}
945 
946 	return TEST_SUCCESS;
947 }
948 
949 static void
950 ut_teardown(void)
951 {
952 	uint8_t i, dev_id;
953 	struct rte_bbdev_stats stats;
954 
955 	for (i = 0; i < nb_active_devs; i++) {
956 		dev_id = active_devs[i].dev_id;
957 		/* read stats and print */
958 		rte_bbdev_stats_get(dev_id, &stats);
959 		/* Stop the device */
960 		rte_bbdev_stop(dev_id);
961 	}
962 }
963 
964 static int
965 init_op_data_objs(struct rte_bbdev_op_data *bufs,
966 		struct op_data_entries *ref_entries,
967 		struct rte_mempool *mbuf_pool, const uint16_t n,
968 		enum op_data_type op_type, uint16_t min_alignment)
969 {
970 	int ret;
971 	unsigned int i, j;
972 	bool large_input = false;
973 
974 	for (i = 0; i < n; ++i) {
975 		char *data;
976 		struct op_data_buf *seg = &ref_entries->segments[0];
977 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
978 		TEST_ASSERT_NOT_NULL(m_head,
979 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
980 				op_type, n * ref_entries->nb_segments,
981 				mbuf_pool->size);
982 
983 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
984 			/*
985 			 * Special case when DPDK mbuf cannot handle
986 			 * the required input size
987 			 */
988 			printf("Warning: Larger input size than DPDK mbuf %d\n",
989 					seg->length);
990 			large_input = true;
991 		}
992 		bufs[i].data = m_head;
993 		bufs[i].offset = 0;
994 		bufs[i].length = 0;
995 
996 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
997 			if ((op_type == DATA_INPUT) && large_input) {
998 				/* Allocate a fake overused mbuf */
999 				data = rte_malloc(NULL, seg->length, 0);
1000 				TEST_ASSERT_NOT_NULL(data,
1001 					"rte malloc failed with %u bytes",
1002 					seg->length);
1003 				memcpy(data, seg->addr, seg->length);
1004 				m_head->buf_addr = data;
1005 				m_head->buf_iova = rte_malloc_virt2iova(data);
1006 				m_head->data_off = 0;
1007 				m_head->data_len = seg->length;
1008 			} else {
1009 				data = rte_pktmbuf_append(m_head, seg->length);
1010 				TEST_ASSERT_NOT_NULL(data,
1011 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1012 					seg->length, op_type);
1013 
1014 				TEST_ASSERT(data == RTE_PTR_ALIGN(
1015 						data, min_alignment),
1016 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1017 					data, min_alignment);
1018 				rte_memcpy(data, seg->addr, seg->length);
1019 			}
1020 
1021 			bufs[i].length += seg->length;
1022 
1023 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1024 				struct rte_mbuf *m_tail =
1025 						rte_pktmbuf_alloc(mbuf_pool);
1026 				TEST_ASSERT_NOT_NULL(m_tail,
1027 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1028 						op_type,
1029 						n * ref_entries->nb_segments,
1030 						mbuf_pool->size);
1031 				seg += 1;
1032 
1033 				data = rte_pktmbuf_append(m_tail, seg->length);
1034 				TEST_ASSERT_NOT_NULL(data,
1035 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1036 						seg->length, op_type);
1037 
1038 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1039 						min_alignment),
1040 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1041 						data, min_alignment);
1042 				rte_memcpy(data, seg->addr, seg->length);
1043 				bufs[i].length += seg->length;
1044 
1045 				ret = rte_pktmbuf_chain(m_head, m_tail);
1046 				TEST_ASSERT_SUCCESS(ret,
1047 						"Couldn't chain mbufs from %d data type mbuf pool",
1048 						op_type);
1049 			}
1050 		} else {
1051 
1052 			/* allocate chained-mbuf for output buffer */
1053 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1054 				struct rte_mbuf *m_tail =
1055 						rte_pktmbuf_alloc(mbuf_pool);
1056 				TEST_ASSERT_NOT_NULL(m_tail,
1057 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1058 						op_type,
1059 						n * ref_entries->nb_segments,
1060 						mbuf_pool->size);
1061 
1062 				ret = rte_pktmbuf_chain(m_head, m_tail);
1063 				TEST_ASSERT_SUCCESS(ret,
1064 						"Couldn't chain mbufs from %d data type mbuf pool",
1065 						op_type);
1066 			}
1067 		}
1068 	}
1069 
1070 	return 0;
1071 }
1072 
1073 static int
1074 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1075 		const int socket)
1076 {
1077 	int i;
1078 
1079 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1080 	if (*buffers == NULL) {
1081 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1082 				socket);
1083 		/* try to allocate memory on other detected sockets */
1084 		for (i = 0; i < socket; i++) {
1085 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1086 			if (*buffers != NULL)
1087 				break;
1088 		}
1089 	}
1090 
1091 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1092 }
1093 
1094 static void
1095 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1096 		const uint16_t n, const int8_t max_llr_modulus)
1097 {
1098 	uint16_t i, byte_idx;
1099 
1100 	for (i = 0; i < n; ++i) {
1101 		struct rte_mbuf *m = input_ops[i].data;
1102 		while (m != NULL) {
1103 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1104 					input_ops[i].offset);
1105 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1106 					++byte_idx)
1107 				llr[byte_idx] = round((double)max_llr_modulus *
1108 						llr[byte_idx] / INT8_MAX);
1109 
1110 			m = m->next;
1111 		}
1112 	}
1113 }
1114 
1115 /*
1116  * We may have to insert filler bits in the HARQ input
1117  * when the device's internal HARQ memory expects them to be present
1118  */
1119 static void
1120 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1121 		const uint16_t n, struct test_op_params *op_params)
1122 {
1123 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1124 
1125 	if (input_ops == NULL)
1126 		return;
1127 	/* No need to add filler if not required by device */
1128 	if (!(ldpc_cap_flags &
1129 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1130 		return;
1131 	/* No need to add filler for loopback operation */
1132 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1133 		return;
1134 
1135 	uint16_t i, j, parity_offset;
1136 	for (i = 0; i < n; ++i) {
1137 		struct rte_mbuf *m = input_ops[i].data;
1138 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1139 				input_ops[i].offset);
1140 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1141 				* dec.z_c - dec.n_filler;
1142 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1143 		m->data_len = new_hin_size;
1144 		input_ops[i].length = new_hin_size;
1145 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1146 				j--)
1147 			llr[j] = llr[j - dec.n_filler];
1148 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1149 		for (j = 0; j < dec.n_filler; j++)
1150 			llr[parity_offset + j] = llr_max_pre_scaling;
1151 	}
1152 }
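
/*
 * Resulting HARQ input layout (illustrative): the buffer grows by
 * n_filler LLRs, the systematic LLRs up to parity_offset stay in place,
 * the parity LLRs are shifted up by n_filler positions, and the gap is
 * filled with the maximum pre-scaling LLR magnitude:
 *
 *   [ systematic | n_filler x llr_max_pre_scaling | parity ]
 */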
1153 
1154 static void
1155 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1156 		const uint16_t n, const int8_t llr_size,
1157 		const int8_t llr_decimals)
1158 {
1159 	if (input_ops == NULL)
1160 		return;
1161 
1162 	uint16_t i, byte_idx;
1163 
1164 	int16_t llr_max, llr_min, llr_tmp;
1165 	llr_max = (1 << (llr_size - 1)) - 1;
1166 	llr_min = -llr_max;
1167 	for (i = 0; i < n; ++i) {
1168 		struct rte_mbuf *m = input_ops[i].data;
1169 		while (m != NULL) {
1170 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1171 					input_ops[i].offset);
1172 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1173 					++byte_idx) {
1174 
1175 				llr_tmp = llr[byte_idx];
1176 				if (llr_decimals == 4)
1177 					llr_tmp *= 8;
1178 				else if (llr_decimals == 2)
1179 					llr_tmp *= 2;
1180 				else if (llr_decimals == 0)
1181 					llr_tmp /= 2;
1182 				llr_tmp = RTE_MIN(llr_max,
1183 						RTE_MAX(llr_min, llr_tmp));
1184 				llr[byte_idx] = (int8_t) llr_tmp;
1185 			}
1186 
1187 			m = m->next;
1188 		}
1189 	}
1190 }
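
/*
 * Note on the scaling above: the test vectors are assumed to store
 * LLRs with one fractional bit, so llr_decimals == 4 multiplies by 8,
 * == 2 multiplies by 2 and == 0 divides by 2, after which the result
 * is saturated to the [llr_min, llr_max] range derived from llr_size.
 */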
1191 
1192 
1193 
1194 static int
1195 fill_queue_buffers(struct test_op_params *op_params,
1196 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1197 		struct rte_mempool *soft_out_mp,
1198 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1199 		uint16_t queue_id,
1200 		const struct rte_bbdev_op_cap *capabilities,
1201 		uint16_t min_alignment, const int socket_id)
1202 {
1203 	int ret;
1204 	enum op_data_type type;
1205 	const uint16_t n = op_params->num_to_process;
1206 
1207 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1208 		in_mp,
1209 		soft_out_mp,
1210 		hard_out_mp,
1211 		harq_in_mp,
1212 		harq_out_mp,
1213 	};
1214 
1215 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1216 		&op_params->q_bufs[socket_id][queue_id].inputs,
1217 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1218 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1219 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1220 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1221 	};
1222 
1223 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1224 		struct op_data_entries *ref_entries =
1225 				&test_vector.entries[type];
1226 		if (ref_entries->nb_segments == 0)
1227 			continue;
1228 
1229 		ret = allocate_buffers_on_socket(queue_ops[type],
1230 				n * sizeof(struct rte_bbdev_op_data),
1231 				socket_id);
1232 		TEST_ASSERT_SUCCESS(ret,
1233 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1234 
1235 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1236 				mbuf_pools[type], n, type, min_alignment);
1237 		TEST_ASSERT_SUCCESS(ret,
1238 				"Couldn't init rte_bbdev_op_data structs");
1239 	}
1240 
1241 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1242 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1243 			capabilities->cap.turbo_dec.max_llr_modulus);
1244 
1245 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1246 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1247 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1248 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1249 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1250 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1251 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1252 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1253 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1254 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1255 		if (!loopback && !llr_comp)
1256 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1257 					ldpc_llr_size, ldpc_llr_decimals);
1258 		if (!loopback && !harq_comp)
1259 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1260 					ldpc_llr_size, ldpc_llr_decimals);
1261 		if (!loopback)
1262 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1263 					op_params);
1264 	}
1265 
1266 	return 0;
1267 }
1268 
1269 static void
1270 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1271 {
1272 	unsigned int i, j;
1273 
1274 	rte_mempool_free(ad->ops_mempool);
1275 	rte_mempool_free(ad->in_mbuf_pool);
1276 	rte_mempool_free(ad->hard_out_mbuf_pool);
1277 	rte_mempool_free(ad->soft_out_mbuf_pool);
1278 	rte_mempool_free(ad->harq_in_mbuf_pool);
1279 	rte_mempool_free(ad->harq_out_mbuf_pool);
1280 
1281 	for (i = 0; i < rte_lcore_count(); ++i) {
1282 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1283 			rte_free(op_params->q_bufs[j][i].inputs);
1284 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1285 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1286 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1287 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1288 		}
1289 	}
1290 }
1291 
1292 static void
1293 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1294 		unsigned int start_idx,
1295 		struct rte_bbdev_op_data *inputs,
1296 		struct rte_bbdev_op_data *hard_outputs,
1297 		struct rte_bbdev_op_data *soft_outputs,
1298 		struct rte_bbdev_dec_op *ref_op)
1299 {
1300 	unsigned int i;
1301 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1302 
1303 	for (i = 0; i < n; ++i) {
1304 		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1305 			ops[i]->turbo_dec.tb_params.ea =
1306 					turbo_dec->tb_params.ea;
1307 			ops[i]->turbo_dec.tb_params.eb =
1308 					turbo_dec->tb_params.eb;
1309 			ops[i]->turbo_dec.tb_params.k_pos =
1310 					turbo_dec->tb_params.k_pos;
1311 			ops[i]->turbo_dec.tb_params.k_neg =
1312 					turbo_dec->tb_params.k_neg;
1313 			ops[i]->turbo_dec.tb_params.c =
1314 					turbo_dec->tb_params.c;
1315 			ops[i]->turbo_dec.tb_params.c_neg =
1316 					turbo_dec->tb_params.c_neg;
1317 			ops[i]->turbo_dec.tb_params.cab =
1318 					turbo_dec->tb_params.cab;
1319 			ops[i]->turbo_dec.tb_params.r =
1320 					turbo_dec->tb_params.r;
1321 		} else {
1322 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1323 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1324 		}
1325 
1326 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1327 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1328 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1329 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1330 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1331 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1332 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1333 
1334 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1335 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1336 		if (soft_outputs != NULL)
1337 			ops[i]->turbo_dec.soft_output =
1338 				soft_outputs[start_idx + i];
1339 	}
1340 }
1341 
1342 static void
1343 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1344 		unsigned int start_idx,
1345 		struct rte_bbdev_op_data *inputs,
1346 		struct rte_bbdev_op_data *outputs,
1347 		struct rte_bbdev_enc_op *ref_op)
1348 {
1349 	unsigned int i;
1350 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1351 	for (i = 0; i < n; ++i) {
1352 		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1353 			ops[i]->turbo_enc.tb_params.ea =
1354 					turbo_enc->tb_params.ea;
1355 			ops[i]->turbo_enc.tb_params.eb =
1356 					turbo_enc->tb_params.eb;
1357 			ops[i]->turbo_enc.tb_params.k_pos =
1358 					turbo_enc->tb_params.k_pos;
1359 			ops[i]->turbo_enc.tb_params.k_neg =
1360 					turbo_enc->tb_params.k_neg;
1361 			ops[i]->turbo_enc.tb_params.c =
1362 					turbo_enc->tb_params.c;
1363 			ops[i]->turbo_enc.tb_params.c_neg =
1364 					turbo_enc->tb_params.c_neg;
1365 			ops[i]->turbo_enc.tb_params.cab =
1366 					turbo_enc->tb_params.cab;
1367 			ops[i]->turbo_enc.tb_params.ncb_pos =
1368 					turbo_enc->tb_params.ncb_pos;
1369 			ops[i]->turbo_enc.tb_params.ncb_neg =
1370 					turbo_enc->tb_params.ncb_neg;
1371 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1372 		} else {
1373 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1374 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1375 			ops[i]->turbo_enc.cb_params.ncb =
1376 					turbo_enc->cb_params.ncb;
1377 		}
1378 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1379 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1380 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1381 
1382 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1383 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1384 	}
1385 }
1386 
1387 
1388 /* Returns a random number drawn from a normal distribution
1389  * with mean of 0 and variance of 1
1390  * Marsaglia algorithm
1391  */
1392 static double
1393 randn(int n)
1394 {
1395 	double S, Z, U1, U2, u, v, fac;
1396 
1397 	do {
1398 		U1 = (double)rand() / RAND_MAX;
1399 		U2 = (double)rand() / RAND_MAX;
1400 		u = 2. * U1 - 1.;
1401 		v = 2. * U2 - 1.;
1402 		S = u * u + v * v;
1403 	} while (S >= 1 || S == 0);
1404 	fac = sqrt(-2. * log(S) / S);
1405 	Z = (n % 2) ? u * fac : v * fac;
1406 	return Z;
1407 }
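
/*
 * Marsaglia polar method recap: with u, v uniform on (-1, 1) and
 * S = u^2 + v^2 in (0, 1), both u * sqrt(-2 * log(S) / S) and
 * v * sqrt(-2 * log(S) / S) are independent standard normal samples;
 * the parity of n selects which of the two is returned.
 */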
1408 
1409 static inline double
1410 maxstar(double A, double B)
1411 {
1412 	if (fabs(A - B) > 5)
1413 		return RTE_MAX(A, B);
1414 	else
1415 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1416 }
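
/*
 * maxstar() is the Jacobian logarithm ("max*"):
 * log(exp(A) + exp(B)) = max(A, B) + log(1 + exp(-|A - B|)).
 * The correction term is skipped for |A - B| > 5 where it falls
 * below ~0.007.
 */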
1417 
1418 /*
1419  * Generate Qm LLRs for Qm == 8 (256QAM)
1420  * Modulation, AWGN channel and LLR estimation based on the max* approximation
1421  */
1422 static void
1423 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1424 {
1425 	int qm = 8;
1426 	int qam = 256;
1427 	int m, k;
1428 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1429 	/* 5.1.4 of TS38.211 */
1430 	const double symbols_I[256] = {
1431 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1432 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1433 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1434 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1435 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1436 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1437 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1438 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1439 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1440 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1441 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1442 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1443 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1444 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1445 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1446 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1447 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1448 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1449 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1450 			-13, -13, -15, -15, -13, -13, -15, -15};
1451 	const double symbols_Q[256] = {
1452 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1453 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1454 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1455 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1456 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1457 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1458 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1459 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1460 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1461 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1462 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1463 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1464 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1465 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1466 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1467 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1468 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1469 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1470 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1471 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1472 	/* Average constellation point energy */
1473 	N0 *= 170.0;
1474 	for (k = 0; k < qm; k++)
1475 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1476 	/* 5.1.4 of TS38.211 */
1477 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1478 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1479 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1480 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1481 	/* AWGN channel */
1482 	I += sqrt(N0 / 2) * randn(0);
1483 	Q += sqrt(N0 / 2) * randn(1);
1484 	/*
1485 	 * Calculate the log of the probability that each of
1486 	 * the constellation points was transmitted
1487 	 */
1488 	for (m = 0; m < qam; m++)
1489 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1490 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1491 	/* Calculate an LLR for each of the qm bits in the set */
1492 	for (k = 0; k < qm; k++) {
1493 		p0 = -999999;
1494 		p1 = -999999;
1495 		/* For each constellation point */
1496 		for (m = 0; m < qam; m++) {
1497 			if ((m >> (qm - k - 1)) & 1)
1498 				p1 = maxstar(p1, log_syml_prob[m]);
1499 			else
1500 				p0 = maxstar(p0, log_syml_prob[m]);
1501 		}
1502 		/* Calculate the LLR */
1503 		llr_ = p0 - p1;
1504 		llr_ *= (1 << ldpc_llr_decimals);
1505 		llr_ = round(llr_);
1506 		if (llr_ > llr_max)
1507 			llr_ = llr_max;
1508 		if (llr_ < -llr_max)
1509 			llr_ = -llr_max;
1510 		llrs[qm * i + k] = (int8_t) llr_;
1511 	}
1512 }
1513 
1514 
1515 /*
1516  * Generate Qm LLRs for Qm == 6 (64QAM)
1517  * Modulation, AWGN channel and LLR estimation based on the max* approximation
1518  */
1519 static void
1520 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1521 {
1522 	int qm = 6;
1523 	int qam = 64;
1524 	int m, k;
1525 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1526 	/* 5.1.4 of TS38.211 */
1527 	const double symbols_I[64] = {
1528 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1529 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1530 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1531 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1532 			-5, -5, -7, -7, -5, -5, -7, -7};
1533 	const double symbols_Q[64] = {
1534 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1535 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1536 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1537 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1538 			-3, -1, -3, -1, -5, -7, -5, -7};
1539 	/* Average constellation point energy */
1540 	N0 *= 42.0;
1541 	for (k = 0; k < qm; k++)
1542 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1543 	/* 5.1.4 of TS38.211 */
1544 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1545 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1546 	/* AWGN channel */
1547 	I += sqrt(N0 / 2) * randn(0);
1548 	Q += sqrt(N0 / 2) * randn(1);
1549 	/*
1550 	 * Calculate the log of the probability that each of
1551 	 * the constellation points was transmitted
1552 	 */
1553 	for (m = 0; m < qam; m++)
1554 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1555 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1556 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1557 	for (k = 0; k < qm; k++) {
1558 		p0 = -999999;
1559 		p1 = -999999;
1560 		/* For each constellation point */
1561 		for (m = 0; m < qam; m++) {
1562 			if ((m >> (qm - k - 1)) & 1)
1563 				p1 = maxstar(p1, log_syml_prob[m]);
1564 			else
1565 				p0 = maxstar(p0, log_syml_prob[m]);
1566 		}
1567 		/* Calculate the LLR */
1568 		llr_ = p0 - p1;
1569 		llr_ *= (1 << ldpc_llr_decimals);
1570 		llr_ = round(llr_);
1571 		if (llr_ > llr_max)
1572 			llr_ = llr_max;
1573 		if (llr_ < -llr_max)
1574 			llr_ = -llr_max;
1575 		llrs[qm * i + k] = (int8_t) llr_;
1576 	}
1577 }
1578 
1579 /*
1580  * Generate Qm LLRs for Qm == 4 (16QAM)
1581  * Modulation, AWGN channel and LLR estimation based on the max* approximation
1582  */
1583 static void
1584 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1585 {
1586 	int qm = 4;
1587 	int qam = 16;
1588 	int m, k;
1589 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1590 	/* 5.1.4 of TS38.211 */
1591 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1592 			-1, -1, -3, -3, -1, -1, -3, -3};
1593 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1594 			1, 3, 1, 3, -1, -3, -1, -3};
1595 	/* Average constellation point energy */
1596 	N0 *= 10.0;
1597 	for (k = 0; k < qm; k++)
1598 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1599 	/* 5.1.4 of TS38.211 */
1600 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1601 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1602 	/* AWGN channel */
1603 	I += sqrt(N0 / 2) * randn(0);
1604 	Q += sqrt(N0 / 2) * randn(1);
1605 	/*
1606 	 * Calculate the log of the probability that each of
1607 	 * the constellation points was transmitted
1608 	 */
1609 	for (m = 0; m < qam; m++)
1610 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1611 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1612 	/* Calculate an LLR for each of the qm bits in the set */
1613 	for (k = 0; k < qm; k++) {
1614 		p0 = -999999;
1615 		p1 = -999999;
1616 		/* For each constellation point */
1617 		for (m = 0; m < qam; m++) {
1618 			if ((m >> (qm - k - 1)) & 1)
1619 				p1 = maxstar(p1, log_syml_prob[m]);
1620 			else
1621 				p0 = maxstar(p0, log_syml_prob[m]);
1622 		}
1623 		/* Calculate the LLR */
1624 		llr_ = p0 - p1;
1625 		llr_ *= (1 << ldpc_llr_decimals);
1626 		llr_ = round(llr_);
1627 		if (llr_ > llr_max)
1628 			llr_ = llr_max;
1629 		if (llr_ < -llr_max)
1630 			llr_ = -llr_max;
1631 		llrs[qm * i + k] = (int8_t) llr_;
1632 	}
1633 }
1634 
1635 static void
1636 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1637 {
1638 	double b, b1, n;
1639 	double coeff = 2.0 * sqrt(N0);
1640 
1641 	/* Ignore the rare quasi-null LLRs in vectors so that they are not saturated */
1642 	if (llrs[j] < 8 && llrs[j] > -8)
1643 		return;
1644 
1645 	/* Note don't change sign here */
1646 	n = randn(j % 2);
1647 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1648 			+ coeff * n) / N0;
1649 	b = b1 * (1 << ldpc_llr_decimals);
1650 	b = round(b);
1651 	if (b > llr_max)
1652 		b = llr_max;
1653 	if (b < -llr_max)
1654 		b = -llr_max;
1655 	llrs[j] = (int8_t) b;
1656 }
1657 
1658 /* Generate LLR for a given SNR */
1659 static void
1660 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1661 		struct rte_bbdev_dec_op *ref_op)
1662 {
1663 	struct rte_mbuf *m;
1664 	uint16_t qm;
1665 	uint32_t i, j, e, range;
1666 	double N0, llr_max;
1667 
1668 	e = ref_op->ldpc_dec.cb_params.e;
1669 	qm = ref_op->ldpc_dec.q_m;
1670 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1671 	range = e / qm;
1672 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1673 
1674 	for (i = 0; i < n; ++i) {
1675 		m = inputs[i].data;
1676 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1677 		if (qm == 8) {
1678 			for (j = 0; j < range; ++j)
1679 				gen_qm8_llr(llrs, j, N0, llr_max);
1680 		} else if (qm == 6) {
1681 			for (j = 0; j < range; ++j)
1682 				gen_qm6_llr(llrs, j, N0, llr_max);
1683 		} else if (qm == 4) {
1684 			for (j = 0; j < range; ++j)
1685 				gen_qm4_llr(llrs, j, N0, llr_max);
1686 		} else {
1687 			for (j = 0; j < e; ++j)
1688 				gen_qm2_llr(llrs, j, N0, llr_max);
1689 		}
1690 	}
1691 }
1692 
1693 static void
1694 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1695 		unsigned int start_idx,
1696 		struct rte_bbdev_op_data *inputs,
1697 		struct rte_bbdev_op_data *hard_outputs,
1698 		struct rte_bbdev_op_data *soft_outputs,
1699 		struct rte_bbdev_op_data *harq_inputs,
1700 		struct rte_bbdev_op_data *harq_outputs,
1701 		struct rte_bbdev_dec_op *ref_op)
1702 {
1703 	unsigned int i;
1704 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1705 
1706 	for (i = 0; i < n; ++i) {
1707 		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1708 			ops[i]->ldpc_dec.tb_params.ea =
1709 					ldpc_dec->tb_params.ea;
1710 			ops[i]->ldpc_dec.tb_params.eb =
1711 					ldpc_dec->tb_params.eb;
1712 			ops[i]->ldpc_dec.tb_params.c =
1713 					ldpc_dec->tb_params.c;
1714 			ops[i]->ldpc_dec.tb_params.cab =
1715 					ldpc_dec->tb_params.cab;
1716 			ops[i]->ldpc_dec.tb_params.r =
1717 					ldpc_dec->tb_params.r;
1718 		} else {
1719 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1720 		}
1721 
1722 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1723 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1724 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1725 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1726 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1727 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1728 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1729 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1730 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1731 
1732 		if (hard_outputs != NULL)
1733 			ops[i]->ldpc_dec.hard_output =
1734 					hard_outputs[start_idx + i];
1735 		if (inputs != NULL)
1736 			ops[i]->ldpc_dec.input =
1737 					inputs[start_idx + i];
1738 		if (soft_outputs != NULL)
1739 			ops[i]->ldpc_dec.soft_output =
1740 					soft_outputs[start_idx + i];
1741 		if (harq_inputs != NULL)
1742 			ops[i]->ldpc_dec.harq_combined_input =
1743 					harq_inputs[start_idx + i];
1744 		if (harq_outputs != NULL)
1745 			ops[i]->ldpc_dec.harq_combined_output =
1746 					harq_outputs[start_idx + i];
1747 	}
1748 }
1749 
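/*
 * Note on copy_reference_ldpc_dec_op() above and copy_reference_ldpc_enc_op()
 * below: the reference code parameters are duplicated into every op, and the
 * rte_bbdev_op_data entries (mbuf pointer, offset, length) are assigned by
 * value, so op i picks up its own buffers at index start_idx + i.
 */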
1750 
1751 static void
1752 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1753 		unsigned int start_idx,
1754 		struct rte_bbdev_op_data *inputs,
1755 		struct rte_bbdev_op_data *outputs,
1756 		struct rte_bbdev_enc_op *ref_op)
1757 {
1758 	unsigned int i;
1759 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1760 	for (i = 0; i < n; ++i) {
1761 		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1762 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1763 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1764 			ops[i]->ldpc_enc.tb_params.cab =
1765 					ldpc_enc->tb_params.cab;
1766 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1767 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1768 		} else {
1769 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1770 		}
1771 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1772 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1773 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1774 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1775 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1776 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1777 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1778 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1779 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1780 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1781 	}
1782 }
1783 
1784 static int
1785 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1786 		unsigned int order_idx, const int expected_status)
1787 {
1788 	int status = op->status;
1789 	/* Ignore parity-mismatch false alarms for large iteration counts */
1790 	if (get_iter_max() >= 10) {
1791 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1792 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1793 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1794 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1795 		}
1796 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1797 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1798 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1799 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1800 		}
1801 	}
1802 
1803 	TEST_ASSERT(status == expected_status,
1804 			"op_status (%d) != expected_status (%d)",
1805 			op->status, expected_status);
1806 
1807 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1808 			"Ordering error, expected %p, got %p",
1809 			(void *)(uintptr_t)order_idx, op->opaque_data);
1810 
1811 	return TEST_SUCCESS;
1812 }
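/*
 * The opaque_data field is set to the enqueue index by the callers of these
 * helpers, which is what allows the ordering checks above and below to detect
 * reordered completions.
 */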
1813 
1814 static int
1815 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1816 		unsigned int order_idx, const int expected_status)
1817 {
1818 	TEST_ASSERT(op->status == expected_status,
1819 			"op_status (%d) != expected_status (%d)",
1820 			op->status, expected_status);
1821 
1822 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1823 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1824 				"Ordering error, expected %p, got %p",
1825 				(void *)(uintptr_t)order_idx, op->opaque_data);
1826 
1827 	return TEST_SUCCESS;
1828 }
1829 
1830 static inline int
1831 validate_op_chain(struct rte_bbdev_op_data *op,
1832 		struct op_data_entries *orig_op)
1833 {
1834 	uint8_t i;
1835 	struct rte_mbuf *m = op->data;
1836 	uint8_t nb_dst_segments = orig_op->nb_segments;
1837 	uint32_t total_data_size = 0;
1838 
1839 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1840 			"Number of segments differs in original (%u) and filled (%u) op",
1841 			nb_dst_segments, m->nb_segs);
1842 
1843 	/* Validate each mbuf segment length */
1844 	for (i = 0; i < nb_dst_segments; ++i) {
1845 		/* Apply offset to the first mbuf segment */
1846 		uint16_t offset = (i == 0) ? op->offset : 0;
1847 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1848 		total_data_size += orig_op->segments[i].length;
1849 
1850 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1851 				"Length of segment differs in original (%u) and filled (%u) op",
1852 				orig_op->segments[i].length, data_len);
1853 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1854 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1855 				data_len,
1856 				"Output buffers (CB=%u) are not equal", i);
1857 		m = m->next;
1858 	}
1859 
1860 	/* Validate total mbuf pkt length */
1861 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1862 	TEST_ASSERT(total_data_size == pkt_len,
1863 			"Length of data differs in original (%u) and filled (%u) op",
1864 			total_data_size, pkt_len);
1865 
1866 	return TEST_SUCCESS;
1867 }
1868 
1869 /*
1870  * Compute k0 for a given configuration, as defined in
1871  * 3GPP 38.212 Table 5.4.2.1-2, for the HARQ output length computation
1872  */
1873 static inline uint16_t
1874 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1875 {
1876 	if (rv_index == 0)
1877 		return 0;
1878 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1879 	if (n_cb == n) {
1880 		if (rv_index == 1)
1881 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1882 		else if (rv_index == 2)
1883 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1884 		else
1885 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1886 	}
1887 	/* LBRM case - includes a division by N */
1888 	if (rv_index == 1)
1889 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1890 				/ n) * z_c;
1891 	else if (rv_index == 2)
1892 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1893 				/ n) * z_c;
1894 	else
1895 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1896 				/ n) * z_c;
1897 }
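/*
 * Illustrative example: BG1, Zc = 384, full-size circular buffer
 * (n_cb = 66 * Zc) and rv_index = 2 give k0 = 33 * Zc = 12672; when n_cb is
 * smaller (LBRM), the same fraction is applied as
 * floor(33 * n_cb / (66 * Zc)) * Zc.
 */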
1898 
1899 /* HARQ output length, including the filler bits */
1900 static inline uint16_t
1901 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1902 {
1903 	uint16_t k0 = 0;
1904 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1905 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1906 	/* Compute RM out size and number of rows */
1907 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1908 			* ops_ld->z_c - ops_ld->n_filler;
1909 	uint16_t deRmOutSize = RTE_MIN(
1910 			k0 + ops_ld->cb_params.e +
1911 			((k0 > parity_offset) ?
1912 					0 : ops_ld->n_filler),
1913 					ops_ld->n_cb);
1914 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1915 			/ ops_ld->z_c);
1916 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1917 	return harq_output_len;
1918 }
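/*
 * Worked example with illustrative values: BG1, Zc = 224, rv_index = 0
 * (k0 = 0), n_filler = 0, cb_params.e = 10000 and n_cb = 66 * Zc = 14784 give
 * deRmOutSize = min(10000, 14784) = 10000 and numRows = ceil(10000 / 224) = 45,
 * hence a HARQ output length of 45 * 224 = 10080 LLRs.
 */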
1919 
1920 static inline int
1921 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1922 		struct op_data_entries *orig_op,
1923 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1924 {
1925 	uint8_t i;
1926 	uint32_t j, jj, k;
1927 	struct rte_mbuf *m = op->data;
1928 	uint8_t nb_dst_segments = orig_op->nb_segments;
1929 	uint32_t total_data_size = 0;
1930 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1931 	uint32_t byte_error = 0, cum_error = 0, error;
1932 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1933 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1934 	uint16_t parity_offset;
1935 
1936 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1937 			"Number of segments differs in original (%u) and filled (%u) op",
1938 			nb_dst_segments, m->nb_segs);
1939 
1940 	/* Validate each mbuf segment length */
1941 	for (i = 0; i < nb_dst_segments; ++i) {
1942 		/* Apply offset to the first mbuf segment */
1943 		uint16_t offset = (i == 0) ? op->offset : 0;
1944 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1945 		total_data_size += orig_op->segments[i].length;
1946 
1947 		TEST_ASSERT(orig_op->segments[i].length <
1948 				(uint32_t)(data_len + 64),
1949 				"Length of segment differs in original (%u) and filled (%u) op",
1950 				orig_op->segments[i].length, data_len);
1951 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1952 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1953 
1954 		if (!(ldpc_cap_flags &
1955 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1956 				) || (ops_ld->op_flags &
1957 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1958 			data_len -= ops_ld->z_c;
1959 			parity_offset = data_len;
1960 		} else {
1961 			/* Compute RM out size and number of rows */
1962 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1963 					* ops_ld->z_c - ops_ld->n_filler;
1964 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1965 					ops_ld->n_filler;
1966 			if (data_len > deRmOutSize)
1967 				data_len = deRmOutSize;
1968 			if (data_len > orig_op->segments[i].length)
1969 				data_len = orig_op->segments[i].length;
1970 		}
1971 		/*
1972 		 * HARQ output can have minor differences
1973 		 * due to integer representation and related scaling
1974 		 */
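		/*
		 * Tolerance applied below: a byte is only flagged when the
		 * absolute difference exceeds 8 (or 16 when the reference value
		 * lies within 16 of saturation), and the op still passes when
		 * at most one byte ends up flagged.
		 */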
1975 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1976 			if (j == parity_offset) {
1977 				/* Special Handling of the filler bits */
1978 				for (k = 0; k < ops_ld->n_filler; k++) {
1979 					if (harq_out[jj] !=
1980 							llr_max_pre_scaling) {
1981 						printf("HARQ Filler issue %d: %d %d\n",
1982 							jj, harq_out[jj],
1983 							llr_max_pre_scaling);
1984 						byte_error++;
1985 					}
1986 					jj++;
1987 				}
1988 			}
1989 			if (!(ops_ld->op_flags &
1990 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1991 				if (ldpc_llr_decimals > 1)
1992 					harq_out[jj] = (harq_out[jj] + 1)
1993 						>> (ldpc_llr_decimals - 1);
1994 				/* Saturated to S7 */
1995 				if (harq_orig[j] > llr_max)
1996 					harq_orig[j] = llr_max;
1997 				if (harq_orig[j] < -llr_max)
1998 					harq_orig[j] = -llr_max;
1999 			}
2000 			if (harq_orig[j] != harq_out[jj]) {
2001 				error = (harq_orig[j] > harq_out[jj]) ?
2002 						harq_orig[j] - harq_out[jj] :
2003 						harq_out[jj] - harq_orig[j];
2004 				abs_harq_origin = harq_orig[j] > 0 ?
2005 							harq_orig[j] :
2006 							-harq_orig[j];
2007 				/* Residual quantization error */
2008 				if ((error > 8 && (abs_harq_origin <
2009 						(llr_max - 16))) ||
2010 						(error > 16)) {
2011 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
2012 							j, harq_orig[j],
2013 							harq_out[jj], error);
2014 					byte_error++;
2015 					cum_error += error;
2016 				}
2017 			}
2018 		}
2019 		m = m->next;
2020 	}
2021 
2022 	if (byte_error)
2023 		TEST_ASSERT(byte_error <= 1,
2024 				"HARQ output mismatch (%d) %d",
2025 				byte_error, cum_error);
2026 
2027 	/* Validate total mbuf pkt length */
2028 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2029 	TEST_ASSERT(total_data_size < pkt_len + 64,
2030 			"Length of data differs in original (%u) and filled (%u) op",
2031 			total_data_size, pkt_len);
2032 
2033 	return TEST_SUCCESS;
2034 }
2035 
2036 static int
2037 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2038 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2039 {
2040 	unsigned int i;
2041 	int ret;
2042 	struct op_data_entries *hard_data_orig =
2043 			&test_vector.entries[DATA_HARD_OUTPUT];
2044 	struct op_data_entries *soft_data_orig =
2045 			&test_vector.entries[DATA_SOFT_OUTPUT];
2046 	struct rte_bbdev_op_turbo_dec *ops_td;
2047 	struct rte_bbdev_op_data *hard_output;
2048 	struct rte_bbdev_op_data *soft_output;
2049 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2050 
2051 	for (i = 0; i < n; ++i) {
2052 		ops_td = &ops[i]->turbo_dec;
2053 		hard_output = &ops_td->hard_output;
2054 		soft_output = &ops_td->soft_output;
2055 
2056 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2057 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2058 					"Returned iter_count (%d) > expected iter_count (%d)",
2059 					ops_td->iter_count, ref_td->iter_count);
2060 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2061 		TEST_ASSERT_SUCCESS(ret,
2062 				"Checking status and ordering for decoder failed");
2063 
2064 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2065 				hard_data_orig),
2066 				"Hard output buffers (CB=%u) are not equal",
2067 				i);
2068 
2069 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2070 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2071 					soft_data_orig),
2072 					"Soft output buffers (CB=%u) are not equal",
2073 					i);
2074 	}
2075 
2076 	return TEST_SUCCESS;
2077 }
2078 
2079 /* Count the number of errored code blocks (hard output vs. first reference segment) */
2080 static int
2081 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2082 {
2083 	unsigned int i;
2084 	struct op_data_entries *hard_data_orig =
2085 			&test_vector.entries[DATA_HARD_OUTPUT];
2086 	struct rte_bbdev_op_ldpc_dec *ops_td;
2087 	struct rte_bbdev_op_data *hard_output;
2088 	int errors = 0;
2089 	struct rte_mbuf *m;
2090 
2091 	for (i = 0; i < n; ++i) {
2092 		ops_td = &ops[i]->ldpc_dec;
2093 		hard_output = &ops_td->hard_output;
2094 		m = hard_output->data;
2095 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2096 				hard_data_orig->segments[0].addr,
2097 				hard_data_orig->segments[0].length))
2098 			errors++;
2099 	}
2100 	return errors;
2101 }
2102 
2103 static int
2104 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2105 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2106 {
2107 	unsigned int i;
2108 	int ret;
2109 	struct op_data_entries *hard_data_orig =
2110 			&test_vector.entries[DATA_HARD_OUTPUT];
2111 	struct op_data_entries *soft_data_orig =
2112 			&test_vector.entries[DATA_SOFT_OUTPUT];
2113 	struct op_data_entries *harq_data_orig =
2114 				&test_vector.entries[DATA_HARQ_OUTPUT];
2115 	struct rte_bbdev_op_ldpc_dec *ops_td;
2116 	struct rte_bbdev_op_data *hard_output;
2117 	struct rte_bbdev_op_data *harq_output;
2118 	struct rte_bbdev_op_data *soft_output;
2119 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2120 
2121 	for (i = 0; i < n; ++i) {
2122 		ops_td = &ops[i]->ldpc_dec;
2123 		hard_output = &ops_td->hard_output;
2124 		harq_output = &ops_td->harq_combined_output;
2125 		soft_output = &ops_td->soft_output;
2126 
2127 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2128 		TEST_ASSERT_SUCCESS(ret,
2129 				"Checking status and ordering for decoder failed");
2130 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2131 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2132 					"Returned iter_count (%d) > expected iter_count (%d)",
2133 					ops_td->iter_count, ref_td->iter_count);
2134 		/*
2135 		 * We can ignore output data when the decoding failed to
2136 		 * converge or for loop-back cases
2137 		 */
2138 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2139 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2140 				) && (
2141 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2142 						)) == 0)
2143 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2144 					hard_data_orig),
2145 					"Hard output buffers (CB=%u) are not equal",
2146 					i);
2147 
2148 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2149 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2150 					soft_data_orig),
2151 					"Soft output buffers (CB=%u) are not equal",
2152 					i);
2153 		if (ref_op->ldpc_dec.op_flags &
2154 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2155 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2156 					harq_data_orig, ops_td),
2157 					"HARQ output buffers (CB=%u) are not equal",
2158 					i);
2159 		}
2160 		if (ref_op->ldpc_dec.op_flags &
2161 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2162 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2163 					harq_data_orig, ops_td),
2164 					"HARQ output buffers (CB=%u) are not equal",
2165 					i);
2166 
2167 	}
2168 
2169 	return TEST_SUCCESS;
2170 }
2171 
2172 
2173 static int
2174 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2175 		struct rte_bbdev_enc_op *ref_op)
2176 {
2177 	unsigned int i;
2178 	int ret;
2179 	struct op_data_entries *hard_data_orig =
2180 			&test_vector.entries[DATA_HARD_OUTPUT];
2181 
2182 	for (i = 0; i < n; ++i) {
2183 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2184 		TEST_ASSERT_SUCCESS(ret,
2185 				"Checking status and ordering for encoder failed");
2186 		TEST_ASSERT_SUCCESS(validate_op_chain(
2187 				&ops[i]->turbo_enc.output,
2188 				hard_data_orig),
2189 				"Output buffers (CB=%u) are not equal",
2190 				i);
2191 	}
2192 
2193 	return TEST_SUCCESS;
2194 }
2195 
2196 static int
2197 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2198 		struct rte_bbdev_enc_op *ref_op)
2199 {
2200 	unsigned int i;
2201 	int ret;
2202 	struct op_data_entries *hard_data_orig =
2203 			&test_vector.entries[DATA_HARD_OUTPUT];
2204 
2205 	for (i = 0; i < n; ++i) {
2206 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2207 		TEST_ASSERT_SUCCESS(ret,
2208 				"Checking status and ordering for encoder failed");
2209 		TEST_ASSERT_SUCCESS(validate_op_chain(
2210 				&ops[i]->ldpc_enc.output,
2211 				hard_data_orig),
2212 				"Output buffers (CB=%u) are not equal",
2213 				i);
2214 	}
2215 
2216 	return TEST_SUCCESS;
2217 }
2218 
2219 static void
2220 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2221 {
2222 	unsigned int i;
2223 	struct op_data_entries *entry;
2224 
2225 	op->turbo_dec = test_vector.turbo_dec;
2226 	entry = &test_vector.entries[DATA_INPUT];
2227 	for (i = 0; i < entry->nb_segments; ++i)
2228 		op->turbo_dec.input.length +=
2229 				entry->segments[i].length;
2230 }
2231 
2232 static void
2233 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2234 {
2235 	unsigned int i;
2236 	struct op_data_entries *entry;
2237 
2238 	op->ldpc_dec = test_vector.ldpc_dec;
2239 	entry = &test_vector.entries[DATA_INPUT];
2240 	for (i = 0; i < entry->nb_segments; ++i)
2241 		op->ldpc_dec.input.length +=
2242 				entry->segments[i].length;
2243 	if (test_vector.ldpc_dec.op_flags &
2244 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2245 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2246 		for (i = 0; i < entry->nb_segments; ++i)
2247 			op->ldpc_dec.harq_combined_input.length +=
2248 				entry->segments[i].length;
2249 	}
2250 }
2251 
2252 
2253 static void
2254 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2255 {
2256 	unsigned int i;
2257 	struct op_data_entries *entry;
2258 
2259 	op->turbo_enc = test_vector.turbo_enc;
2260 	entry = &test_vector.entries[DATA_INPUT];
2261 	for (i = 0; i < entry->nb_segments; ++i)
2262 		op->turbo_enc.input.length +=
2263 				entry->segments[i].length;
2264 }
2265 
2266 static void
2267 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2268 {
2269 	unsigned int i;
2270 	struct op_data_entries *entry;
2271 
2272 	op->ldpc_enc = test_vector.ldpc_enc;
2273 	entry = &test_vector.entries[DATA_INPUT];
2274 	for (i = 0; i < entry->nb_segments; ++i)
2275 		op->ldpc_enc.input.length +=
2276 				entry->segments[i].length;
2277 }
2278 
2279 static uint32_t
2280 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2281 {
2282 	uint8_t i;
2283 	uint32_t c, r, tb_size = 0;
2284 
2285 	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2286 		tb_size = op->turbo_dec.tb_params.k_neg;
2287 	} else {
2288 		c = op->turbo_dec.tb_params.c;
2289 		r = op->turbo_dec.tb_params.r;
2290 		for (i = 0; i < c-r; i++)
2291 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2292 				op->turbo_dec.tb_params.k_neg :
2293 				op->turbo_dec.tb_params.k_pos;
2294 	}
2295 	return tb_size;
2296 }
2297 
2298 static uint32_t
2299 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2300 {
2301 	uint8_t i;
2302 	uint32_t c, r, tb_size = 0;
2303 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2304 
2305 	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2306 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2307 	} else {
2308 		c = op->ldpc_dec.tb_params.c;
2309 		r = op->ldpc_dec.tb_params.r;
2310 		for (i = 0; i < c-r; i++)
2311 			tb_size += sys_cols * op->ldpc_dec.z_c
2312 					- op->ldpc_dec.n_filler;
2313 	}
2314 	return tb_size;
2315 }
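/*
 * Illustrative example: BG1 has 22 systematic columns, so with Zc = 384 and
 * n_filler = 0 each code block contributes 22 * 384 = 8448 information bits.
 */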
2316 
2317 static uint32_t
2318 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2319 {
2320 	uint8_t i;
2321 	uint32_t c, r, tb_size = 0;
2322 
2323 	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2324 		tb_size = op->turbo_enc.tb_params.k_neg;
2325 	} else {
2326 		c = op->turbo_enc.tb_params.c;
2327 		r = op->turbo_enc.tb_params.r;
2328 		for (i = 0; i < c-r; i++)
2329 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2330 				op->turbo_enc.tb_params.k_neg :
2331 				op->turbo_enc.tb_params.k_pos;
2332 	}
2333 	return tb_size;
2334 }
2335 
2336 static uint32_t
2337 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2338 {
2339 	uint8_t i;
2340 	uint32_t c, r, tb_size = 0;
2341 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2342 
2343 	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2344 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2345 	} else {
2346 		c = op->ldpc_enc.tb_params.c;
2347 		r = op->ldpc_enc.tb_params.r;
2348 		for (i = 0; i < c-r; i++)
2349 			tb_size += sys_cols * op->ldpc_enc.z_c
2350 					- op->ldpc_enc.n_filler;
2351 	}
2352 	return tb_size;
2353 }
2354 
2355 
2356 static int
2357 init_test_op_params(struct test_op_params *op_params,
2358 		enum rte_bbdev_op_type op_type, const int expected_status,
2359 		const int vector_mask, struct rte_mempool *ops_mp,
2360 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2361 {
2362 	int ret = 0;
2363 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2364 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2365 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2366 				&op_params->ref_dec_op, 1);
2367 	else
2368 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2369 				&op_params->ref_enc_op, 1);
2370 
2371 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2372 
2373 	op_params->mp = ops_mp;
2374 	op_params->burst_sz = burst_sz;
2375 	op_params->num_to_process = num_to_process;
2376 	op_params->num_lcores = num_lcores;
2377 	op_params->vector_mask = vector_mask;
2378 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2379 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2380 		op_params->ref_dec_op->status = expected_status;
2381 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2382 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2383 		op_params->ref_enc_op->status = expected_status;
2384 	return 0;
2385 }
2386 
2387 static int
2388 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2389 		struct test_op_params *op_params)
2390 {
2391 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2392 	unsigned int i;
2393 	struct active_device *ad;
2394 	unsigned int burst_sz = get_burst_sz();
2395 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2396 	const struct rte_bbdev_op_cap *capabilities = NULL;
2397 
2398 	ad = &active_devs[dev_id];
2399 
2400 	/* Check if device supports op_type */
2401 	if (!is_avail_op(ad, test_vector.op_type))
2402 		return TEST_SUCCESS;
2403 
2404 	struct rte_bbdev_info info;
2405 	rte_bbdev_info_get(ad->dev_id, &info);
2406 	socket_id = GET_SOCKET(info.socket_id);
2407 
2408 	f_ret = create_mempools(ad, socket_id, op_type,
2409 			get_num_ops());
2410 	if (f_ret != TEST_SUCCESS) {
2411 		printf("Couldn't create mempools");
2412 		goto fail;
2413 	}
2414 	if (op_type == RTE_BBDEV_OP_NONE)
2415 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2416 
2417 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2418 			test_vector.expected_status,
2419 			test_vector.mask,
2420 			ad->ops_mempool,
2421 			burst_sz,
2422 			get_num_ops(),
2423 			get_num_lcores());
2424 	if (f_ret != TEST_SUCCESS) {
2425 		printf("Couldn't init test op params");
2426 		goto fail;
2427 	}
2428 
2429 
2430 	/* Find capabilities */
2431 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2432 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2433 		if (cap->type == test_vector.op_type) {
2434 			capabilities = cap;
2435 			break;
2436 		}
2437 		cap++;
2438 	}
2439 	TEST_ASSERT_NOT_NULL(capabilities,
2440 			"Couldn't find capabilities");
2441 
2442 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2443 		create_reference_dec_op(op_params->ref_dec_op);
2444 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2445 		create_reference_enc_op(op_params->ref_enc_op);
2446 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2447 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2448 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2449 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2450 
2451 	for (i = 0; i < ad->nb_queues; ++i) {
2452 		f_ret = fill_queue_buffers(op_params,
2453 				ad->in_mbuf_pool,
2454 				ad->hard_out_mbuf_pool,
2455 				ad->soft_out_mbuf_pool,
2456 				ad->harq_in_mbuf_pool,
2457 				ad->harq_out_mbuf_pool,
2458 				ad->queue_ids[i],
2459 				capabilities,
2460 				info.drv.min_alignment,
2461 				socket_id);
2462 		if (f_ret != TEST_SUCCESS) {
2463 			printf("Couldn't init queue buffers");
2464 			goto fail;
2465 		}
2466 	}
2467 
2468 	/* Run test case function */
2469 	t_ret = test_case_func(ad, op_params);
2470 
2471 	/* Free active device resources and return */
2472 	free_buffers(ad, op_params);
2473 	return t_ret;
2474 
2475 fail:
2476 	free_buffers(ad, op_params);
2477 	return TEST_FAILED;
2478 }
2479 
2480 /* Run given test function per active device per supported op type
2481  * per burst size.
2482  */
2483 static int
2484 run_test_case(test_case_function *test_case_func)
2485 {
2486 	int ret = 0;
2487 	uint8_t dev;
2488 
2489 	/* Alloc op_params */
2490 	struct test_op_params *op_params = rte_zmalloc(NULL,
2491 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2492 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2493 			RTE_ALIGN(sizeof(struct test_op_params),
2494 				RTE_CACHE_LINE_SIZE));
2495 
2496 	/* For each device run test case function */
2497 	for (dev = 0; dev < nb_active_devs; ++dev)
2498 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2499 
2500 	rte_free(op_params);
2501 
2502 	return ret;
2503 }
2504 
2505 
2506 /* Push back the HARQ output from DDR to host */
2507 static void
2508 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2509 		struct rte_bbdev_dec_op **ops,
2510 		const uint16_t n)
2511 {
2512 	uint16_t j;
2513 	int save_status, ret;
2514 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2515 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2516 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2517 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2518 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2519 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2520 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
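	/*
	 * Each selected op is replayed as a HARQ memory loopback: the combined
	 * input is read back from the device DDR offset used previously and
	 * written to the host output mbuf, then the original op flags and
	 * status are restored.
	 */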
2521 	for (j = 0; j < n; ++j) {
2522 		if ((loopback && mem_out) || hc_out) {
2523 			save_status = ops[j]->status;
2524 			ops[j]->ldpc_dec.op_flags =
2525 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2526 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2527 			if (h_comp)
2528 				ops[j]->ldpc_dec.op_flags +=
2529 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2530 			ops[j]->ldpc_dec.harq_combined_input.offset =
2531 					harq_offset;
2532 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2533 			harq_offset += HARQ_INCR;
2534 			if (!loopback)
2535 				ops[j]->ldpc_dec.harq_combined_input.length =
2536 				ops[j]->ldpc_dec.harq_combined_output.length;
2537 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2538 					&ops[j], 1);
2539 			ret = 0;
2540 			while (ret == 0)
2541 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2542 						dev_id, queue_id,
2543 						&ops_deq[j], 1);
2544 			ops[j]->ldpc_dec.op_flags = flags;
2545 			ops[j]->status = save_status;
2546 		}
2547 	}
2548 }
2549 
2550 /*
2551  * Preload the HARQ input into the HW DDR when requested
2552  * and adjust the HARQ offsets of the operations accordingly
2553  */
2554 static void
2555 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2556 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2557 		bool preload)
2558 {
2559 	uint16_t j;
2560 	int deq;
2561 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2562 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2563 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2564 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2565 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2566 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2567 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2568 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2569 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2570 	if ((mem_in || hc_in) && preload) {
2571 		for (j = 0; j < n; ++j) {
2572 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2573 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2574 			ops[j]->ldpc_dec.op_flags =
2575 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2576 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2577 			if (h_comp)
2578 				ops[j]->ldpc_dec.op_flags +=
2579 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2580 			ops[j]->ldpc_dec.harq_combined_output.offset =
2581 					harq_offset;
2582 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2583 			harq_offset += HARQ_INCR;
2584 		}
2585 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2586 		deq = 0;
2587 		while (deq != n)
2588 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2589 					dev_id, queue_id, &ops_deq[deq],
2590 					n - deq);
2591 		/* Restore the operations */
2592 		for (j = 0; j < n; ++j) {
2593 			ops[j]->ldpc_dec.op_flags = flags;
2594 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2595 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2596 		}
2597 	}
2598 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2599 	for (j = 0; j < n; ++j) {
2600 		/* Adjust HARQ offset when we reach external DDR */
2601 		if (mem_in || hc_in)
2602 			ops[j]->ldpc_dec.harq_combined_input.offset
2603 				= harq_offset;
2604 		if (mem_out || hc_out)
2605 			ops[j]->ldpc_dec.harq_combined_output.offset
2606 				= harq_offset;
2607 		harq_offset += HARQ_INCR;
2608 	}
2609 }
2610 
2611 static void
2612 dequeue_event_callback(uint16_t dev_id,
2613 		enum rte_bbdev_event_type event, void *cb_arg,
2614 		void *ret_param)
2615 {
2616 	int ret;
2617 	uint16_t i;
2618 	uint64_t total_time;
2619 	uint16_t deq, burst_sz, num_ops;
2620 	uint16_t queue_id = *(uint16_t *) ret_param;
2621 	struct rte_bbdev_info info;
2622 	double tb_len_bits;
2623 	struct thread_params *tp = cb_arg;
2624 
2625 	/* Find matching thread params using queue_id */
2626 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2627 		if (tp->queue_id == queue_id)
2628 			break;
2629 
2630 	if (i == MAX_QUEUES) {
2631 		printf("%s: Queue_id from interrupt details was not found!\n",
2632 				__func__);
2633 		return;
2634 	}
2635 
2636 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2637 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2638 		printf(
2639 			"Dequeue interrupt handler called for incorrect event!\n");
2640 		return;
2641 	}
2642 
2643 	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
2644 	num_ops = tp->op_params->num_to_process;
2645 
2646 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2647 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2648 				&tp->dec_ops[
2649 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2650 				burst_sz);
2651 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2652 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2653 				&tp->dec_ops[
2654 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2655 				burst_sz);
2656 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2657 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2658 				&tp->enc_ops[
2659 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2660 				burst_sz);
2661 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2662 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2663 				&tp->enc_ops[
2664 					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
2665 				burst_sz);
2666 
2667 	if (deq < burst_sz) {
2668 		printf(
2669 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2670 			burst_sz, deq);
2671 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2672 		return;
2673 	}
2674 
2675 	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
2676 		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2677 		return;
2678 	}
2679 
2680 	total_time = rte_rdtsc_precise() - tp->start_time;
2681 
2682 	rte_bbdev_info_get(dev_id, &info);
2683 
2684 	ret = TEST_SUCCESS;
2685 
2686 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2687 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2688 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2689 				tp->op_params->vector_mask);
2690 		/* get the max of iter_count for all dequeued ops */
2691 		for (i = 0; i < num_ops; ++i)
2692 			tp->iter_count = RTE_MAX(
2693 					tp->dec_ops[i]->turbo_dec.iter_count,
2694 					tp->iter_count);
2695 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2696 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2697 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2698 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2699 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2700 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2701 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2702 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2703 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2704 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2705 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2706 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2707 				tp->op_params->vector_mask);
2708 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2709 	}
2710 
2711 	if (ret) {
2712 		printf("Buffers validation failed\n");
2713 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2714 	}
2715 
2716 	switch (test_vector.op_type) {
2717 	case RTE_BBDEV_OP_TURBO_DEC:
2718 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2719 		break;
2720 	case RTE_BBDEV_OP_TURBO_ENC:
2721 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2722 		break;
2723 	case RTE_BBDEV_OP_LDPC_DEC:
2724 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2725 		break;
2726 	case RTE_BBDEV_OP_LDPC_ENC:
2727 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2728 		break;
2729 	case RTE_BBDEV_OP_NONE:
2730 		tb_len_bits = 0.0;
2731 		break;
2732 	default:
2733 		printf("Unknown op type: %d\n", test_vector.op_type);
2734 		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
2735 		return;
2736 	}
2737 
2738 	tp->ops_per_sec += ((double)num_ops) /
2739 			((double)total_time / (double)rte_get_tsc_hz());
2740 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2741 			((double)total_time / (double)rte_get_tsc_hz());
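	/*
	 * Illustrative example of the metric above: 512 ops carrying 8448-bit
	 * TBs completed in 1 ms amount to 512 * 8448 / 0.001 / 1e6 ~= 4325 Mbps.
	 */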
2742 
2743 	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
2744 }
2745 
2746 static int
2747 throughput_intr_lcore_ldpc_dec(void *arg)
2748 {
2749 	struct thread_params *tp = arg;
2750 	unsigned int enqueued;
2751 	const uint16_t queue_id = tp->queue_id;
2752 	const uint16_t burst_sz = tp->op_params->burst_sz;
2753 	const uint16_t num_to_process = tp->op_params->num_to_process;
2754 	struct rte_bbdev_dec_op *ops[num_to_process];
2755 	struct test_buffers *bufs = NULL;
2756 	struct rte_bbdev_info info;
2757 	int ret, i, j;
2758 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2759 	uint16_t num_to_enq, enq;
2760 
2761 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2762 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2763 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2764 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2765 
2766 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2767 			"BURST_SIZE should be <= %u", MAX_BURST);
2768 
2769 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2770 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2771 			tp->dev_id, queue_id);
2772 
2773 	rte_bbdev_info_get(tp->dev_id, &info);
2774 
2775 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2776 			"NUM_OPS cannot exceed %u for this device",
2777 			info.drv.queue_size_lim);
2778 
2779 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2780 
2781 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2782 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2783 
2784 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2785 
2786 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2787 				num_to_process);
2788 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2789 			num_to_process);
2790 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2791 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2792 				bufs->hard_outputs, bufs->soft_outputs,
2793 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2794 
2795 	/* Set counter to validate the ordering */
2796 	for (j = 0; j < num_to_process; ++j)
2797 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2798 
2799 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2800 		for (i = 0; i < num_to_process; ++i) {
2801 			if (!loopback)
2802 				rte_pktmbuf_reset(
2803 					ops[i]->ldpc_dec.hard_output.data);
2804 			if (hc_out || loopback)
2805 				mbuf_reset(
2806 				ops[i]->ldpc_dec.harq_combined_output.data);
2807 		}
2808 
2809 		tp->start_time = rte_rdtsc_precise();
2810 		for (enqueued = 0; enqueued < num_to_process;) {
2811 			num_to_enq = burst_sz;
2812 
2813 			if (unlikely(num_to_process - enqueued < num_to_enq))
2814 				num_to_enq = num_to_process - enqueued;
2815 
2816 			enq = 0;
2817 			do {
2818 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2819 						tp->dev_id,
2820 						queue_id, &ops[enqueued],
2821 						num_to_enq);
2822 			} while (unlikely(num_to_enq != enq));
2823 			enqueued += enq;
2824 
2825 			/* Publish the number of descriptors enqueued in this
2826 			 * batch to the thread's burst_sz. This ensures that
2827 			 * the callback dequeues exactly that many descriptors,
2828 			 * which matters for the last batch when the number
2829 			 * of operations is not a multiple of the burst
2830 			 * size.
2831 			 */
2832 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2833 
2834 			/* Wait until processing of previous batch is
2835 			 * completed
2836 			 */
2837 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2838 		}
2839 		if (j != TEST_REPETITIONS - 1)
2840 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2841 	}
2842 
2843 	return TEST_SUCCESS;
2844 }
2845 
2846 static int
2847 throughput_intr_lcore_dec(void *arg)
2848 {
2849 	struct thread_params *tp = arg;
2850 	unsigned int enqueued;
2851 	const uint16_t queue_id = tp->queue_id;
2852 	const uint16_t burst_sz = tp->op_params->burst_sz;
2853 	const uint16_t num_to_process = tp->op_params->num_to_process;
2854 	struct rte_bbdev_dec_op *ops[num_to_process];
2855 	struct test_buffers *bufs = NULL;
2856 	struct rte_bbdev_info info;
2857 	int ret, i, j;
2858 	uint16_t num_to_enq, enq;
2859 
2860 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2861 			"BURST_SIZE should be <= %u", MAX_BURST);
2862 
2863 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2864 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2865 			tp->dev_id, queue_id);
2866 
2867 	rte_bbdev_info_get(tp->dev_id, &info);
2868 
2869 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2870 			"NUM_OPS cannot exceed %u for this device",
2871 			info.drv.queue_size_lim);
2872 
2873 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2874 
2875 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2876 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2877 
2878 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2879 
2880 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2881 				num_to_process);
2882 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2883 			num_to_process);
2884 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2885 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2886 				bufs->hard_outputs, bufs->soft_outputs,
2887 				tp->op_params->ref_dec_op);
2888 
2889 	/* Set counter to validate the ordering */
2890 	for (j = 0; j < num_to_process; ++j)
2891 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2892 
2893 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2894 		for (i = 0; i < num_to_process; ++i)
2895 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2896 
2897 		tp->start_time = rte_rdtsc_precise();
2898 		for (enqueued = 0; enqueued < num_to_process;) {
2899 			num_to_enq = burst_sz;
2900 
2901 			if (unlikely(num_to_process - enqueued < num_to_enq))
2902 				num_to_enq = num_to_process - enqueued;
2903 
2904 			enq = 0;
2905 			do {
2906 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2907 						queue_id, &ops[enqueued],
2908 						num_to_enq);
2909 			} while (unlikely(num_to_enq != enq));
2910 			enqueued += enq;
2911 
2912 			/* Publish the number of descriptors enqueued in this
2913 			 * batch to the thread's burst_sz. This ensures that
2914 			 * the callback dequeues exactly that many descriptors,
2915 			 * which matters for the last batch when the number
2916 			 * of operations is not a multiple of the burst
2917 			 * size.
2918 			 */
2919 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
2920 
2921 			/* Wait until processing of previous batch is
2922 			 * completed
2923 			 */
2924 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
2925 		}
2926 		if (j != TEST_REPETITIONS - 1)
2927 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2928 	}
2929 
2930 	return TEST_SUCCESS;
2931 }
2932 
2933 static int
2934 throughput_intr_lcore_enc(void *arg)
2935 {
2936 	struct thread_params *tp = arg;
2937 	unsigned int enqueued;
2938 	const uint16_t queue_id = tp->queue_id;
2939 	const uint16_t burst_sz = tp->op_params->burst_sz;
2940 	const uint16_t num_to_process = tp->op_params->num_to_process;
2941 	struct rte_bbdev_enc_op *ops[num_to_process];
2942 	struct test_buffers *bufs = NULL;
2943 	struct rte_bbdev_info info;
2944 	int ret, i, j;
2945 	uint16_t num_to_enq, enq;
2946 
2947 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2948 			"BURST_SIZE should be <= %u", MAX_BURST);
2949 
2950 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2951 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2952 			tp->dev_id, queue_id);
2953 
2954 	rte_bbdev_info_get(tp->dev_id, &info);
2955 
2956 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2957 			"NUM_OPS cannot exceed %u for this device",
2958 			info.drv.queue_size_lim);
2959 
2960 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2961 
2962 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
2963 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
2964 
2965 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
2966 
2967 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2968 			num_to_process);
2969 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2970 			num_to_process);
2971 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2972 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2973 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2974 
2975 	/* Set counter to validate the ordering */
2976 	for (j = 0; j < num_to_process; ++j)
2977 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2978 
2979 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2980 		for (i = 0; i < num_to_process; ++i)
2981 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2982 
2983 		tp->start_time = rte_rdtsc_precise();
2984 		for (enqueued = 0; enqueued < num_to_process;) {
2985 			num_to_enq = burst_sz;
2986 
2987 			if (unlikely(num_to_process - enqueued < num_to_enq))
2988 				num_to_enq = num_to_process - enqueued;
2989 
2990 			enq = 0;
2991 			do {
2992 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2993 						queue_id, &ops[enqueued],
2994 						num_to_enq);
2995 			} while (unlikely(enq != num_to_enq));
2996 			enqueued += enq;
2997 
2998 			/* Publish the number of descriptors enqueued in this
2999 			 * batch to the thread's burst_sz. This ensures that
3000 			 * the callback dequeues exactly that many descriptors,
3001 			 * which matters for the last batch when the number
3002 			 * of operations is not a multiple of the burst
3003 			 * size.
3004 			 */
3005 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3006 
3007 			/* Wait until processing of previous batch is
3008 			 * completed
3009 			 */
3010 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3011 		}
3012 		if (j != TEST_REPETITIONS - 1)
3013 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3014 	}
3015 
3016 	return TEST_SUCCESS;
3017 }
3018 
3019 
3020 static int
3021 throughput_intr_lcore_ldpc_enc(void *arg)
3022 {
3023 	struct thread_params *tp = arg;
3024 	unsigned int enqueued;
3025 	const uint16_t queue_id = tp->queue_id;
3026 	const uint16_t burst_sz = tp->op_params->burst_sz;
3027 	const uint16_t num_to_process = tp->op_params->num_to_process;
3028 	struct rte_bbdev_enc_op *ops[num_to_process];
3029 	struct test_buffers *bufs = NULL;
3030 	struct rte_bbdev_info info;
3031 	int ret, i, j;
3032 	uint16_t num_to_enq, enq;
3033 
3034 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3035 			"BURST_SIZE should be <= %u", MAX_BURST);
3036 
3037 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3038 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3039 			tp->dev_id, queue_id);
3040 
3041 	rte_bbdev_info_get(tp->dev_id, &info);
3042 
3043 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3044 			"NUM_OPS cannot exceed %u for this device",
3045 			info.drv.queue_size_lim);
3046 
3047 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3048 
3049 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
3050 	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3051 
3052 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3053 
3054 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3055 			num_to_process);
3056 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3057 			num_to_process);
3058 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3059 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3060 				bufs->inputs, bufs->hard_outputs,
3061 				tp->op_params->ref_enc_op);
3062 
3063 	/* Set counter to validate the ordering */
3064 	for (j = 0; j < num_to_process; ++j)
3065 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3066 
3067 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3068 		for (i = 0; i < num_to_process; ++i)
3069 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3070 
3071 		tp->start_time = rte_rdtsc_precise();
3072 		for (enqueued = 0; enqueued < num_to_process;) {
3073 			num_to_enq = burst_sz;
3074 
3075 			if (unlikely(num_to_process - enqueued < num_to_enq))
3076 				num_to_enq = num_to_process - enqueued;
3077 
3078 			enq = 0;
3079 			do {
3080 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3081 						tp->dev_id,
3082 						queue_id, &ops[enqueued],
3083 						num_to_enq);
3084 			} while (unlikely(enq != num_to_enq));
3085 			enqueued += enq;
3086 
3087 			/* Publish the number of descriptors enqueued in this
3088 			 * batch to the thread's burst_sz. This ensures that
3089 			 * the callback dequeues exactly that many descriptors,
3090 			 * which matters for the last batch when the number
3091 			 * of operations is not a multiple of the burst
3092 			 * size.
3093 			 */
3094 			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
3095 
3096 			/* Wait until processing of previous batch is
3097 			 * completed
3098 			 */
3099 			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
3100 		}
3101 		if (j != TEST_REPETITIONS - 1)
3102 			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
3103 	}
3104 
3105 	return TEST_SUCCESS;
3106 }
3107 
3108 static int
3109 throughput_pmd_lcore_dec(void *arg)
3110 {
3111 	struct thread_params *tp = arg;
3112 	uint16_t enq, deq;
3113 	uint64_t total_time = 0, start_time;
3114 	const uint16_t queue_id = tp->queue_id;
3115 	const uint16_t burst_sz = tp->op_params->burst_sz;
3116 	const uint16_t num_ops = tp->op_params->num_to_process;
3117 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3118 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3119 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3120 	struct test_buffers *bufs = NULL;
3121 	int i, j, ret;
3122 	struct rte_bbdev_info info;
3123 	uint16_t num_to_enq;
3124 
3125 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3126 			"BURST_SIZE should be <= %u", MAX_BURST);
3127 
3128 	rte_bbdev_info_get(tp->dev_id, &info);
3129 
3130 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3131 			"NUM_OPS cannot exceed %u for this device",
3132 			info.drv.queue_size_lim);
3133 
3134 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3135 
3136 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3137 
3138 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3139 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3140 
3141 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3142 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3143 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3144 
3145 	/* Set counter to validate the ordering */
3146 	for (j = 0; j < num_ops; ++j)
3147 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3148 
3149 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3150 
3151 		for (j = 0; j < num_ops; ++j)
3152 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3153 
3154 		start_time = rte_rdtsc_precise();
3155 
3156 		for (enq = 0, deq = 0; enq < num_ops;) {
3157 			num_to_enq = burst_sz;
3158 
3159 			if (unlikely(num_ops - enq < num_to_enq))
3160 				num_to_enq = num_ops - enq;
3161 
3162 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3163 					queue_id, &ops_enq[enq], num_to_enq);
3164 
3165 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3166 					queue_id, &ops_deq[deq], enq - deq);
3167 		}
3168 
3169 		/* dequeue the remaining */
3170 		while (deq < enq) {
3171 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3172 					queue_id, &ops_deq[deq], enq - deq);
3173 		}
3174 
3175 		total_time += rte_rdtsc_precise() - start_time;
3176 	}
3177 
3178 	tp->iter_count = 0;
3179 	/* get the max of iter_count for all dequeued ops */
3180 	for (i = 0; i < num_ops; ++i) {
3181 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3182 				tp->iter_count);
3183 	}
3184 
3185 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3186 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3187 				tp->op_params->vector_mask);
3188 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3189 	}
3190 
3191 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3192 
3193 	double tb_len_bits = calc_dec_TB_size(ref_op);
3194 
3195 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3196 			((double)total_time / (double)rte_get_tsc_hz());
3197 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3198 			1000000.0) / ((double)total_time /
3199 			(double)rte_get_tsc_hz());
3200 
3201 	return TEST_SUCCESS;
3202 }
3203 
3204 static int
3205 bler_pmd_lcore_ldpc_dec(void *arg)
3206 {
3207 	struct thread_params *tp = arg;
3208 	uint16_t enq, deq;
3209 	uint64_t total_time = 0, start_time;
3210 	const uint16_t queue_id = tp->queue_id;
3211 	const uint16_t burst_sz = tp->op_params->burst_sz;
3212 	const uint16_t num_ops = tp->op_params->num_to_process;
3213 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3214 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3215 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3216 	struct test_buffers *bufs = NULL;
3217 	int i, j, ret;
3218 	float parity_bler = 0;
3219 	struct rte_bbdev_info info;
3220 	uint16_t num_to_enq;
3221 	bool extDdr = check_bit(ldpc_cap_flags,
3222 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3223 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3224 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3225 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3226 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3227 
3228 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3229 			"BURST_SIZE should be <= %u", MAX_BURST);
3230 
3231 	rte_bbdev_info_get(tp->dev_id, &info);
3232 
3233 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3234 			"NUM_OPS cannot exceed %u for this device",
3235 			info.drv.queue_size_lim);
3236 
3237 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3238 
3239 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3240 
3241 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3242 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3243 
3244 	/* For BLER tests we need to enable early termination */
3245 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3246 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3247 		ref_op->ldpc_dec.op_flags +=
3248 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3249 	ref_op->ldpc_dec.iter_max = get_iter_max();
3250 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3251 
3252 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3253 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3254 				bufs->hard_outputs, bufs->soft_outputs,
3255 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3256 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3257 
3258 	/* Set counter to validate the ordering */
3259 	for (j = 0; j < num_ops; ++j)
3260 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3261 
3262 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3263 		for (j = 0; j < num_ops; ++j) {
3264 			if (!loopback)
3265 				mbuf_reset(
3266 				ops_enq[j]->ldpc_dec.hard_output.data);
3267 			if (hc_out || loopback)
3268 				mbuf_reset(
3269 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3270 		}
3271 		if (extDdr)
3272 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3273 					num_ops, true);
3274 		start_time = rte_rdtsc_precise();
3275 
3276 		for (enq = 0, deq = 0; enq < num_ops;) {
3277 			num_to_enq = burst_sz;
3278 
3279 			if (unlikely(num_ops - enq < num_to_enq))
3280 				num_to_enq = num_ops - enq;
3281 
3282 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3283 					queue_id, &ops_enq[enq], num_to_enq);
3284 
3285 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3286 					queue_id, &ops_deq[deq], enq - deq);
3287 		}
3288 
3289 		/* dequeue the remaining */
3290 		while (deq < enq) {
3291 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3292 					queue_id, &ops_deq[deq], enq - deq);
3293 		}
3294 
3295 		total_time += rte_rdtsc_precise() - start_time;
3296 	}
3297 
3298 	tp->iter_count = 0;
3299 	tp->iter_average = 0;
3300 	/* Collect max and average iteration counts and syndrome errors over all dequeued ops */
3301 	for (i = 0; i < num_ops; ++i) {
3302 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3303 				tp->iter_count);
3304 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3305 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3306 			parity_bler += 1.0;
3307 	}
3308 
3309 	parity_bler /= num_ops; /* BLER estimate based on the syndrome error flag */
3310 	tp->iter_average /= num_ops;
3311 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3312 
3313 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3314 			&& tp->bler == 0
3315 			&& parity_bler == 0
3316 			&& !hc_out) {
3317 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3318 				tp->op_params->vector_mask);
3319 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3320 	}
3321 
3322 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3323 
3324 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3325 	tp->ops_per_sec = ((double)num_ops * 1) /
3326 			((double)total_time / (double)rte_get_tsc_hz());
3327 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3328 			1000000.0) / ((double)total_time /
3329 			(double)rte_get_tsc_hz());
3330 
3331 	return TEST_SUCCESS;
3332 }
3333 
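/* Per-lcore throughput test routine for the LDPC decoder in PMD mode. */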
3334 static int
3335 throughput_pmd_lcore_ldpc_dec(void *arg)
3336 {
3337 	struct thread_params *tp = arg;
3338 	uint16_t enq, deq;
3339 	uint64_t total_time = 0, start_time;
3340 	const uint16_t queue_id = tp->queue_id;
3341 	const uint16_t burst_sz = tp->op_params->burst_sz;
3342 	const uint16_t num_ops = tp->op_params->num_to_process;
3343 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3344 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3345 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3346 	struct test_buffers *bufs = NULL;
3347 	int i, j, ret;
3348 	struct rte_bbdev_info info;
3349 	uint16_t num_to_enq;
3350 	bool extDdr = check_bit(ldpc_cap_flags,
3351 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3352 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3353 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3354 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3355 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3356 
3357 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3358 			"BURST_SIZE should be <= %u", MAX_BURST);
3359 
3360 	rte_bbdev_info_get(tp->dev_id, &info);
3361 
3362 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3363 			"NUM_OPS cannot exceed %u for this device",
3364 			info.drv.queue_size_lim);
3365 
3366 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3367 
3368 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3369 
3370 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3371 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3372 
3373 	/* For throughput tests we need to disable early termination */
3374 	if (check_bit(ref_op->ldpc_dec.op_flags,
3375 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3376 		ref_op->ldpc_dec.op_flags &=
3377 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3378 	ref_op->ldpc_dec.iter_max = get_iter_max();
3379 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3380 
3381 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3382 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3383 				bufs->hard_outputs, bufs->soft_outputs,
3384 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3385 
3386 	/* Set counter to validate the ordering */
3387 	for (j = 0; j < num_ops; ++j)
3388 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3389 
3390 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3391 		for (j = 0; j < num_ops; ++j) {
3392 			if (!loopback)
3393 				mbuf_reset(
3394 				ops_enq[j]->ldpc_dec.hard_output.data);
3395 			if (hc_out || loopback)
3396 				mbuf_reset(
3397 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3398 		}
3399 		if (extDdr)
3400 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3401 					num_ops, true);
3402 		start_time = rte_rdtsc_precise();
3403 
3404 		for (enq = 0, deq = 0; enq < num_ops;) {
3405 			num_to_enq = burst_sz;
3406 
3407 			if (unlikely(num_ops - enq < num_to_enq))
3408 				num_to_enq = num_ops - enq;
3409 
3410 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3411 					queue_id, &ops_enq[enq], num_to_enq);
3412 
3413 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3414 					queue_id, &ops_deq[deq], enq - deq);
3415 		}
3416 
3417 		/* dequeue the remaining */
3418 		while (deq < enq) {
3419 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3420 					queue_id, &ops_deq[deq], enq - deq);
3421 		}
3422 
3423 		total_time += rte_rdtsc_precise() - start_time;
3424 	}
3425 
3426 	tp->iter_count = 0;
3427 	/* get the max of iter_count for all dequeued ops */
3428 	for (i = 0; i < num_ops; ++i) {
3429 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3430 				tp->iter_count);
3431 	}
3432 	if (extDdr) {
3433 		/* Read loopback is not thread safe */
3434 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3435 	}
3436 
3437 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3438 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3439 				tp->op_params->vector_mask);
3440 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3441 	}
3442 
3443 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3444 
3445 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3446 
3447 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3448 			((double)total_time / (double)rte_get_tsc_hz());
3449 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3450 			1000000.0) / ((double)total_time /
3451 			(double)rte_get_tsc_hz());
3452 
3453 	return TEST_SUCCESS;
3454 }
3455 
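/* Per-lcore throughput test routine for the Turbo encoder in PMD mode. */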
3456 static int
3457 throughput_pmd_lcore_enc(void *arg)
3458 {
3459 	struct thread_params *tp = arg;
3460 	uint16_t enq, deq;
3461 	uint64_t total_time = 0, start_time;
3462 	const uint16_t queue_id = tp->queue_id;
3463 	const uint16_t burst_sz = tp->op_params->burst_sz;
3464 	const uint16_t num_ops = tp->op_params->num_to_process;
3465 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3466 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3467 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3468 	struct test_buffers *bufs = NULL;
3469 	int i, j, ret;
3470 	struct rte_bbdev_info info;
3471 	uint16_t num_to_enq;
3472 
3473 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3474 			"BURST_SIZE should be <= %u", MAX_BURST);
3475 
3476 	rte_bbdev_info_get(tp->dev_id, &info);
3477 
3478 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3479 			"NUM_OPS cannot exceed %u for this device",
3480 			info.drv.queue_size_lim);
3481 
3482 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3483 
3484 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3485 
3486 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3487 			num_ops);
3488 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3489 			num_ops);
3490 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3491 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3492 				bufs->hard_outputs, ref_op);
3493 
3494 	/* Set counter to validate the ordering */
3495 	for (j = 0; j < num_ops; ++j)
3496 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3497 
3498 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3499 
3500 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3501 			for (j = 0; j < num_ops; ++j)
3502 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3503 
3504 		start_time = rte_rdtsc_precise();
3505 
3506 		for (enq = 0, deq = 0; enq < num_ops;) {
3507 			num_to_enq = burst_sz;
3508 
3509 			if (unlikely(num_ops - enq < num_to_enq))
3510 				num_to_enq = num_ops - enq;
3511 
3512 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3513 					queue_id, &ops_enq[enq], num_to_enq);
3514 
3515 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3516 					queue_id, &ops_deq[deq], enq - deq);
3517 		}
3518 
3519 		/* dequeue the remaining */
3520 		while (deq < enq) {
3521 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3522 					queue_id, &ops_deq[deq], enq - deq);
3523 		}
3524 
3525 		total_time += rte_rdtsc_precise() - start_time;
3526 	}
3527 
3528 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3529 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3530 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3531 	}
3532 
3533 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3534 
3535 	double tb_len_bits = calc_enc_TB_size(ref_op);
3536 
3537 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3538 			((double)total_time / (double)rte_get_tsc_hz());
3539 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3540 			/ 1000000.0) / ((double)total_time /
3541 			(double)rte_get_tsc_hz());
3542 
3543 	return TEST_SUCCESS;
3544 }
3545 
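/* Per-lcore throughput test routine for the LDPC encoder in PMD mode. */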
3546 static int
3547 throughput_pmd_lcore_ldpc_enc(void *arg)
3548 {
3549 	struct thread_params *tp = arg;
3550 	uint16_t enq, deq;
3551 	uint64_t total_time = 0, start_time;
3552 	const uint16_t queue_id = tp->queue_id;
3553 	const uint16_t burst_sz = tp->op_params->burst_sz;
3554 	const uint16_t num_ops = tp->op_params->num_to_process;
3555 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3556 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3557 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3558 	struct test_buffers *bufs = NULL;
3559 	int i, j, ret;
3560 	struct rte_bbdev_info info;
3561 	uint16_t num_to_enq;
3562 
3563 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3564 			"BURST_SIZE should be <= %u", MAX_BURST);
3565 
3566 	rte_bbdev_info_get(tp->dev_id, &info);
3567 
3568 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3569 			"NUM_OPS cannot exceed %u for this device",
3570 			info.drv.queue_size_lim);
3571 
3572 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3573 
3574 	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3575 
3576 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3577 			num_ops);
3578 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3579 			num_ops);
3580 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3581 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3582 				bufs->hard_outputs, ref_op);
3583 
3584 	/* Set counter to validate the ordering */
3585 	for (j = 0; j < num_ops; ++j)
3586 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3587 
3588 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3589 
3590 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3591 			for (j = 0; j < num_ops; ++j)
3592 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3593 
3594 		start_time = rte_rdtsc_precise();
3595 
3596 		for (enq = 0, deq = 0; enq < num_ops;) {
3597 			num_to_enq = burst_sz;
3598 
3599 			if (unlikely(num_ops - enq < num_to_enq))
3600 				num_to_enq = num_ops - enq;
3601 
3602 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3603 					queue_id, &ops_enq[enq], num_to_enq);
3604 
3605 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3606 					queue_id, &ops_deq[deq], enq - deq);
3607 		}
3608 
3609 		/* dequeue the remaining */
3610 		while (deq < enq) {
3611 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3612 					queue_id, &ops_deq[deq], enq - deq);
3613 		}
3614 
3615 		total_time += rte_rdtsc_precise() - start_time;
3616 	}
3617 
3618 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3619 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3620 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3621 	}
3622 
3623 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3624 
3625 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3626 
3627 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3628 			((double)total_time / (double)rte_get_tsc_hz());
3629 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3630 			/ 1000000.0) / ((double)total_time /
3631 			(double)rte_get_tsc_hz());
3632 
3633 	return TEST_SUCCESS;
3634 }
3635 
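/* Aggregate the encoder performance results over the number of cores used */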
3636 static void
3637 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3638 {
3639 	unsigned int iter = 0;
3640 	double total_mops = 0, total_mbps = 0;
3641 
3642 	for (iter = 0; iter < used_cores; iter++) {
3643 		printf(
3644 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3645 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3646 			t_params[iter].mbps);
3647 		total_mops += t_params[iter].ops_per_sec;
3648 		total_mbps += t_params[iter].mbps;
3649 	}
3650 	printf(
3651 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3652 		used_cores, total_mops, total_mbps);
3653 }
3654 
3655 /* Aggregate the performance results over the number of cores used */
3656 static void
3657 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3658 {
3659 	unsigned int core_idx = 0;
3660 	double total_mops = 0, total_mbps = 0;
3661 	uint8_t iter_count = 0;
3662 
3663 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3664 		printf(
3665 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3666 			t_params[core_idx].lcore_id,
3667 			t_params[core_idx].ops_per_sec,
3668 			t_params[core_idx].mbps,
3669 			t_params[core_idx].iter_count);
3670 		total_mops += t_params[core_idx].ops_per_sec;
3671 		total_mbps += t_params[core_idx].mbps;
3672 		iter_count = RTE_MAX(iter_count,
3673 				t_params[core_idx].iter_count);
3674 	}
3675 	printf(
3676 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3677 		used_cores, total_mops, total_mbps, iter_count);
3678 }
3679 
3680 /* Aggregate the performance results over the number of cores used */
3681 static void
3682 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3683 {
3684 	unsigned int core_idx = 0;
3685 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3686 	double snr = get_snr();
3687 
3688 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3689 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3690 				t_params[core_idx].lcore_id,
3691 				t_params[core_idx].bler * 100,
3692 				t_params[core_idx].iter_average,
3693 				t_params[core_idx].mbps,
3694 				get_vector_filename());
3695 		total_mbps += t_params[core_idx].mbps;
3696 		total_bler += t_params[core_idx].bler;
3697 		total_iter += t_params[core_idx].iter_average;
3698 	}
3699 	total_bler /= used_cores;
3700 	total_iter /= used_cores;
3701 
3702 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3703 			snr, total_bler * 100, total_iter, get_iter_max(),
3704 			total_mbps, get_vector_filename());
3705 }
3706 
3707 /*
3708  * Test function that measures wireless BLER performance
3709  */
3710 static int
3711 bler_test(struct active_device *ad,
3712 		struct test_op_params *op_params)
3713 {
3714 	int ret;
3715 	unsigned int lcore_id, used_cores = 0;
3716 	struct thread_params *t_params;
3717 	struct rte_bbdev_info info;
3718 	lcore_function_t *bler_function;
3719 	uint16_t num_lcores;
3720 	const char *op_type_str;
3721 
3722 	rte_bbdev_info_get(ad->dev_id, &info);
3723 
3724 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3725 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3726 			test_vector.op_type);
3727 
3728 	printf("+ ------------------------------------------------------- +\n");
3729 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3730 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3731 			op_params->num_to_process, op_params->num_lcores,
3732 			op_type_str,
3733 			intr_enabled ? "Interrupt mode" : "PMD mode",
3734 			(double)rte_get_tsc_hz() / 1000000000.0);
3735 
3736 	/* Set number of lcores */
3737 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3738 			? ad->nb_queues
3739 			: op_params->num_lcores;
3740 
3741 	/* Allocate memory for thread parameters structure */
3742 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3743 			RTE_CACHE_LINE_SIZE);
3744 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3745 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3746 				RTE_CACHE_LINE_SIZE));
3747 
3748 	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3749 			!check_bit(test_vector.ldpc_dec.op_flags,
3750 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3751 			&& !check_bit(test_vector.ldpc_dec.op_flags,
3752 			RTE_BBDEV_LDPC_LLR_COMPRESSION))
3753 		bler_function = bler_pmd_lcore_ldpc_dec;
3754 	else
3755 		return TEST_SKIPPED;
3756 
3757 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3758 
3759 	/* Main core is set at first entry */
3760 	t_params[0].dev_id = ad->dev_id;
3761 	t_params[0].lcore_id = rte_lcore_id();
3762 	t_params[0].op_params = op_params;
3763 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3764 	t_params[0].iter_count = 0;
3765 
3766 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3767 		if (used_cores >= num_lcores)
3768 			break;
3769 
3770 		t_params[used_cores].dev_id = ad->dev_id;
3771 		t_params[used_cores].lcore_id = lcore_id;
3772 		t_params[used_cores].op_params = op_params;
3773 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3774 		t_params[used_cores].iter_count = 0;
3775 
3776 		rte_eal_remote_launch(bler_function,
3777 				&t_params[used_cores++], lcore_id);
3778 	}
3779 
3780 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3781 	ret = bler_function(&t_params[0]);
3782 
3783 	/* Main core is always used */
3784 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3785 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3786 
3787 	print_dec_bler(t_params, num_lcores);
3788 
3789 	/* Return if test failed */
3790 	if (ret) {
3791 		rte_free(t_params);
3792 		return ret;
3793 	}
3794 
3795 	/* Results were already reported by print_dec_bler(); release resources */
3796 	rte_free(t_params);
3797 	return ret;
3798 }
3799 
3800 /*
3801  * Test function that determines how long an enqueue + dequeue of a burst
3802  * takes on available lcores.
3803  */
3804 static int
3805 throughput_test(struct active_device *ad,
3806 		struct test_op_params *op_params)
3807 {
3808 	int ret;
3809 	unsigned int lcore_id, used_cores = 0;
3810 	struct thread_params *t_params, *tp;
3811 	struct rte_bbdev_info info;
3812 	lcore_function_t *throughput_function;
3813 	uint16_t num_lcores;
3814 	const char *op_type_str;
3815 
3816 	rte_bbdev_info_get(ad->dev_id, &info);
3817 
3818 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3819 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3820 			test_vector.op_type);
3821 
3822 	printf("+ ------------------------------------------------------- +\n");
3823 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3824 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3825 			op_params->num_to_process, op_params->num_lcores,
3826 			op_type_str,
3827 			intr_enabled ? "Interrupt mode" : "PMD mode",
3828 			(double)rte_get_tsc_hz() / 1000000000.0);
3829 
3830 	/* Set number of lcores */
3831 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3832 			? ad->nb_queues
3833 			: op_params->num_lcores;
3834 
3835 	/* Allocate memory for thread parameters structure */
3836 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3837 			RTE_CACHE_LINE_SIZE);
3838 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3839 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3840 				RTE_CACHE_LINE_SIZE));
3841 
3842 	if (intr_enabled) {
3843 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3844 			throughput_function = throughput_intr_lcore_dec;
3845 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3846 			throughput_function = throughput_intr_lcore_ldpc_dec;
3847 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3848 			throughput_function = throughput_intr_lcore_enc;
3849 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3850 			throughput_function = throughput_intr_lcore_ldpc_enc;
3851 		else
3852 			throughput_function = throughput_intr_lcore_enc;
3853 
3854 		/* Dequeue interrupt callback registration */
3855 		ret = rte_bbdev_callback_register(ad->dev_id,
3856 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3857 				t_params);
3858 		if (ret < 0) {
3859 			rte_free(t_params);
3860 			return ret;
3861 		}
3862 	} else {
3863 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3864 			throughput_function = throughput_pmd_lcore_dec;
3865 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3866 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3867 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3868 			throughput_function = throughput_pmd_lcore_enc;
3869 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3870 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3871 		else
3872 			throughput_function = throughput_pmd_lcore_enc;
3873 	}
3874 
3875 	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
3876 
3877 	/* Main core is set at first entry */
3878 	t_params[0].dev_id = ad->dev_id;
3879 	t_params[0].lcore_id = rte_lcore_id();
3880 	t_params[0].op_params = op_params;
3881 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3882 	t_params[0].iter_count = 0;
3883 
3884 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3885 		if (used_cores >= num_lcores)
3886 			break;
3887 
3888 		t_params[used_cores].dev_id = ad->dev_id;
3889 		t_params[used_cores].lcore_id = lcore_id;
3890 		t_params[used_cores].op_params = op_params;
3891 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3892 		t_params[used_cores].iter_count = 0;
3893 
3894 		rte_eal_remote_launch(throughput_function,
3895 				&t_params[used_cores++], lcore_id);
3896 	}
3897 
3898 	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
3899 	ret = throughput_function(&t_params[0]);
3900 
3901 	/* Main core is always used */
3902 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3903 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3904 
3905 	/* Return if test failed */
3906 	if (ret) {
3907 		rte_free(t_params);
3908 		return ret;
3909 	}
3910 
3911 	/* Print throughput if interrupts are disabled and test passed */
3912 	if (!intr_enabled) {
3913 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3914 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3915 			print_dec_throughput(t_params, num_lcores);
3916 		else
3917 			print_enc_throughput(t_params, num_lcores);
3918 		rte_free(t_params);
3919 		return ret;
3920 	}
3921 
3922 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3923 	 * all pending operations. Skip waiting for queues which reported an
3924 	 * error using processing_status variable.
3925 	 * Wait for main lcore operations.
3926 	 */
3927 	tp = &t_params[0];
3928 	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3929 		op_params->num_to_process) &&
3930 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3931 		TEST_FAILED))
3932 		rte_pause();
3933 
3934 	tp->ops_per_sec /= TEST_REPETITIONS;
3935 	tp->mbps /= TEST_REPETITIONS;
3936 	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3937 
3938 	/* Wait for worker lcore operations */
3939 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3940 		tp = &t_params[used_cores];
3941 
3942 		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
3943 			op_params->num_to_process) &&
3944 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
3945 			TEST_FAILED))
3946 			rte_pause();
3947 
3948 		tp->ops_per_sec /= TEST_REPETITIONS;
3949 		tp->mbps /= TEST_REPETITIONS;
3950 		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
3951 	}
3952 
3953 	/* Print throughput if test passed */
3954 	if (!ret) {
3955 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3956 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3957 			print_dec_throughput(t_params, num_lcores);
3958 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3959 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3960 			print_enc_throughput(t_params, num_lcores);
3961 	}
3962 
3963 	rte_free(t_params);
3964 	return ret;
3965 }
3966 
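/* Test case for latency/validation for Turbo Decoder */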
3967 static int
3968 latency_test_dec(struct rte_mempool *mempool,
3969 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3970 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3971 		const uint16_t num_to_process, uint16_t burst_sz,
3972 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3973 {
3974 	int ret = TEST_SUCCESS;
3975 	uint16_t i, j, dequeued;
3976 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3977 	uint64_t start_time = 0, last_time = 0;
3978 
3979 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3980 		uint16_t enq = 0, deq = 0;
3981 		bool first_time = true;
3982 		last_time = 0;
3983 
3984 		if (unlikely(num_to_process - dequeued < burst_sz))
3985 			burst_sz = num_to_process - dequeued;
3986 
3987 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3988 		TEST_ASSERT_SUCCESS(ret,
3989 				"rte_bbdev_dec_op_alloc_bulk() failed");
3990 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3991 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3992 					bufs->inputs,
3993 					bufs->hard_outputs,
3994 					bufs->soft_outputs,
3995 					ref_op);
3996 
3997 		/* Set counter to validate the ordering */
3998 		for (j = 0; j < burst_sz; ++j)
3999 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4000 
4001 		start_time = rte_rdtsc_precise();
4002 
4003 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4004 				burst_sz);
4005 		TEST_ASSERT(enq == burst_sz,
4006 				"Error enqueueing burst, expected %u, got %u",
4007 				burst_sz, enq);
4008 
4009 		/* Dequeue */
4010 		do {
4011 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4012 					&ops_deq[deq], burst_sz - deq);
4013 			if (likely(first_time && (deq > 0))) {
4014 				last_time = rte_rdtsc_precise() - start_time;
4015 				first_time = false;
4016 			}
4017 		} while (unlikely(burst_sz != deq));
4018 
4019 		*max_time = RTE_MAX(*max_time, last_time);
4020 		*min_time = RTE_MIN(*min_time, last_time);
4021 		*total_time += last_time;
4022 
4023 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4024 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4025 					vector_mask);
4026 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4027 		}
4028 
4029 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4030 		dequeued += deq;
4031 	}
4032 
4033 	return i;
4034 }
4035 
4036 /* Test case for latency/validation for LDPC Decoder */
4037 static int
4038 latency_test_ldpc_dec(struct rte_mempool *mempool,
4039 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4040 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4041 		const uint16_t num_to_process, uint16_t burst_sz,
4042 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4043 		bool disable_et)
4044 {
4045 	int ret = TEST_SUCCESS;
4046 	uint16_t i, j, dequeued;
4047 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4048 	uint64_t start_time = 0, last_time = 0;
4049 	bool extDdr = ldpc_cap_flags &
4050 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4051 
4052 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4053 		uint16_t enq = 0, deq = 0;
4054 		bool first_time = true;
4055 		last_time = 0;
4056 
4057 		if (unlikely(num_to_process - dequeued < burst_sz))
4058 			burst_sz = num_to_process - dequeued;
4059 
4060 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4061 		TEST_ASSERT_SUCCESS(ret,
4062 				"rte_bbdev_dec_op_alloc_bulk() failed");
4063 
4064 		/* For latency tests we need to disable early termination */
4065 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4066 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4067 			ref_op->ldpc_dec.op_flags &=
4068 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4069 		ref_op->ldpc_dec.iter_max = get_iter_max();
4070 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4071 
4072 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4073 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4074 					bufs->inputs,
4075 					bufs->hard_outputs,
4076 					bufs->soft_outputs,
4077 					bufs->harq_inputs,
4078 					bufs->harq_outputs,
4079 					ref_op);
4080 
4081 		if (extDdr)
4082 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4083 					burst_sz, true);
4084 
4085 		/* Set counter to validate the ordering */
4086 		for (j = 0; j < burst_sz; ++j)
4087 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4088 
4089 		start_time = rte_rdtsc_precise();
4090 
4091 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4092 				&ops_enq[enq], burst_sz);
4093 		TEST_ASSERT(enq == burst_sz,
4094 				"Error enqueueing burst, expected %u, got %u",
4095 				burst_sz, enq);
4096 
4097 		/* Dequeue */
4098 		do {
4099 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4100 					&ops_deq[deq], burst_sz - deq);
4101 			if (likely(first_time && (deq > 0))) {
4102 				last_time = rte_rdtsc_precise() - start_time;
4103 				first_time = false;
4104 			}
4105 		} while (unlikely(burst_sz != deq));
4106 
4107 		*max_time = RTE_MAX(*max_time, last_time);
4108 		*min_time = RTE_MIN(*min_time, last_time);
4109 		*total_time += last_time;
4110 
4111 		if (extDdr)
4112 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4113 
4114 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4115 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4116 					vector_mask);
4117 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4118 		}
4119 
4120 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4121 		dequeued += deq;
4122 	}
4123 	return i;
4124 }
4125 
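/* Test case for latency/validation for Turbo Encoder */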
4126 static int
4127 latency_test_enc(struct rte_mempool *mempool,
4128 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4129 		uint16_t dev_id, uint16_t queue_id,
4130 		const uint16_t num_to_process, uint16_t burst_sz,
4131 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4132 {
4133 	int ret = TEST_SUCCESS;
4134 	uint16_t i, j, dequeued;
4135 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4136 	uint64_t start_time = 0, last_time = 0;
4137 
4138 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4139 		uint16_t enq = 0, deq = 0;
4140 		bool first_time = true;
4141 		last_time = 0;
4142 
4143 		if (unlikely(num_to_process - dequeued < burst_sz))
4144 			burst_sz = num_to_process - dequeued;
4145 
4146 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4147 		TEST_ASSERT_SUCCESS(ret,
4148 				"rte_bbdev_enc_op_alloc_bulk() failed");
4149 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4150 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4151 					bufs->inputs,
4152 					bufs->hard_outputs,
4153 					ref_op);
4154 
4155 		/* Set counter to validate the ordering */
4156 		for (j = 0; j < burst_sz; ++j)
4157 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4158 
4159 		start_time = rte_rdtsc_precise();
4160 
4161 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4162 				burst_sz);
4163 		TEST_ASSERT(enq == burst_sz,
4164 				"Error enqueueing burst, expected %u, got %u",
4165 				burst_sz, enq);
4166 
4167 		/* Dequeue */
4168 		do {
4169 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4170 					&ops_deq[deq], burst_sz - deq);
4171 			if (likely(first_time && (deq > 0))) {
4172 				last_time = rte_rdtsc_precise() - start_time;
4173 				first_time = false;
4174 			}
4175 		} while (unlikely(burst_sz != deq));
4176 
4177 		*max_time = RTE_MAX(*max_time, last_time);
4178 		*min_time = RTE_MIN(*min_time, last_time);
4179 		*total_time += last_time;
4180 
4181 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4182 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4183 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4184 		}
4185 
4186 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4187 		dequeued += deq;
4188 	}
4189 
4190 	return i;
4191 }
4192 
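/* Test case for latency/validation for LDPC Encoder */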
4193 static int
4194 latency_test_ldpc_enc(struct rte_mempool *mempool,
4195 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4196 		uint16_t dev_id, uint16_t queue_id,
4197 		const uint16_t num_to_process, uint16_t burst_sz,
4198 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4199 {
4200 	int ret = TEST_SUCCESS;
4201 	uint16_t i, j, dequeued;
4202 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4203 	uint64_t start_time = 0, last_time = 0;
4204 
4205 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4206 		uint16_t enq = 0, deq = 0;
4207 		bool first_time = true;
4208 		last_time = 0;
4209 
4210 		if (unlikely(num_to_process - dequeued < burst_sz))
4211 			burst_sz = num_to_process - dequeued;
4212 
4213 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4214 		TEST_ASSERT_SUCCESS(ret,
4215 				"rte_bbdev_enc_op_alloc_bulk() failed");
4216 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4217 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4218 					bufs->inputs,
4219 					bufs->hard_outputs,
4220 					ref_op);
4221 
4222 		/* Set counter to validate the ordering */
4223 		for (j = 0; j < burst_sz; ++j)
4224 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4225 
4226 		start_time = rte_rdtsc_precise();
4227 
4228 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4229 				&ops_enq[enq], burst_sz);
4230 		TEST_ASSERT(enq == burst_sz,
4231 				"Error enqueueing burst, expected %u, got %u",
4232 				burst_sz, enq);
4233 
4234 		/* Dequeue */
4235 		do {
4236 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4237 					&ops_deq[deq], burst_sz - deq);
4238 			if (likely(first_time && (deq > 0))) {
4239 				last_time = rte_rdtsc_precise() - start_time;
4240 				first_time = false;
4241 			}
4242 		} while (unlikely(burst_sz != deq));
4243 
4244 		*max_time = RTE_MAX(*max_time, last_time);
4245 		*min_time = RTE_MIN(*min_time, last_time);
4246 		*total_time += last_time;
4247 
4248 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4249 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4250 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4251 		}
4252 
4253 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4254 		dequeued += deq;
4255 	}
4256 
4257 	return i;
4258 }
4259 
4260 /* Common function for running validation and latency test cases */
4261 static int
4262 validation_latency_test(struct active_device *ad,
4263 		struct test_op_params *op_params, bool latency_flag)
4264 {
4265 	int iter;
4266 	uint16_t burst_sz = op_params->burst_sz;
4267 	const uint16_t num_to_process = op_params->num_to_process;
4268 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4269 	const uint16_t queue_id = ad->queue_ids[0];
4270 	struct test_buffers *bufs = NULL;
4271 	struct rte_bbdev_info info;
4272 	uint64_t total_time, min_time, max_time;
4273 	const char *op_type_str;
4274 
4275 	total_time = max_time = 0;
4276 	min_time = UINT64_MAX;
4277 
4278 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4279 			"BURST_SIZE should be <= %u", MAX_BURST);
4280 
4281 	rte_bbdev_info_get(ad->dev_id, &info);
4282 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4283 
4284 	op_type_str = rte_bbdev_op_type_str(op_type);
4285 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4286 
4287 	printf("+ ------------------------------------------------------- +\n");
4288 	if (latency_flag)
4289 		printf("== test: latency\ndev: ");
4290 	else
4291 		printf("== test: validation\ndev: ");
4292 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4293 			info.dev_name, burst_sz, num_to_process, op_type_str);
4294 
4295 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4296 		iter = latency_test_dec(op_params->mp, bufs,
4297 				op_params->ref_dec_op, op_params->vector_mask,
4298 				ad->dev_id, queue_id, num_to_process,
4299 				burst_sz, &total_time, &min_time, &max_time);
4300 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4301 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4302 				op_params->ref_enc_op, ad->dev_id, queue_id,
4303 				num_to_process, burst_sz, &total_time,
4304 				&min_time, &max_time);
4305 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4306 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4307 				op_params->ref_dec_op, op_params->vector_mask,
4308 				ad->dev_id, queue_id, num_to_process,
4309 				burst_sz, &total_time, &min_time, &max_time,
4310 				latency_flag);
4311 	else /* RTE_BBDEV_OP_TURBO_ENC */
4312 		iter = latency_test_enc(op_params->mp, bufs,
4313 				op_params->ref_enc_op,
4314 				ad->dev_id, queue_id,
4315 				num_to_process, burst_sz, &total_time,
4316 				&min_time, &max_time);
4317 
4318 	if (iter <= 0)
4319 		return TEST_FAILED;
4320 
4321 	printf("Operation latency:\n"
4322 			"\tavg: %lg cycles, %lg us\n"
4323 			"\tmin: %lg cycles, %lg us\n"
4324 			"\tmax: %lg cycles, %lg us\n",
4325 			(double)total_time / (double)iter,
4326 			(double)(total_time * 1000000) / (double)iter /
4327 			(double)rte_get_tsc_hz(), (double)min_time,
4328 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4329 			(double)max_time, (double)(max_time * 1000000) /
4330 			(double)rte_get_tsc_hz());
4331 
4332 	return TEST_SUCCESS;
4333 }
4334 
4335 static int
4336 latency_test(struct active_device *ad, struct test_op_params *op_params)
4337 {
4338 	return validation_latency_test(ad, op_params, true);
4339 }
4340 
4341 static int
4342 validation_test(struct active_device *ad, struct test_op_params *op_params)
4343 {
4344 	return validation_latency_test(ad, op_params, false);
4345 }
4346 
4347 #ifdef RTE_BBDEV_OFFLOAD_COST
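/* Copy per-queue statistics, including acc_offload_cycles, directly from the
 * device's internal queue data.
 */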
4348 static int
4349 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4350 		struct rte_bbdev_stats *stats)
4351 {
4352 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4353 	struct rte_bbdev_stats *q_stats;
4354 
4355 	if (queue_id >= dev->data->num_queues)
4356 		return -1;
4357 
4358 	q_stats = &dev->data->queues[queue_id].queue_stats;
4359 
4360 	stats->enqueued_count = q_stats->enqueued_count;
4361 	stats->dequeued_count = q_stats->dequeued_count;
4362 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4363 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4364 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4365 
4366 	return 0;
4367 }
4368 
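/* Measure per-burst enqueue and dequeue offload cost for the Turbo decoder,
 * splitting the enqueue time into driver (software) and accelerator components.
 */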
4369 static int
4370 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4371 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4372 		uint16_t queue_id, const uint16_t num_to_process,
4373 		uint16_t burst_sz, struct test_time_stats *time_st)
4374 {
4375 	int i, dequeued, ret;
4376 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4377 	uint64_t enq_start_time, deq_start_time;
4378 	uint64_t enq_sw_last_time, deq_last_time;
4379 	struct rte_bbdev_stats stats;
4380 
4381 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4382 		uint16_t enq = 0, deq = 0;
4383 
4384 		if (unlikely(num_to_process - dequeued < burst_sz))
4385 			burst_sz = num_to_process - dequeued;
4386 
4387 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4388 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4389 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4390 					bufs->inputs,
4391 					bufs->hard_outputs,
4392 					bufs->soft_outputs,
4393 					ref_op);
4394 
4395 		/* Start time meas for enqueue function offload latency */
4396 		enq_start_time = rte_rdtsc_precise();
4397 		do {
4398 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4399 					&ops_enq[enq], burst_sz - enq);
4400 		} while (unlikely(burst_sz != enq));
4401 
4402 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4403 		TEST_ASSERT_SUCCESS(ret,
4404 				"Failed to get stats for queue (%u) of device (%u)",
4405 				queue_id, dev_id);
4406 
4407 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4408 				stats.acc_offload_cycles;
4409 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4410 				enq_sw_last_time);
4411 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4412 				enq_sw_last_time);
4413 		time_st->enq_sw_total_time += enq_sw_last_time;
4414 
4415 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4416 				stats.acc_offload_cycles);
4417 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4418 				stats.acc_offload_cycles);
4419 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4420 
4421 		/* give time for device to process ops */
4422 		rte_delay_us(WAIT_OFFLOAD_US);
4423 
4424 		/* Start time meas for dequeue function offload latency */
4425 		deq_start_time = rte_rdtsc_precise();
4426 		/* Dequeue one operation */
4427 		do {
4428 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4429 					&ops_deq[deq], enq);
4430 		} while (unlikely(deq == 0));
4431 
4432 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4433 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4434 				deq_last_time);
4435 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4436 				deq_last_time);
4437 		time_st->deq_total_time += deq_last_time;
4438 
4439 		/* Dequeue remaining operations if needed*/
4440 		while (burst_sz != deq)
4441 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4442 					&ops_deq[deq], burst_sz - deq);
4443 
4444 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4445 		dequeued += deq;
4446 	}
4447 
4448 	return i;
4449 }
4450 
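/* Measure per-burst enqueue and dequeue offload cost for the LDPC decoder. */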
4451 static int
4452 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4453 		struct test_buffers *bufs,
4454 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4455 		uint16_t queue_id, const uint16_t num_to_process,
4456 		uint16_t burst_sz, struct test_time_stats *time_st)
4457 {
4458 	int i, dequeued, ret;
4459 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4460 	uint64_t enq_start_time, deq_start_time;
4461 	uint64_t enq_sw_last_time, deq_last_time;
4462 	struct rte_bbdev_stats stats;
4463 	bool extDdr = ldpc_cap_flags &
4464 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4465 
4466 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4467 		uint16_t enq = 0, deq = 0;
4468 
4469 		if (unlikely(num_to_process - dequeued < burst_sz))
4470 			burst_sz = num_to_process - dequeued;
4471 
4472 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4473 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4474 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4475 					bufs->inputs,
4476 					bufs->hard_outputs,
4477 					bufs->soft_outputs,
4478 					bufs->harq_inputs,
4479 					bufs->harq_outputs,
4480 					ref_op);
4481 
4482 		if (extDdr)
4483 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4484 					burst_sz, true);
4485 
4486 		/* Start time meas for enqueue function offload latency */
4487 		enq_start_time = rte_rdtsc_precise();
4488 		do {
4489 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4490 					&ops_enq[enq], burst_sz - enq);
4491 		} while (unlikely(burst_sz != enq));
4492 
4493 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4494 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4495 		TEST_ASSERT_SUCCESS(ret,
4496 				"Failed to get stats for queue (%u) of device (%u)",
4497 				queue_id, dev_id);
4498 
4499 		enq_sw_last_time -= stats.acc_offload_cycles;
4500 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4501 				enq_sw_last_time);
4502 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4503 				enq_sw_last_time);
4504 		time_st->enq_sw_total_time += enq_sw_last_time;
4505 
4506 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4507 				stats.acc_offload_cycles);
4508 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4509 				stats.acc_offload_cycles);
4510 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4511 
4512 		/* give time for device to process ops */
4513 		rte_delay_us(WAIT_OFFLOAD_US);
4514 
4515 		/* Start time meas for dequeue function offload latency */
4516 		deq_start_time = rte_rdtsc_precise();
4517 		/* Dequeue one operation */
4518 		do {
4519 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4520 					&ops_deq[deq], enq);
4521 		} while (unlikely(deq == 0));
4522 
4523 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4524 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4525 				deq_last_time);
4526 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4527 				deq_last_time);
4528 		time_st->deq_total_time += deq_last_time;
4529 
4530 		/* Dequeue remaining operations if needed*/
4531 		while (burst_sz != deq)
4532 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4533 					&ops_deq[deq], burst_sz - deq);
4534 
4535 		if (extDdr) {
4536 			/* Read loopback is not thread safe */
4537 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4538 		}
4539 
4540 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4541 		dequeued += deq;
4542 	}
4543 
4544 	return i;
4545 }
4546 
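/* Measure per-burst enqueue and dequeue offload cost for the Turbo encoder. */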
4547 static int
4548 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4549 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4550 		uint16_t queue_id, const uint16_t num_to_process,
4551 		uint16_t burst_sz, struct test_time_stats *time_st)
4552 {
4553 	int i, dequeued, ret;
4554 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4555 	uint64_t enq_start_time, deq_start_time;
4556 	uint64_t enq_sw_last_time, deq_last_time;
4557 	struct rte_bbdev_stats stats;
4558 
4559 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4560 		uint16_t enq = 0, deq = 0;
4561 
4562 		if (unlikely(num_to_process - dequeued < burst_sz))
4563 			burst_sz = num_to_process - dequeued;
4564 
4565 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4566 		TEST_ASSERT_SUCCESS(ret,
4567 				"rte_bbdev_enc_op_alloc_bulk() failed");
4568 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4569 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4570 					bufs->inputs,
4571 					bufs->hard_outputs,
4572 					ref_op);
4573 
4574 		/* Start time meas for enqueue function offload latency */
4575 		enq_start_time = rte_rdtsc_precise();
4576 		do {
4577 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4578 					&ops_enq[enq], burst_sz - enq);
4579 		} while (unlikely(burst_sz != enq));
4580 
4581 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4582 
4583 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4584 		TEST_ASSERT_SUCCESS(ret,
4585 				"Failed to get stats for queue (%u) of device (%u)",
4586 				queue_id, dev_id);
4587 		enq_sw_last_time -= stats.acc_offload_cycles;
4588 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4589 				enq_sw_last_time);
4590 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4591 				enq_sw_last_time);
4592 		time_st->enq_sw_total_time += enq_sw_last_time;
4593 
4594 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4595 				stats.acc_offload_cycles);
4596 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4597 				stats.acc_offload_cycles);
4598 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4599 
4600 		/* give time for device to process ops */
4601 		rte_delay_us(WAIT_OFFLOAD_US);
4602 
4603 		/* Start time meas for dequeue function offload latency */
4604 		deq_start_time = rte_rdtsc_precise();
4605 		/* Dequeue one operation */
4606 		do {
4607 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4608 					&ops_deq[deq], enq);
4609 		} while (unlikely(deq == 0));
4610 
4611 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4612 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4613 				deq_last_time);
4614 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4615 				deq_last_time);
4616 		time_st->deq_total_time += deq_last_time;
4617 
4618 		while (burst_sz != deq)
4619 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4620 					&ops_deq[deq], burst_sz - deq);
4621 
4622 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4623 		dequeued += deq;
4624 	}
4625 
4626 	return i;
4627 }
4628 
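/* Measure per-burst enqueue and dequeue offload cost for the LDPC encoder. */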
4629 static int
4630 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4631 		struct test_buffers *bufs,
4632 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4633 		uint16_t queue_id, const uint16_t num_to_process,
4634 		uint16_t burst_sz, struct test_time_stats *time_st)
4635 {
4636 	int i, dequeued, ret;
4637 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4638 	uint64_t enq_start_time, deq_start_time;
4639 	uint64_t enq_sw_last_time, deq_last_time;
4640 	struct rte_bbdev_stats stats;
4641 
4642 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4643 		uint16_t enq = 0, deq = 0;
4644 
4645 		if (unlikely(num_to_process - dequeued < burst_sz))
4646 			burst_sz = num_to_process - dequeued;
4647 
4648 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4649 		TEST_ASSERT_SUCCESS(ret,
4650 				"rte_bbdev_enc_op_alloc_bulk() failed");
4651 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4652 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4653 					bufs->inputs,
4654 					bufs->hard_outputs,
4655 					ref_op);
4656 
4657 		/* Start time meas for enqueue function offload latency */
4658 		enq_start_time = rte_rdtsc_precise();
4659 		do {
4660 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4661 					&ops_enq[enq], burst_sz - enq);
4662 		} while (unlikely(burst_sz != enq));
4663 
4664 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4665 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4666 		TEST_ASSERT_SUCCESS(ret,
4667 				"Failed to get stats for queue (%u) of device (%u)",
4668 				queue_id, dev_id);
4669 
4670 		enq_sw_last_time -= stats.acc_offload_cycles;
4671 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4672 				enq_sw_last_time);
4673 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4674 				enq_sw_last_time);
4675 		time_st->enq_sw_total_time += enq_sw_last_time;
4676 
4677 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4678 				stats.acc_offload_cycles);
4679 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4680 				stats.acc_offload_cycles);
4681 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4682 
4683 		/* give time for device to process ops */
4684 		rte_delay_us(WAIT_OFFLOAD_US);
4685 
4686 		/* Start time meas for dequeue function offload latency */
4687 		deq_start_time = rte_rdtsc_precise();
4688 		/* Dequeue one operation */
4689 		do {
4690 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4691 					&ops_deq[deq], enq);
4692 		} while (unlikely(deq == 0));
4693 
4694 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4695 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4696 				deq_last_time);
4697 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4698 				deq_last_time);
4699 		time_st->deq_total_time += deq_last_time;
4700 
4701 		while (burst_sz != deq)
4702 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4703 					&ops_deq[deq], burst_sz - deq);
4704 
4705 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4706 		dequeued += deq;
4707 	}
4708 
4709 	return i;
4710 }
4711 #endif
4712 
4713 static int
4714 offload_cost_test(struct active_device *ad,
4715 		struct test_op_params *op_params)
4716 {
4717 #ifndef RTE_BBDEV_OFFLOAD_COST
4718 	RTE_SET_USED(ad);
4719 	RTE_SET_USED(op_params);
4720 	printf("Offload latency test is disabled.\n");
4721 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4722 	return TEST_SKIPPED;
4723 #else
4724 	int iter;
4725 	uint16_t burst_sz = op_params->burst_sz;
4726 	const uint16_t num_to_process = op_params->num_to_process;
4727 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4728 	const uint16_t queue_id = ad->queue_ids[0];
4729 	struct test_buffers *bufs = NULL;
4730 	struct rte_bbdev_info info;
4731 	const char *op_type_str;
4732 	struct test_time_stats time_st;
4733 
4734 	memset(&time_st, 0, sizeof(struct test_time_stats));
4735 	time_st.enq_sw_min_time = UINT64_MAX;
4736 	time_st.enq_acc_min_time = UINT64_MAX;
4737 	time_st.deq_min_time = UINT64_MAX;
4738 
4739 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4740 			"BURST_SIZE should be <= %u", MAX_BURST);
4741 
4742 	rte_bbdev_info_get(ad->dev_id, &info);
4743 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4744 
4745 	op_type_str = rte_bbdev_op_type_str(op_type);
4746 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4747 
4748 	printf("+ ------------------------------------------------------- +\n");
4749 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4750 			info.dev_name, burst_sz, num_to_process, op_type_str);
4751 
4752 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4753 		iter = offload_latency_test_dec(op_params->mp, bufs,
4754 				op_params->ref_dec_op, ad->dev_id, queue_id,
4755 				num_to_process, burst_sz, &time_st);
4756 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4757 		iter = offload_latency_test_enc(op_params->mp, bufs,
4758 				op_params->ref_enc_op, ad->dev_id, queue_id,
4759 				num_to_process, burst_sz, &time_st);
4760 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4761 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4762 				op_params->ref_enc_op, ad->dev_id, queue_id,
4763 				num_to_process, burst_sz, &time_st);
4764 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4765 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4766 			op_params->ref_dec_op, ad->dev_id, queue_id,
4767 			num_to_process, burst_sz, &time_st);
4768 	else
4769 		iter = offload_latency_test_enc(op_params->mp, bufs,
4770 				op_params->ref_enc_op, ad->dev_id, queue_id,
4771 				num_to_process, burst_sz, &time_st);
4772 
4773 	if (iter <= 0)
4774 		return TEST_FAILED;
4775 
4776 	printf("Enqueue driver offload cost latency:\n"
4777 			"\tavg: %lg cycles, %lg us\n"
4778 			"\tmin: %lg cycles, %lg us\n"
4779 			"\tmax: %lg cycles, %lg us\n"
4780 			"Enqueue accelerator offload cost latency:\n"
4781 			"\tavg: %lg cycles, %lg us\n"
4782 			"\tmin: %lg cycles, %lg us\n"
4783 			"\tmax: %lg cycles, %lg us\n",
4784 			(double)time_st.enq_sw_total_time / (double)iter,
4785 			(double)(time_st.enq_sw_total_time * 1000000) /
4786 			(double)iter / (double)rte_get_tsc_hz(),
4787 			(double)time_st.enq_sw_min_time,
4788 			(double)(time_st.enq_sw_min_time * 1000000) /
4789 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4790 			(double)(time_st.enq_sw_max_time * 1000000) /
4791 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4792 			(double)iter,
4793 			(double)(time_st.enq_acc_total_time * 1000000) /
4794 			(double)iter / (double)rte_get_tsc_hz(),
4795 			(double)time_st.enq_acc_min_time,
4796 			(double)(time_st.enq_acc_min_time * 1000000) /
4797 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4798 			(double)(time_st.enq_acc_max_time * 1000000) /
4799 			rte_get_tsc_hz());
4800 
4801 	printf("Dequeue offload cost latency - one op:\n"
4802 			"\tavg: %lg cycles, %lg us\n"
4803 			"\tmin: %lg cycles, %lg us\n"
4804 			"\tmax: %lg cycles, %lg us\n",
4805 			(double)time_st.deq_total_time / (double)iter,
4806 			(double)(time_st.deq_total_time * 1000000) /
4807 			(double)iter / (double)rte_get_tsc_hz(),
4808 			(double)time_st.deq_min_time,
4809 			(double)(time_st.deq_min_time * 1000000) /
4810 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4811 			(double)(time_st.deq_max_time * 1000000) /
4812 			rte_get_tsc_hz());
4813 
4814 	struct rte_bbdev_stats stats = {0};
4815 	TEST_ASSERT_SUCCESS(get_bbdev_queue_stats(ad->dev_id, queue_id, &stats), "Failed to get queue (%u) stats", queue_id);
4816 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4817 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4818 				"Mismatch in enqueue count %10"PRIu64" %d",
4819 				stats.enqueued_count, num_to_process);
4820 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4821 				"Mismatch in dequeue count %10"PRIu64" %d",
4822 				stats.dequeued_count, num_to_process);
4823 	}
4824 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4825 			"Enqueue error count %10"PRIu64"",
4826 			stats.enqueue_err_count);
4827 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4828 			"Dequeue error count %10"PRIu64"",
4829 			stats.dequeue_err_count);
4830 
4831 	return TEST_SUCCESS;
4832 #endif
4833 }
4834 
4835 #ifdef RTE_BBDEV_OFFLOAD_COST
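/*
 * Measure the cost of dequeue calls issued against an empty decoder queue
 * (nothing was enqueued), accumulating total/min/max cycles per burst.
 */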
4836 static int
4837 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4838 		const uint16_t num_to_process, uint16_t burst_sz,
4839 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4840 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4841 {
4842 	int i, deq_total;
4843 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4844 	uint64_t deq_start_time, deq_last_time;
4845 
4846 	/* Test deq offload latency from an empty queue */
4847 
4848 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4849 			++i, deq_total += burst_sz) {
4850 		deq_start_time = rte_rdtsc_precise();
4851 
4852 		if (unlikely(num_to_process - deq_total < burst_sz))
4853 			burst_sz = num_to_process - deq_total;
4854 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4855 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4856 					burst_sz);
4857 		else
4858 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4859 					burst_sz);
4860 
4861 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4862 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4863 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4864 		*deq_total_time += deq_last_time;
4865 	}
4866 
4867 	return i;
4868 }
4869 
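/*
 * Encoder-side counterpart of the empty-queue dequeue measurement above:
 * time dequeue calls on a queue with no operations enqueued.
 */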
4870 static int
4871 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4872 		const uint16_t num_to_process, uint16_t burst_sz,
4873 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4874 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4875 {
4876 	int i, deq_total;
4877 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4878 	uint64_t deq_start_time, deq_last_time;
4879 
4880 	/* Test deq offload latency from an empty queue */
4881 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4882 			++i, deq_total += burst_sz) {
4883 		deq_start_time = rte_rdtsc_precise();
4884 
4885 		if (unlikely(num_to_process - deq_total < burst_sz))
4886 			burst_sz = num_to_process - deq_total;
4887 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4888 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4889 					burst_sz);
4890 		else
4891 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4892 					burst_sz);
4893 
4894 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4895 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4896 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4897 		*deq_total_time += deq_last_time;
4898 	}
4899 
4900 	return i;
4901 }
4902 
4903 #endif
4904 
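/*
 * Test case: measure the baseline cost of polling an empty queue. Reported
 * as skipped unless RTE_BBDEV_OFFLOAD_COST is enabled at build time.
 */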
4905 static int
4906 offload_latency_empty_q_test(struct active_device *ad,
4907 		struct test_op_params *op_params)
4908 {
4909 #ifndef RTE_BBDEV_OFFLOAD_COST
4910 	RTE_SET_USED(ad);
4911 	RTE_SET_USED(op_params);
4912 	printf("Offload latency empty dequeue test is disabled.\n");
4913 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4914 	return TEST_SKIPPED;
4915 #else
4916 	int iter;
4917 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4918 	uint16_t burst_sz = op_params->burst_sz;
4919 	const uint16_t num_to_process = op_params->num_to_process;
4920 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4921 	const uint16_t queue_id = ad->queue_ids[0];
4922 	struct rte_bbdev_info info;
4923 	const char *op_type_str;
4924 
4925 	deq_total_time = deq_max_time = 0;
4926 	deq_min_time = UINT64_MAX;
4927 
4928 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4929 			"BURST_SIZE should be <= %u", MAX_BURST);
4930 
4931 	rte_bbdev_info_get(ad->dev_id, &info);
4932 
4933 	op_type_str = rte_bbdev_op_type_str(op_type);
4934 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4935 
4936 	printf("+ ------------------------------------------------------- +\n");
4937 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4938 			info.dev_name, burst_sz, num_to_process, op_type_str);
4939 
4940 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4941 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4942 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4943 				num_to_process, burst_sz, &deq_total_time,
4944 				&deq_min_time, &deq_max_time, op_type);
4945 	else
4946 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4947 				num_to_process, burst_sz, &deq_total_time,
4948 				&deq_min_time, &deq_max_time, op_type);
4949 
4950 	if (iter <= 0)
4951 		return TEST_FAILED;
4952 
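	/* Report avg/min/max empty-dequeue cost; same cycles-to-us conversion as above. */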
4953 	printf("Empty dequeue offload:\n"
4954 			"\tavg: %lg cycles, %lg us\n"
4955 			"\tmin: %lg cycles, %lg us\n"
4956 			"\tmax: %lg cycles, %lg us\n",
4957 			(double)deq_total_time / (double)iter,
4958 			(double)(deq_total_time * 1000000) / (double)iter /
4959 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4960 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4961 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4962 			rte_get_tsc_hz());
4963 
4964 	return TEST_SUCCESS;
4965 #endif
4966 }
4967 
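/* Thin wrappers binding each test function to the unit test framework. */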
4968 static int
4969 bler_tc(void)
4970 {
4971 	return run_test_case(bler_test);
4972 }
4973 
4974 static int
4975 throughput_tc(void)
4976 {
4977 	return run_test_case(throughput_test);
4978 }
4979 
4980 static int
4981 offload_cost_tc(void)
4982 {
4983 	return run_test_case(offload_cost_test);
4984 }
4985 
4986 static int
4987 offload_latency_empty_q_tc(void)
4988 {
4989 	return run_test_case(offload_latency_empty_q_test);
4990 }
4991 
4992 static int
4993 latency_tc(void)
4994 {
4995 	return run_test_case(latency_test);
4996 }
4997 
4998 static int
4999 validation_tc(void)
5000 {
5001 	return run_test_case(validation_test);
5002 }
5003 
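/*
 * The interrupt test case reuses throughput_test; interrupt mode is expected
 * to be selected via intr_enabled in the interrupt test suite setup.
 */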
5004 static int
5005 interrupt_tc(void)
5006 {
5007 	return run_test_case(throughput_test);
5008 }
5009 
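/* Unit test suites: one per test command, sharing common setup/teardown. */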
5010 static struct unit_test_suite bbdev_bler_testsuite = {
5011 	.suite_name = "BBdev BLER Tests",
5012 	.setup = testsuite_setup,
5013 	.teardown = testsuite_teardown,
5014 	.unit_test_cases = {
5015 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5016 		TEST_CASES_END() /**< NULL terminate unit test array */
5017 	}
5018 };
5019 
5020 static struct unit_test_suite bbdev_throughput_testsuite = {
5021 	.suite_name = "BBdev Throughput Tests",
5022 	.setup = testsuite_setup,
5023 	.teardown = testsuite_teardown,
5024 	.unit_test_cases = {
5025 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5026 		TEST_CASES_END() /**< NULL terminate unit test array */
5027 	}
5028 };
5029 
5030 static struct unit_test_suite bbdev_validation_testsuite = {
5031 	.suite_name = "BBdev Validation Tests",
5032 	.setup = testsuite_setup,
5033 	.teardown = testsuite_teardown,
5034 	.unit_test_cases = {
5035 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5036 		TEST_CASES_END() /**< NULL terminate unit test array */
5037 	}
5038 };
5039 
5040 static struct unit_test_suite bbdev_latency_testsuite = {
5041 	.suite_name = "BBdev Latency Tests",
5042 	.setup = testsuite_setup,
5043 	.teardown = testsuite_teardown,
5044 	.unit_test_cases = {
5045 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5046 		TEST_CASES_END() /**< NULL terminate unit test array */
5047 	}
5048 };
5049 
5050 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5051 	.suite_name = "BBdev Offload Cost Tests",
5052 	.setup = testsuite_setup,
5053 	.teardown = testsuite_teardown,
5054 	.unit_test_cases = {
5055 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5056 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5057 		TEST_CASES_END() /**< NULL terminate unit test array */
5058 	}
5059 };
5060 
5061 static struct unit_test_suite bbdev_interrupt_testsuite = {
5062 	.suite_name = "BBdev Interrupt Tests",
5063 	.setup = interrupt_testsuite_setup,
5064 	.teardown = testsuite_teardown,
5065 	.unit_test_cases = {
5066 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5067 		TEST_CASES_END() /**< NULL terminate unit test array */
5068 	}
5069 };
5070 
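/* Expose the suites as test commands selectable from the test-bbdev app. */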
5071 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5072 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5073 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5074 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5075 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5076 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
5077