1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28 
29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41 
42 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC
43 #include <rte_pmd_fpga_5gnr_fec.h>
44 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
45 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
46 #define VF_UL_5G_QUEUE_VALUE 4
47 #define VF_DL_5G_QUEUE_VALUE 4
48 #define UL_5G_BANDWIDTH 3
49 #define DL_5G_BANDWIDTH 3
50 #define UL_5G_LOAD_BALANCE 128
51 #define DL_5G_LOAD_BALANCE 128
52 #define FLR_5G_TIMEOUT 610
53 #endif
54 
55 #define OPS_CACHE_SIZE 256U
56 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
57 
58 #define SYNC_WAIT 0
59 #define SYNC_START 1
60 #define INVALID_OPAQUE -1
61 
62 #define INVALID_QUEUE_ID -1
63 /* Increment for next code block in external HARQ memory */
64 #define HARQ_INCR 32768
65 /* Headroom for filler LLRs insertion in HARQ buffer */
66 #define FILLER_HEADROOM 1024
67 /* Constants for K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
68 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
69 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
70 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
71 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
72 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
73 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
74 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
75 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
76 
77 static struct test_bbdev_vector test_vector;
78 
79 /* Switch between PMD and Interrupt for throughput TC */
80 static bool intr_enabled;
81 
82 /* LLR arithmetic representation for numerical conversion */
83 static int ldpc_llr_decimals;
84 static int ldpc_llr_size;
85 /* Keep track of the LDPC decoder device capability flag */
86 static uint32_t ldpc_cap_flags;
87 
88 /* Represents tested active devices */
89 static struct active_device {
90 	const char *driver_name;
91 	uint8_t dev_id;
92 	uint16_t supported_ops;
93 	uint16_t queue_ids[MAX_QUEUES];
94 	uint16_t nb_queues;
95 	struct rte_mempool *ops_mempool;
96 	struct rte_mempool *in_mbuf_pool;
97 	struct rte_mempool *hard_out_mbuf_pool;
98 	struct rte_mempool *soft_out_mbuf_pool;
99 	struct rte_mempool *harq_in_mbuf_pool;
100 	struct rte_mempool *harq_out_mbuf_pool;
101 } active_devs[RTE_BBDEV_MAX_DEVS];
102 
103 static uint8_t nb_active_devs;
104 
105 /* Data buffers used by BBDEV ops */
106 struct test_buffers {
107 	struct rte_bbdev_op_data *inputs;
108 	struct rte_bbdev_op_data *hard_outputs;
109 	struct rte_bbdev_op_data *soft_outputs;
110 	struct rte_bbdev_op_data *harq_inputs;
111 	struct rte_bbdev_op_data *harq_outputs;
112 };
113 
114 /* Operation parameters specific for given test case */
115 struct test_op_params {
116 	struct rte_mempool *mp;
117 	struct rte_bbdev_dec_op *ref_dec_op;
118 	struct rte_bbdev_enc_op *ref_enc_op;
119 	uint16_t burst_sz;
120 	uint16_t num_to_process;
121 	uint16_t num_lcores;
122 	int vector_mask;
123 	rte_atomic16_t sync;
124 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
125 };
126 
127 /* Contains per lcore params */
128 struct thread_params {
129 	uint8_t dev_id;
130 	uint16_t queue_id;
131 	uint32_t lcore_id;
132 	uint64_t start_time;
133 	double ops_per_sec;
134 	double mbps;
135 	uint8_t iter_count;
136 	double iter_average;
137 	double bler;
138 	rte_atomic16_t nb_dequeued;
139 	rte_atomic16_t processing_status;
140 	rte_atomic16_t burst_sz;
141 	struct test_op_params *op_params;
142 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
143 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
144 };
145 
146 #ifdef RTE_BBDEV_OFFLOAD_COST
147 /* Stores time statistics */
148 struct test_time_stats {
149 	/* Stores software enqueue total working time */
150 	uint64_t enq_sw_total_time;
151 	/* Stores minimum value of software enqueue working time */
152 	uint64_t enq_sw_min_time;
153 	/* Stores maximum value of software enqueue working time */
154 	uint64_t enq_sw_max_time;
155 	/* Stores turbo enqueue total working time */
156 	uint64_t enq_acc_total_time;
157 	/* Stores minimum value of accelerator enqueue working time */
158 	uint64_t enq_acc_min_time;
159 	/* Stores maximum value of accelerator enqueue working time */
160 	uint64_t enq_acc_max_time;
161 	/* Stores dequeue total working time */
162 	uint64_t deq_total_time;
163 	/* Stores minimum value of dequeue working time */
164 	uint64_t deq_min_time;
165 	/* Stores maximum value of dequeue working time */
166 	uint64_t deq_max_time;
167 };
168 #endif
169 
170 typedef int (test_case_function)(struct active_device *ad,
171 		struct test_op_params *op_params);
172 
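/* Reset the packet length and the data length of every segment in a
 * (possibly chained) mbuf
 */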
173 static inline void
174 mbuf_reset(struct rte_mbuf *m)
175 {
176 	m->pkt_len = 0;
177 
178 	do {
179 		m->data_len = 0;
180 		m = m->next;
181 	} while (m != NULL);
182 }
183 
184 /* Read flag value 0/1 from bitmap */
185 static inline bool
186 check_bit(uint32_t bitmap, uint32_t bitmask)
187 {
188 	return bitmap & bitmask;
189 }
190 
191 static inline void
192 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
193 {
194 	ad->supported_ops |= (1 << op_type);
195 }
196 
197 static inline bool
198 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
199 {
200 	return ad->supported_ops & (1 << op_type);
201 }
202 
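/* Check that all requested flags are present in the capability flags */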
203 static inline bool
204 flags_match(uint32_t flags_req, uint32_t flags_present)
205 {
206 	return (flags_req & flags_present) == flags_req;
207 }
208 
209 static void
210 clear_soft_out_cap(uint32_t *op_flags)
211 {
212 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
213 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
214 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
215 }
216 
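/* Check that the device capabilities cover the operation type, flags,
 * buffer counts and interrupt support required by the test vector
 */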
217 static int
218 check_dev_cap(const struct rte_bbdev_info *dev_info)
219 {
220 	unsigned int i;
221 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
222 		nb_harq_inputs, nb_harq_outputs;
223 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
224 
225 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
226 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
227 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
228 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
229 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
230 
231 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
232 		if (op_cap->type != test_vector.op_type)
233 			continue;
234 
235 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
236 			const struct rte_bbdev_op_cap_turbo_dec *cap =
237 					&op_cap->cap.turbo_dec;
238 			/* Ignore lack of soft output capability, just skip
239 			 * checking if soft output is valid.
240 			 */
241 			if ((test_vector.turbo_dec.op_flags &
242 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
243 					!(cap->capability_flags &
244 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
245 				printf(
246 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
247 					dev_info->dev_name);
248 				clear_soft_out_cap(
249 					&test_vector.turbo_dec.op_flags);
250 			}
251 
252 			if (!flags_match(test_vector.turbo_dec.op_flags,
253 					cap->capability_flags))
254 				return TEST_FAILED;
255 			if (nb_inputs > cap->num_buffers_src) {
256 				printf("Too many inputs defined: %u, max: %u\n",
257 					nb_inputs, cap->num_buffers_src);
258 				return TEST_FAILED;
259 			}
260 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
261 					(test_vector.turbo_dec.op_flags &
262 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
263 				printf(
264 					"Too many soft outputs defined: %u, max: %u\n",
265 						nb_soft_outputs,
266 						cap->num_buffers_soft_out);
267 				return TEST_FAILED;
268 			}
269 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
270 				printf(
271 					"Too many hard outputs defined: %u, max: %u\n",
272 						nb_hard_outputs,
273 						cap->num_buffers_hard_out);
274 				return TEST_FAILED;
275 			}
276 			if (intr_enabled && !(cap->capability_flags &
277 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
278 				printf(
279 					"Dequeue interrupts are not supported!\n");
280 				return TEST_FAILED;
281 			}
282 
283 			return TEST_SUCCESS;
284 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
285 			const struct rte_bbdev_op_cap_turbo_enc *cap =
286 					&op_cap->cap.turbo_enc;
287 
288 			if (!flags_match(test_vector.turbo_enc.op_flags,
289 					cap->capability_flags))
290 				return TEST_FAILED;
291 			if (nb_inputs > cap->num_buffers_src) {
292 				printf("Too many inputs defined: %u, max: %u\n",
293 					nb_inputs, cap->num_buffers_src);
294 				return TEST_FAILED;
295 			}
296 			if (nb_hard_outputs > cap->num_buffers_dst) {
297 				printf(
298 					"Too many hard outputs defined: %u, max: %u\n",
299 					nb_hard_outputs, cap->num_buffers_dst);
300 				return TEST_FAILED;
301 			}
302 			if (intr_enabled && !(cap->capability_flags &
303 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
304 				printf(
305 					"Dequeue interrupts are not supported!\n");
306 				return TEST_FAILED;
307 			}
308 
309 			return TEST_SUCCESS;
310 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
311 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
312 					&op_cap->cap.ldpc_enc;
313 
314 			if (!flags_match(test_vector.ldpc_enc.op_flags,
315 					cap->capability_flags)){
316 				printf("Flag Mismatch\n");
317 				return TEST_FAILED;
318 			}
319 			if (nb_inputs > cap->num_buffers_src) {
320 				printf("Too many inputs defined: %u, max: %u\n",
321 					nb_inputs, cap->num_buffers_src);
322 				return TEST_FAILED;
323 			}
324 			if (nb_hard_outputs > cap->num_buffers_dst) {
325 				printf(
326 					"Too many hard outputs defined: %u, max: %u\n",
327 					nb_hard_outputs, cap->num_buffers_dst);
328 				return TEST_FAILED;
329 			}
330 			if (intr_enabled && !(cap->capability_flags &
331 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
332 				printf(
333 					"Dequeue interrupts are not supported!\n");
334 				return TEST_FAILED;
335 			}
336 
337 			return TEST_SUCCESS;
338 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
339 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
340 					&op_cap->cap.ldpc_dec;
341 
342 			if (!flags_match(test_vector.ldpc_dec.op_flags,
343 					cap->capability_flags)){
344 				printf("Flag Mismatch\n");
345 				return TEST_FAILED;
346 			}
347 			if (nb_inputs > cap->num_buffers_src) {
348 				printf("Too many inputs defined: %u, max: %u\n",
349 					nb_inputs, cap->num_buffers_src);
350 				return TEST_FAILED;
351 			}
352 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
353 				printf(
354 					"Too many hard outputs defined: %u, max: %u\n",
355 					nb_hard_outputs,
356 					cap->num_buffers_hard_out);
357 				return TEST_FAILED;
358 			}
359 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
360 				printf(
361 					"Too many HARQ inputs defined: %u, max: %u\n",
362 					nb_harq_inputs,
363 					cap->num_buffers_hard_out);
364 				return TEST_FAILED;
365 			}
366 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
367 				printf(
368 					"Too many HARQ outputs defined: %u, max: %u\n",
369 					nb_harq_outputs,
370 					cap->num_buffers_hard_out);
371 				return TEST_FAILED;
372 			}
373 			if (intr_enabled && !(cap->capability_flags &
374 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
375 				printf(
376 					"Dequeue interrupts are not supported!\n");
377 				return TEST_FAILED;
378 			}
379 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
380 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
381 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
382 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
383 					))) {
384 				printf("Skip loop-back with interrupt\n");
385 				return TEST_FAILED;
386 			}
387 			return TEST_SUCCESS;
388 		}
389 	}
390 
391 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
392 		return TEST_SUCCESS; /* Special case for NULL device */
393 
394 	return TEST_FAILED;
395 }
396 
397 /* Calculates the optimal mempool size (a power of two minus one) not smaller than val */
398 static unsigned int
399 optimal_mempool_size(unsigned int val)
400 {
401 	return rte_align32pow2(val + 1) - 1;
402 }
403 
404 /* allocates mbuf mempool for inputs and outputs */
405 static struct rte_mempool *
406 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
407 		int socket_id, unsigned int mbuf_pool_size,
408 		const char *op_type_str)
409 {
410 	unsigned int i;
411 	uint32_t max_seg_sz = 0;
412 	char pool_name[RTE_MEMPOOL_NAMESIZE];
413 
414 	/* find max input segment size */
415 	for (i = 0; i < entries->nb_segments; ++i)
416 		if (entries->segments[i].length > max_seg_sz)
417 			max_seg_sz = entries->segments[i].length;
418 
419 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
420 			dev_id);
421 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
422 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
423 					+ FILLER_HEADROOM,
424 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
425 }
426 
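/* Allocate the ops mempool and the input/output mbuf pools for a device */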
427 static int
428 create_mempools(struct active_device *ad, int socket_id,
429 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
430 {
431 	struct rte_mempool *mp;
432 	unsigned int ops_pool_size, mbuf_pool_size = 0;
433 	char pool_name[RTE_MEMPOOL_NAMESIZE];
434 	const char *op_type_str;
435 	enum rte_bbdev_op_type op_type = org_op_type;
436 
437 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
438 	struct op_data_entries *hard_out =
439 			&test_vector.entries[DATA_HARD_OUTPUT];
440 	struct op_data_entries *soft_out =
441 			&test_vector.entries[DATA_SOFT_OUTPUT];
442 	struct op_data_entries *harq_in =
443 			&test_vector.entries[DATA_HARQ_INPUT];
444 	struct op_data_entries *harq_out =
445 			&test_vector.entries[DATA_HARQ_OUTPUT];
446 
447 	/* allocate ops mempool */
448 	ops_pool_size = optimal_mempool_size(RTE_MAX(
449 			/* Ops used plus 1 reference op */
450 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
451 			/* Minimal cache size plus 1 reference op */
452 			(unsigned int)(1.5 * rte_lcore_count() *
453 					OPS_CACHE_SIZE + 1)),
454 			OPS_POOL_SIZE_MIN));
455 
456 	if (org_op_type == RTE_BBDEV_OP_NONE)
457 		op_type = RTE_BBDEV_OP_TURBO_ENC;
458 
459 	op_type_str = rte_bbdev_op_type_str(op_type);
460 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
461 
462 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
463 			ad->dev_id);
464 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
465 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
466 	TEST_ASSERT_NOT_NULL(mp,
467 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
468 			ops_pool_size,
469 			ad->dev_id,
470 			socket_id);
471 	ad->ops_mempool = mp;
472 
473 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
474 	if (org_op_type == RTE_BBDEV_OP_NONE)
475 		return TEST_SUCCESS;
476 
477 	/* Inputs */
478 	if (in->nb_segments > 0) {
479 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
480 				in->nb_segments);
481 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
482 				mbuf_pool_size, "in");
483 		TEST_ASSERT_NOT_NULL(mp,
484 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
485 				mbuf_pool_size,
486 				ad->dev_id,
487 				socket_id);
488 		ad->in_mbuf_pool = mp;
489 	}
490 
491 	/* Hard outputs */
492 	if (hard_out->nb_segments > 0) {
493 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
494 				hard_out->nb_segments);
495 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
496 				mbuf_pool_size,
497 				"hard_out");
498 		TEST_ASSERT_NOT_NULL(mp,
499 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
500 				mbuf_pool_size,
501 				ad->dev_id,
502 				socket_id);
503 		ad->hard_out_mbuf_pool = mp;
504 	}
505 
506 	/* Soft outputs */
507 	if (soft_out->nb_segments > 0) {
508 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
509 				soft_out->nb_segments);
510 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
511 				mbuf_pool_size,
512 				"soft_out");
513 		TEST_ASSERT_NOT_NULL(mp,
514 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
515 				mbuf_pool_size,
516 				ad->dev_id,
517 				socket_id);
518 		ad->soft_out_mbuf_pool = mp;
519 	}
520 
521 	/* HARQ inputs */
522 	if (harq_in->nb_segments > 0) {
523 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
524 				harq_in->nb_segments);
525 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
526 				mbuf_pool_size,
527 				"harq_in");
528 		TEST_ASSERT_NOT_NULL(mp,
529 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
530 				mbuf_pool_size,
531 				ad->dev_id,
532 				socket_id);
533 		ad->harq_in_mbuf_pool = mp;
534 	}
535 
536 	/* HARQ outputs */
537 	if (harq_out->nb_segments > 0) {
538 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
539 				harq_out->nb_segments);
540 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
541 				mbuf_pool_size,
542 				"harq_out");
543 		TEST_ASSERT_NOT_NULL(mp,
544 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
545 				mbuf_pool_size,
546 				ad->dev_id,
547 				socket_id);
548 		ad->harq_out_mbuf_pool = mp;
549 	}
550 
551 	return TEST_SUCCESS;
552 }
553 
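/* Configure a bbdev device: optional FPGA PF setup, queue allocation,
 * interrupts and per-queue configuration for the vector operation type
 */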
554 static int
555 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
556 		struct test_bbdev_vector *vector)
557 {
558 	int ret;
559 	unsigned int queue_id;
560 	struct rte_bbdev_queue_conf qconf;
561 	struct active_device *ad = &active_devs[nb_active_devs];
562 	unsigned int nb_queues;
563 	enum rte_bbdev_op_type op_type = vector->op_type;
564 
565 /* Configure FPGA LTE FEC with PF & VF values
566  * if the '-i' flag is set and an FPGA device is used
567  */
568 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
569 	if ((get_init_device() == true) &&
570 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
571 		struct fpga_lte_fec_conf conf;
572 		unsigned int i;
573 
574 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
575 				info->drv.driver_name);
576 
577 		/* clear default configuration before initialization */
578 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
579 
580 		/* Set PF mode :
581 		 * true if PF is used for data plane
582 		 * false for VFs
583 		 */
584 		conf.pf_mode_en = true;
585 
586 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
587 			/* Number of UL queues per VF (fpga supports 8 VFs) */
588 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
589 			/* Number of DL queues per VF (fpga supports 8 VFs) */
590 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
591 		}
592 
593 		/* UL bandwidth. Needed for the scheduling algorithm */
594 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
595 		/* DL bandwidth */
596 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
597 
598 		/* UL & DL load Balance Factor to 128 */
599 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
600 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
601 
602 		/* FLR timeout value */
603 		conf.flr_time_out = FLR_4G_TIMEOUT;
604 
605 		/* setup FPGA PF with configuration information */
606 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
607 		TEST_ASSERT_SUCCESS(ret,
608 				"Failed to configure 4G FPGA PF for bbdev %s",
609 				info->dev_name);
610 	}
611 #endif
612 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC
613 	if ((get_init_device() == true) &&
614 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
615 		struct fpga_5gnr_fec_conf conf;
616 		unsigned int i;
617 
618 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
619 				info->drv.driver_name);
620 
621 		/* clear default configuration before initialization */
622 		memset(&conf, 0, sizeof(struct fpga_5gnr_fec_conf));
623 
624 		/* Set PF mode :
625 		 * true if PF is used for data plane
626 		 * false for VFs
627 		 */
628 		conf.pf_mode_en = true;
629 
630 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
631 			/* Number of UL queues per VF (fpga supports 8 VFs) */
632 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
633 			/* Number of DL queues per VF (fpga supports 8 VFs) */
634 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
635 		}
636 
637 		/* UL bandwidth. Needed for the scheduling algorithm */
638 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
639 		/* DL bandwidth */
640 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
641 
642 		/* UL & DL load Balance Factor to 128 */
643 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
644 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
645 
646 		/* FLR timeout value */
647 		conf.flr_time_out = FLR_5G_TIMEOUT;
648 
649 		/* setup FPGA PF with configuration information */
650 		ret = fpga_5gnr_fec_configure(info->dev_name, &conf);
651 		TEST_ASSERT_SUCCESS(ret,
652 				"Failed to configure 5G FPGA PF for bbdev %s",
653 				info->dev_name);
654 	}
655 #endif
656 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
657 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
658 
659 	/* setup device */
660 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
661 	if (ret < 0) {
662 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
663 				dev_id, nb_queues, info->socket_id, ret);
664 		return TEST_FAILED;
665 	}
666 
667 	/* configure interrupts if needed */
668 	if (intr_enabled) {
669 		ret = rte_bbdev_intr_enable(dev_id);
670 		if (ret < 0) {
671 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
672 					ret);
673 			return TEST_FAILED;
674 		}
675 	}
676 
677 	/* setup device queues */
678 	qconf.socket = info->socket_id;
679 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
680 	qconf.priority = 0;
681 	qconf.deferred_start = 0;
682 	qconf.op_type = op_type;
683 
684 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
685 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
686 		if (ret != 0) {
687 			printf(
688 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
689 					queue_id, qconf.priority, dev_id);
690 			qconf.priority++;
691 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
692 					&qconf);
693 		}
694 		if (ret != 0) {
695 			printf("All queues on dev %u allocated: %u\n",
696 					dev_id, queue_id);
697 			break;
698 		}
699 		ad->queue_ids[queue_id] = queue_id;
700 	}
701 	TEST_ASSERT(queue_id != 0,
702 			"ERROR Failed to configure any queues on dev %u",
703 			dev_id);
704 	ad->nb_queues = queue_id;
705 
706 	set_avail_op(ad, op_type);
707 
708 	return TEST_SUCCESS;
709 }
710 
711 static int
712 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
713 		struct test_bbdev_vector *vector)
714 {
715 	int ret;
716 
717 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
718 	active_devs[nb_active_devs].dev_id = dev_id;
719 
720 	ret = add_bbdev_dev(dev_id, info, vector);
721 	if (ret == TEST_SUCCESS)
722 		++nb_active_devs;
723 	return ret;
724 }
725 
726 static uint8_t
727 populate_active_devices(void)
728 {
729 	int ret;
730 	uint8_t dev_id;
731 	uint8_t nb_devs_added = 0;
732 	struct rte_bbdev_info info;
733 
734 	RTE_BBDEV_FOREACH(dev_id) {
735 		rte_bbdev_info_get(dev_id, &info);
736 
737 		if (check_dev_cap(&info)) {
738 			printf(
739 				"Device %d (%s) does not support specified capabilities\n",
740 					dev_id, info.dev_name);
741 			continue;
742 		}
743 
744 		ret = add_active_device(dev_id, &info, &test_vector);
745 		if (ret != 0) {
746 			printf("Adding active bbdev %s skipped\n",
747 					info.dev_name);
748 			continue;
749 		}
750 		nb_devs_added++;
751 	}
752 
753 	return nb_devs_added;
754 }
755 
756 static int
757 read_test_vector(void)
758 {
759 	int ret;
760 
761 	memset(&test_vector, 0, sizeof(test_vector));
762 	printf("Test vector file = %s\n", get_vector_filename());
763 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
764 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
765 			get_vector_filename());
766 
767 	return TEST_SUCCESS;
768 }
769 
770 static int
771 testsuite_setup(void)
772 {
773 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
774 
775 	if (populate_active_devices() == 0) {
776 		printf("No suitable devices found!\n");
777 		return TEST_SKIPPED;
778 	}
779 
780 	return TEST_SUCCESS;
781 }
782 
783 static int
784 interrupt_testsuite_setup(void)
785 {
786 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
787 
788 	/* Enable interrupts */
789 	intr_enabled = true;
790 
791 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
792 	if (populate_active_devices() == 0 ||
793 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
794 		intr_enabled = false;
795 		printf("No suitable devices found!\n");
796 		return TEST_SKIPPED;
797 	}
798 
799 	return TEST_SUCCESS;
800 }
801 
802 static void
803 testsuite_teardown(void)
804 {
805 	uint8_t dev_id;
806 
807 	/* Unconfigure devices */
808 	RTE_BBDEV_FOREACH(dev_id)
809 		rte_bbdev_close(dev_id);
810 
811 	/* Clear active devices structs. */
812 	memset(active_devs, 0, sizeof(active_devs));
813 	nb_active_devs = 0;
814 
815 	/* Disable interrupts */
816 	intr_enabled = false;
817 }
818 
819 static int
820 ut_setup(void)
821 {
822 	uint8_t i, dev_id;
823 
824 	for (i = 0; i < nb_active_devs; i++) {
825 		dev_id = active_devs[i].dev_id;
826 		/* reset bbdev stats */
827 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
828 				"Failed to reset stats of bbdev %u", dev_id);
829 		/* start the device */
830 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
831 				"Failed to start bbdev %u", dev_id);
832 	}
833 
834 	return TEST_SUCCESS;
835 }
836 
837 static void
838 ut_teardown(void)
839 {
840 	uint8_t i, dev_id;
841 	struct rte_bbdev_stats stats;
842 
843 	for (i = 0; i < nb_active_devs; i++) {
844 		dev_id = active_devs[i].dev_id;
845 		/* read stats and print */
846 		rte_bbdev_stats_get(dev_id, &stats);
847 		/* Stop the device */
848 		rte_bbdev_stop(dev_id);
849 	}
850 }
851 
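/* Allocate the mbufs backing each op_data entry; input and HARQ input
 * buffers are filled from the reference test vector segments
 */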
852 static int
853 init_op_data_objs(struct rte_bbdev_op_data *bufs,
854 		struct op_data_entries *ref_entries,
855 		struct rte_mempool *mbuf_pool, const uint16_t n,
856 		enum op_data_type op_type, uint16_t min_alignment)
857 {
858 	int ret;
859 	unsigned int i, j;
860 	bool large_input = false;
861 
862 	for (i = 0; i < n; ++i) {
863 		char *data;
864 		struct op_data_buf *seg = &ref_entries->segments[0];
865 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
866 		TEST_ASSERT_NOT_NULL(m_head,
867 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
868 				op_type, n * ref_entries->nb_segments,
869 				mbuf_pool->size);
870 
871 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
872 			/*
873 			 * Special case when DPDK mbuf cannot handle
874 			 * the required input size
875 			 */
876 			printf("Warning: Larger input size than DPDK mbuf %d\n",
877 					seg->length);
878 			large_input = true;
879 		}
880 		bufs[i].data = m_head;
881 		bufs[i].offset = 0;
882 		bufs[i].length = 0;
883 
884 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
885 			if ((op_type == DATA_INPUT) && large_input) {
886 				/* Allocate a fake overused mbuf */
887 				data = rte_malloc(NULL, seg->length, 0);
888 				memcpy(data, seg->addr, seg->length);
889 				m_head->buf_addr = data;
890 				m_head->buf_iova = rte_malloc_virt2iova(data);
891 				m_head->data_off = 0;
892 				m_head->data_len = seg->length;
893 			} else {
894 				data = rte_pktmbuf_append(m_head, seg->length);
895 				TEST_ASSERT_NOT_NULL(data,
896 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
897 					seg->length, op_type);
898 
899 				TEST_ASSERT(data == RTE_PTR_ALIGN(
900 						data, min_alignment),
901 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
902 					data, min_alignment);
903 				rte_memcpy(data, seg->addr, seg->length);
904 			}
905 
906 			bufs[i].length += seg->length;
907 
908 			for (j = 1; j < ref_entries->nb_segments; ++j) {
909 				struct rte_mbuf *m_tail =
910 						rte_pktmbuf_alloc(mbuf_pool);
911 				TEST_ASSERT_NOT_NULL(m_tail,
912 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
913 						op_type,
914 						n * ref_entries->nb_segments,
915 						mbuf_pool->size);
916 				seg += 1;
917 
918 				data = rte_pktmbuf_append(m_tail, seg->length);
919 				TEST_ASSERT_NOT_NULL(data,
920 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
921 						seg->length, op_type);
922 
923 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
924 						min_alignment),
925 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
926 						data, min_alignment);
927 				rte_memcpy(data, seg->addr, seg->length);
928 				bufs[i].length += seg->length;
929 
930 				ret = rte_pktmbuf_chain(m_head, m_tail);
931 				TEST_ASSERT_SUCCESS(ret,
932 						"Couldn't chain mbufs from %d data type mbuf pool",
933 						op_type);
934 			}
935 		} else {
936 
937 			/* allocate chained-mbuf for output buffer */
938 			for (j = 1; j < ref_entries->nb_segments; ++j) {
939 				struct rte_mbuf *m_tail =
940 						rte_pktmbuf_alloc(mbuf_pool);
941 				TEST_ASSERT_NOT_NULL(m_tail,
942 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
943 						op_type,
944 						n * ref_entries->nb_segments,
945 						mbuf_pool->size);
946 
947 				ret = rte_pktmbuf_chain(m_head, m_tail);
948 				TEST_ASSERT_SUCCESS(ret,
949 						"Couldn't chain mbufs from %d data type mbuf pool",
950 						op_type);
951 			}
952 		}
953 	}
954 
955 	return 0;
956 }
957 
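/* Allocate the op_data array on the given socket, falling back to other
 * sockets on failure
 */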
958 static int
959 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
960 		const int socket)
961 {
962 	int i;
963 
964 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
965 	if (*buffers == NULL) {
966 		printf("WARNING: Failed to allocate op_data on socket %d\n",
967 				socket);
968 		/* try to allocate memory on other detected sockets */
969 		for (i = 0; i < socket; i++) {
970 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
971 			if (*buffers != NULL)
972 				break;
973 		}
974 	}
975 
976 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
977 }
978 
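/* Scale input LLRs down to the max LLR modulus supported by the decoder */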
979 static void
980 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
981 		const uint16_t n, const int8_t max_llr_modulus)
982 {
983 	uint16_t i, byte_idx;
984 
985 	for (i = 0; i < n; ++i) {
986 		struct rte_mbuf *m = input_ops[i].data;
987 		while (m != NULL) {
988 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
989 					input_ops[i].offset);
990 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
991 					++byte_idx)
992 				llr[byte_idx] = round((double)max_llr_modulus *
993 						llr[byte_idx] / INT8_MAX);
994 
995 			m = m->next;
996 		}
997 	}
998 }
999 
1000 /*
1001  * We may have to insert filler bits
1002  * when they are required by the HARQ assumption
1003  */
1004 static void
1005 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1006 		const uint16_t n, struct test_op_params *op_params)
1007 {
1008 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1009 
1010 	if (input_ops == NULL)
1011 		return;
1012 	/* No need to add filler if not required by device */
1013 	if (!(ldpc_cap_flags &
1014 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1015 		return;
1016 	/* No need to add filler for loopback operation */
1017 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1018 		return;
1019 
1020 	uint16_t i, j, parity_offset;
1021 	for (i = 0; i < n; ++i) {
1022 		struct rte_mbuf *m = input_ops[i].data;
1023 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1024 				input_ops[i].offset);
1025 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1026 				* dec.z_c - dec.n_filler;
1027 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1028 		m->data_len = new_hin_size;
1029 		input_ops[i].length = new_hin_size;
1030 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1031 				j--)
1032 			llr[j] = llr[j - dec.n_filler];
1033 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1034 		for (j = 0; j < dec.n_filler; j++)
1035 			llr[parity_offset + j] = llr_max_pre_scaling;
1036 	}
1037 }
1038 
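/* Scale and saturate input LLRs to the LLR size and fixed-point format
 * expected by the LDPC decoder
 */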
1039 static void
1040 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1041 		const uint16_t n, const int8_t llr_size,
1042 		const int8_t llr_decimals)
1043 {
1044 	if (input_ops == NULL)
1045 		return;
1046 
1047 	uint16_t i, byte_idx;
1048 
1049 	int16_t llr_max, llr_min, llr_tmp;
1050 	llr_max = (1 << (llr_size - 1)) - 1;
1051 	llr_min = -llr_max;
1052 	for (i = 0; i < n; ++i) {
1053 		struct rte_mbuf *m = input_ops[i].data;
1054 		while (m != NULL) {
1055 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1056 					input_ops[i].offset);
1057 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1058 					++byte_idx) {
1059 
1060 				llr_tmp = llr[byte_idx];
1061 				if (llr_decimals == 4)
1062 					llr_tmp *= 8;
1063 				else if (llr_decimals == 2)
1064 					llr_tmp *= 2;
1065 				else if (llr_decimals == 0)
1066 					llr_tmp /= 2;
1067 				llr_tmp = RTE_MIN(llr_max,
1068 						RTE_MAX(llr_min, llr_tmp));
1069 				llr[byte_idx] = (int8_t) llr_tmp;
1070 			}
1071 
1072 			m = m->next;
1073 		}
1074 	}
1075 }
1076 
1077 
1078 
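/* Allocate and initialise the per-queue input/output op_data buffers and
 * apply LLR range limiting/scaling and filler insertion where applicable
 */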
1079 static int
1080 fill_queue_buffers(struct test_op_params *op_params,
1081 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1082 		struct rte_mempool *soft_out_mp,
1083 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1084 		uint16_t queue_id,
1085 		const struct rte_bbdev_op_cap *capabilities,
1086 		uint16_t min_alignment, const int socket_id)
1087 {
1088 	int ret;
1089 	enum op_data_type type;
1090 	const uint16_t n = op_params->num_to_process;
1091 
1092 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1093 		in_mp,
1094 		soft_out_mp,
1095 		hard_out_mp,
1096 		harq_in_mp,
1097 		harq_out_mp,
1098 	};
1099 
1100 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1101 		&op_params->q_bufs[socket_id][queue_id].inputs,
1102 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1103 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1104 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1105 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1106 	};
1107 
1108 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1109 		struct op_data_entries *ref_entries =
1110 				&test_vector.entries[type];
1111 		if (ref_entries->nb_segments == 0)
1112 			continue;
1113 
1114 		ret = allocate_buffers_on_socket(queue_ops[type],
1115 				n * sizeof(struct rte_bbdev_op_data),
1116 				socket_id);
1117 		TEST_ASSERT_SUCCESS(ret,
1118 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1119 
1120 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1121 				mbuf_pools[type], n, type, min_alignment);
1122 		TEST_ASSERT_SUCCESS(ret,
1123 				"Couldn't init rte_bbdev_op_data structs");
1124 	}
1125 
1126 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1127 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1128 			capabilities->cap.turbo_dec.max_llr_modulus);
1129 
1130 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1131 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1132 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1133 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1134 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1135 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1136 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1137 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1138 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1139 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1140 		if (!loopback && !llr_comp)
1141 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1142 					ldpc_llr_size, ldpc_llr_decimals);
1143 		if (!loopback && !harq_comp)
1144 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1145 					ldpc_llr_size, ldpc_llr_decimals);
1146 		if (!loopback)
1147 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1148 					op_params);
1149 	}
1150 
1151 	return 0;
1152 }
1153 
1154 static void
1155 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1156 {
1157 	unsigned int i, j;
1158 
1159 	rte_mempool_free(ad->ops_mempool);
1160 	rte_mempool_free(ad->in_mbuf_pool);
1161 	rte_mempool_free(ad->hard_out_mbuf_pool);
1162 	rte_mempool_free(ad->soft_out_mbuf_pool);
1163 	rte_mempool_free(ad->harq_in_mbuf_pool);
1164 	rte_mempool_free(ad->harq_out_mbuf_pool);
1165 
1166 	for (i = 0; i < rte_lcore_count(); ++i) {
1167 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1168 			rte_free(op_params->q_bufs[j][i].inputs);
1169 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1170 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1171 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1172 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1173 		}
1174 	}
1175 }
1176 
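/* Copy the reference turbo decode parameters into each op and attach the
 * per-op input/output buffers
 */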
1177 static void
1178 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1179 		unsigned int start_idx,
1180 		struct rte_bbdev_op_data *inputs,
1181 		struct rte_bbdev_op_data *hard_outputs,
1182 		struct rte_bbdev_op_data *soft_outputs,
1183 		struct rte_bbdev_dec_op *ref_op)
1184 {
1185 	unsigned int i;
1186 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1187 
1188 	for (i = 0; i < n; ++i) {
1189 		if (turbo_dec->code_block_mode == 0) {
1190 			ops[i]->turbo_dec.tb_params.ea =
1191 					turbo_dec->tb_params.ea;
1192 			ops[i]->turbo_dec.tb_params.eb =
1193 					turbo_dec->tb_params.eb;
1194 			ops[i]->turbo_dec.tb_params.k_pos =
1195 					turbo_dec->tb_params.k_pos;
1196 			ops[i]->turbo_dec.tb_params.k_neg =
1197 					turbo_dec->tb_params.k_neg;
1198 			ops[i]->turbo_dec.tb_params.c =
1199 					turbo_dec->tb_params.c;
1200 			ops[i]->turbo_dec.tb_params.c_neg =
1201 					turbo_dec->tb_params.c_neg;
1202 			ops[i]->turbo_dec.tb_params.cab =
1203 					turbo_dec->tb_params.cab;
1204 			ops[i]->turbo_dec.tb_params.r =
1205 					turbo_dec->tb_params.r;
1206 		} else {
1207 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1208 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1209 		}
1210 
1211 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1212 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1213 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1214 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1215 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1216 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1217 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1218 
1219 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1220 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1221 		if (soft_outputs != NULL)
1222 			ops[i]->turbo_dec.soft_output =
1223 				soft_outputs[start_idx + i];
1224 	}
1225 }
1226 
1227 static void
1228 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1229 		unsigned int start_idx,
1230 		struct rte_bbdev_op_data *inputs,
1231 		struct rte_bbdev_op_data *outputs,
1232 		struct rte_bbdev_enc_op *ref_op)
1233 {
1234 	unsigned int i;
1235 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1236 	for (i = 0; i < n; ++i) {
1237 		if (turbo_enc->code_block_mode == 0) {
1238 			ops[i]->turbo_enc.tb_params.ea =
1239 					turbo_enc->tb_params.ea;
1240 			ops[i]->turbo_enc.tb_params.eb =
1241 					turbo_enc->tb_params.eb;
1242 			ops[i]->turbo_enc.tb_params.k_pos =
1243 					turbo_enc->tb_params.k_pos;
1244 			ops[i]->turbo_enc.tb_params.k_neg =
1245 					turbo_enc->tb_params.k_neg;
1246 			ops[i]->turbo_enc.tb_params.c =
1247 					turbo_enc->tb_params.c;
1248 			ops[i]->turbo_enc.tb_params.c_neg =
1249 					turbo_enc->tb_params.c_neg;
1250 			ops[i]->turbo_enc.tb_params.cab =
1251 					turbo_enc->tb_params.cab;
1252 			ops[i]->turbo_enc.tb_params.ncb_pos =
1253 					turbo_enc->tb_params.ncb_pos;
1254 			ops[i]->turbo_enc.tb_params.ncb_neg =
1255 					turbo_enc->tb_params.ncb_neg;
1256 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1257 		} else {
1258 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1259 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1260 			ops[i]->turbo_enc.cb_params.ncb =
1261 					turbo_enc->cb_params.ncb;
1262 		}
1263 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1264 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1265 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1266 
1267 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1268 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1269 	}
1270 }
1271 
1272 
1273 /* Returns a random number drawn from a normal distribution
1274  * with a mean of 0 and a variance of 1,
1275  * using the Marsaglia polar method
1276  */
1277 static double
1278 randn(int n)
1279 {
1280 	double S, Z, U1, U2, u, v, fac;
1281 
1282 	do {
1283 		U1 = (double)rand() / RAND_MAX;
1284 		U2 = (double)rand() / RAND_MAX;
1285 		u = 2. * U1 - 1.;
1286 		v = 2. * U2 - 1.;
1287 		S = u * u + v * v;
1288 	} while (S >= 1 || S == 0);
1289 	fac = sqrt(-2. * log(S) / S);
1290 	Z = (n % 2) ? u * fac : v * fac;
1291 	return Z;
1292 }
1293 
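/* max* (Jacobian logarithm) approximation:
 * max*(A, B) = max(A, B) + log(1 + exp(-|A - B|)),
 * reduced to max(A, B) when the difference is large
 */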
1294 static inline double
1295 maxstar(double A, double B)
1296 {
1297 	if (fabs(A - B) > 5)
1298 		return RTE_MAX(A, B);
1299 	else
1300 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1301 }
1302 
1303 /*
1304  * Generate Qm LLRS for Qm==8
1305  * Modulation, AWGN and LLR estimation from max log development
1306  */
1307 static void
1308 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1309 {
1310 	int qm = 8;
1311 	int qam = 256;
1312 	int m, k;
1313 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1314 	/* 5.1.4 of TS38.211 */
1315 	const double symbols_I[256] = {
1316 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1317 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1318 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1319 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1320 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1321 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1322 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1323 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1324 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1325 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1326 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1327 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1328 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1329 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1330 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1331 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1332 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1333 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1334 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1335 			-13, -13, -15, -15, -13, -13, -15, -15};
1336 	const double symbols_Q[256] = {
1337 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1338 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1339 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1340 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1341 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1342 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1343 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1344 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1345 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1346 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1347 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1348 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1349 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1350 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1351 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1352 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1353 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1354 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1355 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1356 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1357 	/* Average constellation point energy */
1358 	N0 *= 170.0;
1359 	for (k = 0; k < qm; k++)
1360 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1361 	/* 5.1.4 of TS38.211 */
1362 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1363 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1364 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1365 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1366 	/* AWGN channel */
1367 	I += sqrt(N0 / 2) * randn(0);
1368 	Q += sqrt(N0 / 2) * randn(1);
1369 	/*
1370 	 * Calculate the log of the probability that each of
1371 	 * the constellation points was transmitted
1372 	 */
1373 	for (m = 0; m < qam; m++)
1374 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1375 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1376 	/* Calculate an LLR for each of the qm bits in the set */
1377 	for (k = 0; k < qm; k++) {
1378 		p0 = -999999;
1379 		p1 = -999999;
1380 		/* For each constellation point */
1381 		for (m = 0; m < qam; m++) {
1382 			if ((m >> (qm - k - 1)) & 1)
1383 				p1 = maxstar(p1, log_syml_prob[m]);
1384 			else
1385 				p0 = maxstar(p0, log_syml_prob[m]);
1386 		}
1387 		/* Calculate the LLR */
1388 		llr_ = p0 - p1;
1389 		llr_ *= (1 << ldpc_llr_decimals);
1390 		llr_ = round(llr_);
1391 		if (llr_ > llr_max)
1392 			llr_ = llr_max;
1393 		if (llr_ < -llr_max)
1394 			llr_ = -llr_max;
1395 		llrs[qm * i + k] = (int8_t) llr_;
1396 	}
1397 }
1398 
1399 
1400 /*
1401  * Generate Qm LLRS for Qm==6
1402  * Modulation, AWGN and LLR estimation from max log development
1403  */
1404 static void
1405 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1406 {
1407 	int qm = 6;
1408 	int qam = 64;
1409 	int m, k;
1410 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1411 	/* 5.1.4 of TS38.211 */
1412 	const double symbols_I[64] = {
1413 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1414 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1415 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1416 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1417 			-5, -5, -7, -7, -5, -5, -7, -7};
1418 	const double symbols_Q[64] = {
1419 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1420 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1421 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1422 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1423 			-3, -1, -3, -1, -5, -7, -5, -7};
1424 	/* Average constellation point energy */
1425 	N0 *= 42.0;
1426 	for (k = 0; k < qm; k++)
1427 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1428 	/* 5.1.4 of TS38.211 */
1429 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1430 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1431 	/* AWGN channel */
1432 	I += sqrt(N0 / 2) * randn(0);
1433 	Q += sqrt(N0 / 2) * randn(1);
1434 	/*
1435 	 * Calculate the log of the probability that each of
1436 	 * the constellation points was transmitted
1437 	 */
1438 	for (m = 0; m < qam; m++)
1439 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1440 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1441 	/* Calculate an LLR for each of the qm bits in the set */
1442 	for (k = 0; k < qm; k++) {
1443 		p0 = -999999;
1444 		p1 = -999999;
1445 		/* For each constellation point */
1446 		for (m = 0; m < qam; m++) {
1447 			if ((m >> (qm - k - 1)) & 1)
1448 				p1 = maxstar(p1, log_syml_prob[m]);
1449 			else
1450 				p0 = maxstar(p0, log_syml_prob[m]);
1451 		}
1452 		/* Calculate the LLR */
1453 		llr_ = p0 - p1;
1454 		llr_ *= (1 << ldpc_llr_decimals);
1455 		llr_ = round(llr_);
1456 		if (llr_ > llr_max)
1457 			llr_ = llr_max;
1458 		if (llr_ < -llr_max)
1459 			llr_ = -llr_max;
1460 		llrs[qm * i + k] = (int8_t) llr_;
1461 	}
1462 }
1463 
1464 /*
1465  * Generate Qm LLRS for Qm==4
1466  * Modulation, AWGN and LLR estimation from max log development
1467  */
1468 static void
1469 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1470 {
1471 	int qm = 4;
1472 	int qam = 16;
1473 	int m, k;
1474 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1475 	/* 5.1.4 of TS38.211 */
1476 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1477 			-1, -1, -3, -3, -1, -1, -3, -3};
1478 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1479 			1, 3, 1, 3, -1, -3, -1, -3};
1480 	/* Average constellation point energy */
1481 	N0 *= 10.0;
1482 	for (k = 0; k < qm; k++)
1483 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1484 	/* 5.1.4 of TS38.211 */
1485 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1486 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1487 	/* AWGN channel */
1488 	I += sqrt(N0 / 2) * randn(0);
1489 	Q += sqrt(N0 / 2) * randn(1);
1490 	/*
1491 	 * Calculate the log of the probability that each of
1492 	 * the constellation points was transmitted
1493 	 */
1494 	for (m = 0; m < qam; m++)
1495 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1496 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1497 	/* Calculate an LLR for each of the qm bits in the set */
1498 	for (k = 0; k < qm; k++) {
1499 		p0 = -999999;
1500 		p1 = -999999;
1501 		/* For each constellation point */
1502 		for (m = 0; m < qam; m++) {
1503 			if ((m >> (qm - k - 1)) & 1)
1504 				p1 = maxstar(p1, log_syml_prob[m]);
1505 			else
1506 				p0 = maxstar(p0, log_syml_prob[m]);
1507 		}
1508 		/* Calculate the LLR */
1509 		llr_ = p0 - p1;
1510 		llr_ *= (1 << ldpc_llr_decimals);
1511 		llr_ = round(llr_);
1512 		if (llr_ > llr_max)
1513 			llr_ = llr_max;
1514 		if (llr_ < -llr_max)
1515 			llr_ = -llr_max;
1516 		llrs[qm * i + k] = (int8_t) llr_;
1517 	}
1518 }
1519 
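/*
 * Generate Qm LLRS for Qm==2
 * AWGN-based regeneration of each LLR, keeping the original bit sign;
 * quasi-null LLRs are left untouched
 */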
1520 static void
1521 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1522 {
1523 	double b, b1, n;
1524 	double coeff = 2.0 * sqrt(N0);
1525 
1526 	/* Ignore the rare quasi-null LLRs present in vectors so they do not get saturated */
1527 	if (llrs[j] < 8 && llrs[j] > -8)
1528 		return;
1529 
1530 	/* Note: the sign is intentionally kept unchanged here */
1531 	n = randn(j % 2);
1532 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1533 			+ coeff * n) / N0;
1534 	b = b1 * (1 << ldpc_llr_decimals);
1535 	b = round(b);
1536 	if (b > llr_max)
1537 		b = llr_max;
1538 	if (b < -llr_max)
1539 		b = -llr_max;
1540 	llrs[j] = (int8_t) b;
1541 }
1542 
1543 /* Generate LLR for a given SNR */
1544 static void
1545 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1546 		struct rte_bbdev_dec_op *ref_op)
1547 {
1548 	struct rte_mbuf *m;
1549 	uint16_t qm;
1550 	uint32_t i, j, e, range;
1551 	double N0, llr_max;
1552 
1553 	e = ref_op->ldpc_dec.cb_params.e;
1554 	qm = ref_op->ldpc_dec.q_m;
1555 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1556 	range = e / qm;
1557 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1558 
1559 	for (i = 0; i < n; ++i) {
1560 		m = inputs[i].data;
1561 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1562 		if (qm == 8) {
1563 			for (j = 0; j < range; ++j)
1564 				gen_qm8_llr(llrs, j, N0, llr_max);
1565 		} else if (qm == 6) {
1566 			for (j = 0; j < range; ++j)
1567 				gen_qm6_llr(llrs, j, N0, llr_max);
1568 		} else if (qm == 4) {
1569 			for (j = 0; j < range; ++j)
1570 				gen_qm4_llr(llrs, j, N0, llr_max);
1571 		} else {
1572 			for (j = 0; j < e; ++j)
1573 				gen_qm2_llr(llrs, j, N0, llr_max);
1574 		}
1575 	}
1576 }
1577 
1578 static void
1579 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1580 		unsigned int start_idx,
1581 		struct rte_bbdev_op_data *inputs,
1582 		struct rte_bbdev_op_data *hard_outputs,
1583 		struct rte_bbdev_op_data *soft_outputs,
1584 		struct rte_bbdev_op_data *harq_inputs,
1585 		struct rte_bbdev_op_data *harq_outputs,
1586 		struct rte_bbdev_dec_op *ref_op)
1587 {
1588 	unsigned int i;
1589 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1590 
1591 	for (i = 0; i < n; ++i) {
1592 		if (ldpc_dec->code_block_mode == 0) {
1593 			ops[i]->ldpc_dec.tb_params.ea =
1594 					ldpc_dec->tb_params.ea;
1595 			ops[i]->ldpc_dec.tb_params.eb =
1596 					ldpc_dec->tb_params.eb;
1597 			ops[i]->ldpc_dec.tb_params.c =
1598 					ldpc_dec->tb_params.c;
1599 			ops[i]->ldpc_dec.tb_params.cab =
1600 					ldpc_dec->tb_params.cab;
1601 			ops[i]->ldpc_dec.tb_params.r =
1602 					ldpc_dec->tb_params.r;
1603 		} else {
1604 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1605 		}
1606 
1607 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1608 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1609 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1610 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1611 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1612 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1613 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1614 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1615 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1616 
1617 		if (hard_outputs != NULL)
1618 			ops[i]->ldpc_dec.hard_output =
1619 					hard_outputs[start_idx + i];
1620 		if (inputs != NULL)
1621 			ops[i]->ldpc_dec.input =
1622 					inputs[start_idx + i];
1623 		if (soft_outputs != NULL)
1624 			ops[i]->ldpc_dec.soft_output =
1625 					soft_outputs[start_idx + i];
1626 		if (harq_inputs != NULL)
1627 			ops[i]->ldpc_dec.harq_combined_input =
1628 					harq_inputs[start_idx + i];
1629 		if (harq_outputs != NULL)
1630 			ops[i]->ldpc_dec.harq_combined_output =
1631 					harq_outputs[start_idx + i];
1632 	}
1633 }
1634 
1635 
1636 static void
1637 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1638 		unsigned int start_idx,
1639 		struct rte_bbdev_op_data *inputs,
1640 		struct rte_bbdev_op_data *outputs,
1641 		struct rte_bbdev_enc_op *ref_op)
1642 {
1643 	unsigned int i;
1644 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1645 	for (i = 0; i < n; ++i) {
1646 		if (ldpc_enc->code_block_mode == 0) {
1647 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1648 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1649 			ops[i]->ldpc_enc.tb_params.cab =
1650 					ldpc_enc->tb_params.cab;
1651 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1652 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1653 		} else {
1654 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1655 		}
1656 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1657 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1658 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1659 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1660 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1661 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1662 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1663 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1664 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1665 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1666 	}
1667 }
1668 
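/* Check decode op status against the expected status (with leniency on the
 * syndrome check for high iteration counts) and verify enqueue ordering
 * through the opaque_data field
 */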
1669 static int
1670 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1671 		unsigned int order_idx, const int expected_status)
1672 {
1673 	int status = op->status;
1674 	/* ignore parity mismatch false alarms for long iterations */
1675 	if (get_iter_max() >= 10) {
1676 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1677 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1678 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1679 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1680 		}
1681 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1682 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1683 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1684 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1685 		}
1686 	}
1687 
1688 	TEST_ASSERT(status == expected_status,
1689 			"op_status (%d) != expected_status (%d)",
1690 			op->status, expected_status);
1691 
1692 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1693 			"Ordering error, expected %p, got %p",
1694 			(void *)(uintptr_t)order_idx, op->opaque_data);
1695 
1696 	return TEST_SUCCESS;
1697 }
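/*
 * Note restating the checks above with example values: op->status is a bit
 * mask, so a clean decode reports 0 while a failed parity check reports
 * 1 << RTE_BBDEV_SYNDROME_ERROR. The opaque_data field is expected to carry
 * the enqueue index, e.g. the fifth enqueued op must come back with
 * opaque_data == (void *)(uintptr_t)4.
 */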
1698 
1699 static int
1700 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1701 		unsigned int order_idx, const int expected_status)
1702 {
1703 	TEST_ASSERT(op->status == expected_status,
1704 			"op_status (%d) != expected_status (%d)",
1705 			op->status, expected_status);
1706 
1707 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1708 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1709 				"Ordering error, expected %p, got %p",
1710 				(void *)(uintptr_t)order_idx, op->opaque_data);
1711 
1712 	return TEST_SUCCESS;
1713 }
1714 
1715 static inline int
1716 validate_op_chain(struct rte_bbdev_op_data *op,
1717 		struct op_data_entries *orig_op)
1718 {
1719 	uint8_t i;
1720 	struct rte_mbuf *m = op->data;
1721 	uint8_t nb_dst_segments = orig_op->nb_segments;
1722 	uint32_t total_data_size = 0;
1723 
1724 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1725 			"Number of segments differs in original (%u) and filled (%u) op",
1726 			nb_dst_segments, m->nb_segs);
1727 
1728 	/* Validate each mbuf segment length */
1729 	for (i = 0; i < nb_dst_segments; ++i) {
1730 		/* Apply offset to the first mbuf segment */
1731 		uint16_t offset = (i == 0) ? op->offset : 0;
1732 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1733 		total_data_size += orig_op->segments[i].length;
1734 
1735 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1736 				"Length of segment differs in original (%u) and filled (%u) op",
1737 				orig_op->segments[i].length, data_len);
1738 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1739 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1740 				data_len,
1741 				"Output buffers (CB=%u) are not equal", i);
1742 		m = m->next;
1743 	}
1744 
1745 	/* Validate total mbuf pkt length */
1746 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1747 	TEST_ASSERT(total_data_size == pkt_len,
1748 			"Length of data differs in original (%u) and filled (%u) op",
1749 			total_data_size, pkt_len);
1750 
1751 	return TEST_SUCCESS;
1752 }
1753 
1754 /*
1755  * Compute k0 for a given configuration, used for the HARQ output length
1756  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2
1757  */
1758 static inline uint16_t
1759 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1760 {
1761 	if (rv_index == 0)
1762 		return 0;
1763 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1764 	if (n_cb == n) {
1765 		if (rv_index == 1)
1766 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1767 		else if (rv_index == 2)
1768 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1769 		else
1770 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1771 	}
1772 	/* LBRM case - includes a division by N */
1773 	if (rv_index == 1)
1774 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1775 				/ n) * z_c;
1776 	else if (rv_index == 2)
1777 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1778 				/ n) * z_c;
1779 	else
1780 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1781 				/ n) * z_c;
1782 }
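/*
 * Worked example of the computation above (illustrative values only, not
 * taken from any test vector): for BG1, z_c = 384 and rv_index = 2 with a
 * full circular buffer (n_cb == N_ZC_1 * z_c = 25344),
 * k0 = K0_2_1 * z_c = 33 * 384 = 12672. With limited buffer rate matching,
 * e.g. n_cb = 12672, the same rv gives
 * k0 = ((33 * 12672) / 25344) * z_c = 16 * 384 = 6144 after the integer
 * division by n.
 */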
1783 
1784 /* HARQ output length including the Filler bits */
1785 static inline uint16_t
1786 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1787 {
1788 	uint16_t k0 = 0;
1789 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1790 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1791 	/* Compute RM out size and number of rows */
1792 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1793 			* ops_ld->z_c - ops_ld->n_filler;
1794 	uint16_t deRmOutSize = RTE_MIN(
1795 			k0 + ops_ld->cb_params.e +
1796 			((k0 > parity_offset) ?
1797 					0 : ops_ld->n_filler),
1798 					ops_ld->n_cb);
1799 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1800 			/ ops_ld->z_c);
1801 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1802 	return harq_output_len;
1803 }
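/*
 * Worked example for compute_harq_len() (assumed values): BG1, z_c = 384,
 * n_cb = 25344, n_filler = 0, rv_index = 0 (hence k0 = 0) and
 * cb_params.e = 10000 give parity_offset = 20 * 384 = 7680 and
 * deRmOutSize = RTE_MIN(0 + 10000 + 0, 25344) = 10000. Rounding up to a
 * multiple of z_c yields numRows = 27 and harq_output_len = 27 * 384 = 10368.
 */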
1804 
1805 static inline int
1806 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1807 		struct op_data_entries *orig_op,
1808 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1809 {
1810 	uint8_t i;
1811 	uint32_t j, jj, k;
1812 	struct rte_mbuf *m = op->data;
1813 	uint8_t nb_dst_segments = orig_op->nb_segments;
1814 	uint32_t total_data_size = 0;
1815 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1816 	uint32_t byte_error = 0, cum_error = 0, error;
1817 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1818 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1819 	uint16_t parity_offset;
1820 
1821 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1822 			"Number of segments differs in original (%u) and filled (%u) op",
1823 			nb_dst_segments, m->nb_segs);
1824 
1825 	/* Validate each mbuf segment length */
1826 	for (i = 0; i < nb_dst_segments; ++i) {
1827 		/* Apply offset to the first mbuf segment */
1828 		uint16_t offset = (i == 0) ? op->offset : 0;
1829 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1830 		total_data_size += orig_op->segments[i].length;
1831 
1832 		TEST_ASSERT(orig_op->segments[i].length <
1833 				(uint32_t)(data_len + 64),
1834 				"Length of segment differs in original (%u) and filled (%u) op",
1835 				orig_op->segments[i].length, data_len);
1836 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1837 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1838 
1839 		if (!(ldpc_cap_flags &
1840 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1841 				) || (ops_ld->op_flags &
1842 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1843 			data_len -= ops_ld->z_c;
1844 			parity_offset = data_len;
1845 		} else {
1846 			/* Compute RM out size and number of rows */
1847 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1848 					* ops_ld->z_c - ops_ld->n_filler;
1849 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1850 					ops_ld->n_filler;
1851 			if (data_len > deRmOutSize)
1852 				data_len = deRmOutSize;
1853 			if (data_len > orig_op->segments[i].length)
1854 				data_len = orig_op->segments[i].length;
1855 		}
1856 		/*
1857 		 * HARQ output can have minor differences
1858 		 * due to integer representation and related scaling
1859 		 */
1860 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1861 			if (j == parity_offset) {
1862 				/* Special Handling of the filler bits */
1863 				for (k = 0; k < ops_ld->n_filler; k++) {
1864 					if (harq_out[jj] !=
1865 							llr_max_pre_scaling) {
1866 						printf("HARQ Filler issue %d: %d %d\n",
1867 							jj, harq_out[jj],
1868 							llr_max_pre_scaling);
1869 						byte_error++;
1870 					}
1871 					jj++;
1872 				}
1873 			}
1874 			if (!(ops_ld->op_flags &
1875 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1876 				if (ldpc_llr_decimals > 1)
1877 					harq_out[jj] = (harq_out[jj] + 1)
1878 						>> (ldpc_llr_decimals - 1);
1879 				/* Saturated to S7 */
1880 				if (harq_orig[j] > llr_max)
1881 					harq_orig[j] = llr_max;
1882 				if (harq_orig[j] < -llr_max)
1883 					harq_orig[j] = -llr_max;
1884 			}
1885 			if (harq_orig[j] != harq_out[jj]) {
1886 				error = (harq_orig[j] > harq_out[jj]) ?
1887 						harq_orig[j] - harq_out[jj] :
1888 						harq_out[jj] - harq_orig[j];
1889 				abs_harq_origin = harq_orig[j] > 0 ?
1890 							harq_orig[j] :
1891 							-harq_orig[j];
1892 				/* Residual quantization error */
1893 				if ((error > 8 && (abs_harq_origin <
1894 						(llr_max - 16))) ||
1895 						(error > 16)) {
1896 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1897 							j, harq_orig[j],
1898 							harq_out[jj], error);
1899 					byte_error++;
1900 					cum_error += error;
1901 				}
1902 			}
1903 		}
1904 		m = m->next;
1905 	}
1906 
1907 	if (byte_error)
1908 		TEST_ASSERT(byte_error <= 1,
1909 				"HARQ output mismatch (%d) %d",
1910 				byte_error, cum_error);
1911 
1912 	/* Validate total mbuf pkt length */
1913 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1914 	TEST_ASSERT(total_data_size < pkt_len + 64,
1915 			"Length of data differs in original (%u) and filled (%u) op",
1916 			total_data_size, pkt_len);
1917 
1918 	return TEST_SUCCESS;
1919 }
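/*
 * Example of the tolerance rule applied above (assuming ldpc_llr_size = 8
 * and ldpc_llr_decimals = 2, so llr_max = 63): after scaling, a difference
 * of 12 on a sample with |harq_orig| = 50 is accepted because
 * 50 >= llr_max - 16 = 47, while the same difference on a sample with
 * |harq_orig| = 20 counts as a byte error. Differences above 16 always
 * count, and a single isolated byte error is still tolerated by the assert.
 */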
1920 
1921 static int
1922 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1923 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1924 {
1925 	unsigned int i;
1926 	int ret;
1927 	struct op_data_entries *hard_data_orig =
1928 			&test_vector.entries[DATA_HARD_OUTPUT];
1929 	struct op_data_entries *soft_data_orig =
1930 			&test_vector.entries[DATA_SOFT_OUTPUT];
1931 	struct rte_bbdev_op_turbo_dec *ops_td;
1932 	struct rte_bbdev_op_data *hard_output;
1933 	struct rte_bbdev_op_data *soft_output;
1934 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1935 
1936 	for (i = 0; i < n; ++i) {
1937 		ops_td = &ops[i]->turbo_dec;
1938 		hard_output = &ops_td->hard_output;
1939 		soft_output = &ops_td->soft_output;
1940 
1941 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1942 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1943 					"Returned iter_count (%d) > expected iter_count (%d)",
1944 					ops_td->iter_count, ref_td->iter_count);
1945 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1946 		TEST_ASSERT_SUCCESS(ret,
1947 				"Checking status and ordering for decoder failed");
1948 
1949 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1950 				hard_data_orig),
1951 				"Hard output buffers (CB=%u) are not equal",
1952 				i);
1953 
1954 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1955 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1956 					soft_data_orig),
1957 					"Soft output buffers (CB=%u) are not equal",
1958 					i);
1959 	}
1960 
1961 	return TEST_SUCCESS;
1962 }
1963 
1964 /* Check the number of code block errors */
1965 static int
1966 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
1967 {
1968 	unsigned int i;
1969 	struct op_data_entries *hard_data_orig =
1970 			&test_vector.entries[DATA_HARD_OUTPUT];
1971 	struct rte_bbdev_op_ldpc_dec *ops_td;
1972 	struct rte_bbdev_op_data *hard_output;
1973 	int errors = 0;
1974 	struct rte_mbuf *m;
1975 
1976 	for (i = 0; i < n; ++i) {
1977 		ops_td = &ops[i]->ldpc_dec;
1978 		hard_output = &ops_td->hard_output;
1979 		m = hard_output->data;
1980 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
1981 				hard_data_orig->segments[0].addr,
1982 				hard_data_orig->segments[0].length))
1983 			errors++;
1984 	}
1985 	return errors;
1986 }
1987 
1988 static int
1989 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1990 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1991 {
1992 	unsigned int i;
1993 	int ret;
1994 	struct op_data_entries *hard_data_orig =
1995 			&test_vector.entries[DATA_HARD_OUTPUT];
1996 	struct op_data_entries *soft_data_orig =
1997 			&test_vector.entries[DATA_SOFT_OUTPUT];
1998 	struct op_data_entries *harq_data_orig =
1999 				&test_vector.entries[DATA_HARQ_OUTPUT];
2000 	struct rte_bbdev_op_ldpc_dec *ops_td;
2001 	struct rte_bbdev_op_data *hard_output;
2002 	struct rte_bbdev_op_data *harq_output;
2003 	struct rte_bbdev_op_data *soft_output;
2004 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2005 
2006 	for (i = 0; i < n; ++i) {
2007 		ops_td = &ops[i]->ldpc_dec;
2008 		hard_output = &ops_td->hard_output;
2009 		harq_output = &ops_td->harq_combined_output;
2010 		soft_output = &ops_td->soft_output;
2011 
2012 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2013 		TEST_ASSERT_SUCCESS(ret,
2014 				"Checking status and ordering for decoder failed");
2015 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2016 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2017 					"Returned iter_count (%d) > expected iter_count (%d)",
2018 					ops_td->iter_count, ref_td->iter_count);
2019 		/*
2020 		 * We can ignore output data when the decoding failed to
2021 		 * converge or for loop-back cases
2022 		 */
2023 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2024 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2025 				) && (
2026 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2027 						)) == 0)
2028 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2029 					hard_data_orig),
2030 					"Hard output buffers (CB=%u) are not equal",
2031 					i);
2032 
2033 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2034 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2035 					soft_data_orig),
2036 					"Soft output buffers (CB=%u) are not equal",
2037 					i);
2038 		if (ref_op->ldpc_dec.op_flags &
2039 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2040 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2041 					harq_data_orig, ops_td),
2042 					"HARQ output buffers (CB=%u) are not equal",
2043 					i);
2044 		}
2045 		if (ref_op->ldpc_dec.op_flags &
2046 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2047 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2048 					harq_data_orig, ops_td),
2049 					"HARQ output buffers (CB=%u) are not equal",
2050 					i);
2051 
2052 	}
2053 
2054 	return TEST_SUCCESS;
2055 }
2056 
2057 
2058 static int
2059 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2060 		struct rte_bbdev_enc_op *ref_op)
2061 {
2062 	unsigned int i;
2063 	int ret;
2064 	struct op_data_entries *hard_data_orig =
2065 			&test_vector.entries[DATA_HARD_OUTPUT];
2066 
2067 	for (i = 0; i < n; ++i) {
2068 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2069 		TEST_ASSERT_SUCCESS(ret,
2070 				"Checking status and ordering for encoder failed");
2071 		TEST_ASSERT_SUCCESS(validate_op_chain(
2072 				&ops[i]->turbo_enc.output,
2073 				hard_data_orig),
2074 				"Output buffers (CB=%u) are not equal",
2075 				i);
2076 	}
2077 
2078 	return TEST_SUCCESS;
2079 }
2080 
2081 static int
2082 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2083 		struct rte_bbdev_enc_op *ref_op)
2084 {
2085 	unsigned int i;
2086 	int ret;
2087 	struct op_data_entries *hard_data_orig =
2088 			&test_vector.entries[DATA_HARD_OUTPUT];
2089 
2090 	for (i = 0; i < n; ++i) {
2091 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2092 		TEST_ASSERT_SUCCESS(ret,
2093 				"Checking status and ordering for encoder failed");
2094 		TEST_ASSERT_SUCCESS(validate_op_chain(
2095 				&ops[i]->ldpc_enc.output,
2096 				hard_data_orig),
2097 				"Output buffers (CB=%u) are not equal",
2098 				i);
2099 	}
2100 
2101 	return TEST_SUCCESS;
2102 }
2103 
2104 static void
2105 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2106 {
2107 	unsigned int i;
2108 	struct op_data_entries *entry;
2109 
2110 	op->turbo_dec = test_vector.turbo_dec;
2111 	entry = &test_vector.entries[DATA_INPUT];
2112 	for (i = 0; i < entry->nb_segments; ++i)
2113 		op->turbo_dec.input.length +=
2114 				entry->segments[i].length;
2115 }
2116 
2117 static void
2118 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2119 {
2120 	unsigned int i;
2121 	struct op_data_entries *entry;
2122 
2123 	op->ldpc_dec = test_vector.ldpc_dec;
2124 	entry = &test_vector.entries[DATA_INPUT];
2125 	for (i = 0; i < entry->nb_segments; ++i)
2126 		op->ldpc_dec.input.length +=
2127 				entry->segments[i].length;
2128 	if (test_vector.ldpc_dec.op_flags &
2129 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2130 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2131 		for (i = 0; i < entry->nb_segments; ++i)
2132 			op->ldpc_dec.harq_combined_input.length +=
2133 				entry->segments[i].length;
2134 	}
2135 }
2136 
2137 
2138 static void
2139 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2140 {
2141 	unsigned int i;
2142 	struct op_data_entries *entry;
2143 
2144 	op->turbo_enc = test_vector.turbo_enc;
2145 	entry = &test_vector.entries[DATA_INPUT];
2146 	for (i = 0; i < entry->nb_segments; ++i)
2147 		op->turbo_enc.input.length +=
2148 				entry->segments[i].length;
2149 }
2150 
2151 static void
2152 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2153 {
2154 	unsigned int i;
2155 	struct op_data_entries *entry;
2156 
2157 	op->ldpc_enc = test_vector.ldpc_enc;
2158 	entry = &test_vector.entries[DATA_INPUT];
2159 	for (i = 0; i < entry->nb_segments; ++i)
2160 		op->ldpc_enc.input.length +=
2161 				entry->segments[i].length;
2162 }
2163 
2164 static uint32_t
2165 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2166 {
2167 	uint8_t i;
2168 	uint32_t c, r, tb_size = 0;
2169 
2170 	if (op->turbo_dec.code_block_mode) {
2171 		tb_size = op->turbo_dec.tb_params.k_neg;
2172 	} else {
2173 		c = op->turbo_dec.tb_params.c;
2174 		r = op->turbo_dec.tb_params.r;
2175 		for (i = 0; i < c-r; i++)
2176 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2177 				op->turbo_dec.tb_params.k_neg :
2178 				op->turbo_dec.tb_params.k_pos;
2179 	}
2180 	return tb_size;
2181 }
2182 
2183 static uint32_t
2184 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2185 {
2186 	uint8_t i;
2187 	uint32_t c, r, tb_size = 0;
2188 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2189 
2190 	if (op->ldpc_dec.code_block_mode) {
2191 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2192 	} else {
2193 		c = op->ldpc_dec.tb_params.c;
2194 		r = op->ldpc_dec.tb_params.r;
2195 		for (i = 0; i < c-r; i++)
2196 			tb_size += sys_cols * op->ldpc_dec.z_c
2197 					- op->ldpc_dec.n_filler;
2198 	}
2199 	return tb_size;
2200 }
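/*
 * Worked example for calc_ldpc_dec_TB_size() (assumed values): BG1 implies
 * 22 systematic columns, so with z_c = 384 and n_filler = 0 a single code
 * block carries 22 * 384 = 8448 information bits. In transport block mode
 * with c = 2 and r = 0 the loop sums two such blocks, i.e. 16896 bits.
 */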
2201 
2202 static uint32_t
2203 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2204 {
2205 	uint8_t i;
2206 	uint32_t c, r, tb_size = 0;
2207 
2208 	if (op->turbo_enc.code_block_mode) {
2209 		tb_size = op->turbo_enc.tb_params.k_neg;
2210 	} else {
2211 		c = op->turbo_enc.tb_params.c;
2212 		r = op->turbo_enc.tb_params.r;
2213 		for (i = 0; i < c-r; i++)
2214 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2215 				op->turbo_enc.tb_params.k_neg :
2216 				op->turbo_enc.tb_params.k_pos;
2217 	}
2218 	return tb_size;
2219 }
2220 
2221 static uint32_t
2222 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2223 {
2224 	uint8_t i;
2225 	uint32_t c, r, tb_size = 0;
2226 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2227 
2228 	if (op->ldpc_enc.code_block_mode) {
2229 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2230 	} else {
2231 		c = op->ldpc_enc.tb_params.c;
2232 		r = op->ldpc_enc.tb_params.r;
2233 		for (i = 0; i < c-r; i++)
2234 			tb_size += sys_cols * op->ldpc_enc.z_c
2235 					- op->ldpc_enc.n_filler;
2236 	}
2237 	return tb_size;
2238 }
2239 
2240 
2241 static int
2242 init_test_op_params(struct test_op_params *op_params,
2243 		enum rte_bbdev_op_type op_type, const int expected_status,
2244 		const int vector_mask, struct rte_mempool *ops_mp,
2245 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2246 {
2247 	int ret = 0;
2248 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2249 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2250 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2251 				&op_params->ref_dec_op, 1);
2252 	else
2253 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2254 				&op_params->ref_enc_op, 1);
2255 
2256 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2257 
2258 	op_params->mp = ops_mp;
2259 	op_params->burst_sz = burst_sz;
2260 	op_params->num_to_process = num_to_process;
2261 	op_params->num_lcores = num_lcores;
2262 	op_params->vector_mask = vector_mask;
2263 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2264 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2265 		op_params->ref_dec_op->status = expected_status;
2266 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2267 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2268 		op_params->ref_enc_op->status = expected_status;
2269 	return 0;
2270 }
2271 
2272 static int
2273 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2274 		struct test_op_params *op_params)
2275 {
2276 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2277 	unsigned int i;
2278 	struct active_device *ad;
2279 	unsigned int burst_sz = get_burst_sz();
2280 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2281 	const struct rte_bbdev_op_cap *capabilities = NULL;
2282 
2283 	ad = &active_devs[dev_id];
2284 
2285 	/* Check if device supports op_type */
2286 	if (!is_avail_op(ad, test_vector.op_type))
2287 		return TEST_SUCCESS;
2288 
2289 	struct rte_bbdev_info info;
2290 	rte_bbdev_info_get(ad->dev_id, &info);
2291 	socket_id = GET_SOCKET(info.socket_id);
2292 
2293 	f_ret = create_mempools(ad, socket_id, op_type,
2294 			get_num_ops());
2295 	if (f_ret != TEST_SUCCESS) {
2296 		printf("Couldn't create mempools\n");
2297 		goto fail;
2298 	}
2299 	if (op_type == RTE_BBDEV_OP_NONE)
2300 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2301 
2302 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2303 			test_vector.expected_status,
2304 			test_vector.mask,
2305 			ad->ops_mempool,
2306 			burst_sz,
2307 			get_num_ops(),
2308 			get_num_lcores());
2309 	if (f_ret != TEST_SUCCESS) {
2310 		printf("Couldn't init test op params\n");
2311 		goto fail;
2312 	}
2313 
2314 
2315 	/* Find capabilities */
2316 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2317 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2318 		if (cap->type == test_vector.op_type) {
2319 			capabilities = cap;
2320 			break;
2321 		}
2322 		cap++;
2323 	}
2324 	TEST_ASSERT_NOT_NULL(capabilities,
2325 			"Couldn't find capabilities");
2326 
2327 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2328 		create_reference_dec_op(op_params->ref_dec_op);
2329 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2330 		create_reference_enc_op(op_params->ref_enc_op);
2331 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2332 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2333 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2334 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2335 
2336 	for (i = 0; i < ad->nb_queues; ++i) {
2337 		f_ret = fill_queue_buffers(op_params,
2338 				ad->in_mbuf_pool,
2339 				ad->hard_out_mbuf_pool,
2340 				ad->soft_out_mbuf_pool,
2341 				ad->harq_in_mbuf_pool,
2342 				ad->harq_out_mbuf_pool,
2343 				ad->queue_ids[i],
2344 				capabilities,
2345 				info.drv.min_alignment,
2346 				socket_id);
2347 		if (f_ret != TEST_SUCCESS) {
2348 			printf("Couldn't init queue buffers\n");
2349 			goto fail;
2350 		}
2351 	}
2352 
2353 	/* Run test case function */
2354 	t_ret = test_case_func(ad, op_params);
2355 
2356 	/* Free active device resources and return */
2357 	free_buffers(ad, op_params);
2358 	return t_ret;
2359 
2360 fail:
2361 	free_buffers(ad, op_params);
2362 	return TEST_FAILED;
2363 }
2364 
2365 /* Run given test function per active device per supported op type
2366  * per burst size.
2367  */
2368 static int
2369 run_test_case(test_case_function *test_case_func)
2370 {
2371 	int ret = 0;
2372 	uint8_t dev;
2373 
2374 	/* Alloc op_params */
2375 	struct test_op_params *op_params = rte_zmalloc(NULL,
2376 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2377 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2378 			RTE_ALIGN(sizeof(struct test_op_params),
2379 				RTE_CACHE_LINE_SIZE));
2380 
2381 	/* For each device run test case function */
2382 	for (dev = 0; dev < nb_active_devs; ++dev)
2383 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2384 
2385 	rte_free(op_params);
2386 
2387 	return ret;
2388 }
2389 
2390 
2391 /* Push back the HARQ output from DDR to host */
2392 static void
2393 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2394 		struct rte_bbdev_dec_op **ops,
2395 		const uint16_t n)
2396 {
2397 	uint16_t j;
2398 	int save_status, ret;
2399 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2400 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2401 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2402 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2403 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2404 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2405 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2406 	for (j = 0; j < n; ++j) {
2407 		if ((loopback && mem_out) || hc_out) {
2408 			save_status = ops[j]->status;
2409 			ops[j]->ldpc_dec.op_flags =
2410 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2411 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2412 			if (h_comp)
2413 				ops[j]->ldpc_dec.op_flags +=
2414 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2415 			ops[j]->ldpc_dec.harq_combined_input.offset =
2416 					harq_offset;
2417 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2418 			harq_offset += HARQ_INCR;
2419 			if (!loopback)
2420 				ops[j]->ldpc_dec.harq_combined_input.length =
2421 				ops[j]->ldpc_dec.harq_combined_output.length;
2422 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2423 					&ops[j], 1);
2424 			ret = 0;
2425 			while (ret == 0)
2426 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2427 						dev_id, queue_id,
2428 						&ops_deq[j], 1);
2429 			ops[j]->ldpc_dec.op_flags = flags;
2430 			ops[j]->status = save_status;
2431 		}
2432 	}
2433 }
2434 
2435 /*
2436  * Preload the HARQ memory input from host into HW DDR when requested
2437  * and adjust the HARQ offsets accordingly
2438  */
2439 static void
2440 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2441 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2442 		bool preload)
2443 {
2444 	uint16_t j;
2445 	int ret;
2446 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2447 	struct rte_bbdev_op_data save_hc_in, save_hc_out;
2448 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2449 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2450 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2451 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2452 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2453 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2454 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2455 	for (j = 0; j < n; ++j) {
2456 		if ((mem_in || hc_in) && preload) {
2457 			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
2458 			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
2459 			ops[j]->ldpc_dec.op_flags =
2460 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2461 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2462 			if (h_comp)
2463 				ops[j]->ldpc_dec.op_flags +=
2464 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2465 			ops[j]->ldpc_dec.harq_combined_output.offset =
2466 					harq_offset;
2467 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2468 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2469 					&ops[j], 1);
2470 			ret = 0;
2471 			while (ret == 0)
2472 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2473 					dev_id, queue_id, &ops_deq[j], 1);
2474 			ops[j]->ldpc_dec.op_flags = flags;
2475 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
2476 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
2477 		}
2478 		/* Adjust HARQ offset when we reach external DDR */
2479 		if (mem_in || hc_in)
2480 			ops[j]->ldpc_dec.harq_combined_input.offset
2481 				= harq_offset;
2482 		if (mem_out || hc_out)
2483 			ops[j]->ldpc_dec.harq_combined_output.offset
2484 				= harq_offset;
2485 		harq_offset += HARQ_INCR;
2486 	}
2487 }
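/*
 * Note on the HARQ DDR addressing used by retrieve_harq_ddr() and
 * preload_harq_ddr() above: each queue owns a region of HARQ_INCR * 1024
 * bytes (32 MB) of external memory and consecutive operations are spaced
 * HARQ_INCR (32K bytes) apart. For instance (illustrative values),
 * queue_id = 1 and the third operation map to
 * 1 * 32768 * 1024 + 2 * 32768 = 33619968.
 */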
2488 
2489 static void
2490 dequeue_event_callback(uint16_t dev_id,
2491 		enum rte_bbdev_event_type event, void *cb_arg,
2492 		void *ret_param)
2493 {
2494 	int ret;
2495 	uint16_t i;
2496 	uint64_t total_time;
2497 	uint16_t deq, burst_sz, num_ops;
2498 	uint16_t queue_id = *(uint16_t *) ret_param;
2499 	struct rte_bbdev_info info;
2500 	double tb_len_bits;
2501 	struct thread_params *tp = cb_arg;
2502 
2503 	/* Find matching thread params using queue_id */
2504 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2505 		if (tp->queue_id == queue_id)
2506 			break;
2507 
2508 	if (i == MAX_QUEUES) {
2509 		printf("%s: Queue_id from interrupt details was not found!\n",
2510 				__func__);
2511 		return;
2512 	}
2513 
2514 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2515 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2516 		printf(
2517 			"Dequeue interrupt handler called for incorrect event!\n");
2518 		return;
2519 	}
2520 
2521 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2522 	num_ops = tp->op_params->num_to_process;
2523 
2524 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2525 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2526 				&tp->dec_ops[
2527 					rte_atomic16_read(&tp->nb_dequeued)],
2528 				burst_sz);
2529 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2530 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2531 				&tp->dec_ops[
2532 					rte_atomic16_read(&tp->nb_dequeued)],
2533 				burst_sz);
2534 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2535 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2536 				&tp->enc_ops[
2537 					rte_atomic16_read(&tp->nb_dequeued)],
2538 				burst_sz);
2539 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2540 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2541 				&tp->enc_ops[
2542 					rte_atomic16_read(&tp->nb_dequeued)],
2543 				burst_sz);
2544 
2545 	if (deq < burst_sz) {
2546 		printf(
2547 			"After receiving the interrupt, all operations should be dequeued. Expected: %u, got: %u\n",
2548 			burst_sz, deq);
2549 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2550 		return;
2551 	}
2552 
2553 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2554 		rte_atomic16_add(&tp->nb_dequeued, deq);
2555 		return;
2556 	}
2557 
2558 	total_time = rte_rdtsc_precise() - tp->start_time;
2559 
2560 	rte_bbdev_info_get(dev_id, &info);
2561 
2562 	ret = TEST_SUCCESS;
2563 
2564 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2565 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2566 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2567 				tp->op_params->vector_mask);
2568 		/* get the max of iter_count for all dequeued ops */
2569 		for (i = 0; i < num_ops; ++i)
2570 			tp->iter_count = RTE_MAX(
2571 					tp->dec_ops[i]->turbo_dec.iter_count,
2572 					tp->iter_count);
2573 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2574 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2575 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2576 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2577 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2578 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2579 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2580 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2581 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2582 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2583 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2584 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2585 				tp->op_params->vector_mask);
2586 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2587 	}
2588 
2589 	if (ret) {
2590 		printf("Buffers validation failed\n");
2591 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2592 	}
2593 
2594 	switch (test_vector.op_type) {
2595 	case RTE_BBDEV_OP_TURBO_DEC:
2596 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2597 		break;
2598 	case RTE_BBDEV_OP_TURBO_ENC:
2599 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2600 		break;
2601 	case RTE_BBDEV_OP_LDPC_DEC:
2602 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2603 		break;
2604 	case RTE_BBDEV_OP_LDPC_ENC:
2605 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2606 		break;
2607 	case RTE_BBDEV_OP_NONE:
2608 		tb_len_bits = 0.0;
2609 		break;
2610 	default:
2611 		printf("Unknown op type: %d\n", test_vector.op_type);
2612 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2613 		return;
2614 	}
2615 
2616 	tp->ops_per_sec += ((double)num_ops) /
2617 			((double)total_time / (double)rte_get_tsc_hz());
2618 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2619 			((double)total_time / (double)rte_get_tsc_hz());
2620 
2621 	rte_atomic16_add(&tp->nb_dequeued, deq);
2622 }
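/*
 * The rates accumulated at the end of the callback follow
 *   ops_per_sec = num_ops / (total_time / rte_get_tsc_hz())
 *   mbps = (num_ops * tb_len_bits / 1e6) / (total_time / rte_get_tsc_hz())
 * e.g. (assumed values) 512 operations of 8448 bits each completed in 1 ms
 * amount to 512 * 8448 / 1e6 / 0.001, i.e. roughly 4325 Mbps.
 */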
2623 
2624 static int
2625 throughput_intr_lcore_ldpc_dec(void *arg)
2626 {
2627 	struct thread_params *tp = arg;
2628 	unsigned int enqueued;
2629 	const uint16_t queue_id = tp->queue_id;
2630 	const uint16_t burst_sz = tp->op_params->burst_sz;
2631 	const uint16_t num_to_process = tp->op_params->num_to_process;
2632 	struct rte_bbdev_dec_op *ops[num_to_process];
2633 	struct test_buffers *bufs = NULL;
2634 	struct rte_bbdev_info info;
2635 	int ret, i, j;
2636 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2637 	uint16_t num_to_enq, enq;
2638 
2639 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2640 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2641 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2642 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2643 
2644 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2645 			"BURST_SIZE should be <= %u", MAX_BURST);
2646 
2647 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2648 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2649 			tp->dev_id, queue_id);
2650 
2651 	rte_bbdev_info_get(tp->dev_id, &info);
2652 
2653 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2654 			"NUM_OPS cannot exceed %u for this device",
2655 			info.drv.queue_size_lim);
2656 
2657 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2658 
2659 	rte_atomic16_clear(&tp->processing_status);
2660 	rte_atomic16_clear(&tp->nb_dequeued);
2661 
2662 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2663 		rte_pause();
2664 
2665 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2666 				num_to_process);
2667 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2668 			num_to_process);
2669 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2670 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2671 				bufs->hard_outputs, bufs->soft_outputs,
2672 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2673 
2674 	/* Set counter to validate the ordering */
2675 	for (j = 0; j < num_to_process; ++j)
2676 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2677 
2678 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2679 		for (i = 0; i < num_to_process; ++i) {
2680 			if (!loopback)
2681 				rte_pktmbuf_reset(
2682 					ops[i]->ldpc_dec.hard_output.data);
2683 			if (hc_out || loopback)
2684 				mbuf_reset(
2685 				ops[i]->ldpc_dec.harq_combined_output.data);
2686 		}
2687 
2688 		tp->start_time = rte_rdtsc_precise();
2689 		for (enqueued = 0; enqueued < num_to_process;) {
2690 			num_to_enq = burst_sz;
2691 
2692 			if (unlikely(num_to_process - enqueued < num_to_enq))
2693 				num_to_enq = num_to_process - enqueued;
2694 
2695 			enq = 0;
2696 			do {
2697 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2698 						tp->dev_id,
2699 						queue_id, &ops[enqueued],
2700 						num_to_enq);
2701 			} while (unlikely(num_to_enq != enq));
2702 			enqueued += enq;
2703 
2704 			/* Write the number of descriptors enqueued so far to
2705 			 * the thread's burst_sz. This ensures that the proper
2706 			 * number of descriptors is dequeued in the callback
2707 			 * function, which is needed for the last batch when
2708 			 * the number of operations is not a multiple of the
2709 			 * burst size.
2710 			 */
2711 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2712 
2713 			/* Wait until processing of previous batch is
2714 			 * completed
2715 			 */
2716 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2717 					(int16_t) enqueued)
2718 				rte_pause();
2719 		}
2720 		if (j != TEST_REPETITIONS - 1)
2721 			rte_atomic16_clear(&tp->nb_dequeued);
2722 	}
2723 
2724 	return TEST_SUCCESS;
2725 }
2726 
2727 static int
2728 throughput_intr_lcore_dec(void *arg)
2729 {
2730 	struct thread_params *tp = arg;
2731 	unsigned int enqueued;
2732 	const uint16_t queue_id = tp->queue_id;
2733 	const uint16_t burst_sz = tp->op_params->burst_sz;
2734 	const uint16_t num_to_process = tp->op_params->num_to_process;
2735 	struct rte_bbdev_dec_op *ops[num_to_process];
2736 	struct test_buffers *bufs = NULL;
2737 	struct rte_bbdev_info info;
2738 	int ret, i, j;
2739 	uint16_t num_to_enq, enq;
2740 
2741 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2742 			"BURST_SIZE should be <= %u", MAX_BURST);
2743 
2744 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2745 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2746 			tp->dev_id, queue_id);
2747 
2748 	rte_bbdev_info_get(tp->dev_id, &info);
2749 
2750 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2751 			"NUM_OPS cannot exceed %u for this device",
2752 			info.drv.queue_size_lim);
2753 
2754 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2755 
2756 	rte_atomic16_clear(&tp->processing_status);
2757 	rte_atomic16_clear(&tp->nb_dequeued);
2758 
2759 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2760 		rte_pause();
2761 
2762 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2763 				num_to_process);
2764 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2765 			num_to_process);
2766 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2767 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2768 				bufs->hard_outputs, bufs->soft_outputs,
2769 				tp->op_params->ref_dec_op);
2770 
2771 	/* Set counter to validate the ordering */
2772 	for (j = 0; j < num_to_process; ++j)
2773 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2774 
2775 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2776 		for (i = 0; i < num_to_process; ++i)
2777 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2778 
2779 		tp->start_time = rte_rdtsc_precise();
2780 		for (enqueued = 0; enqueued < num_to_process;) {
2781 			num_to_enq = burst_sz;
2782 
2783 			if (unlikely(num_to_process - enqueued < num_to_enq))
2784 				num_to_enq = num_to_process - enqueued;
2785 
2786 			enq = 0;
2787 			do {
2788 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2789 						queue_id, &ops[enqueued],
2790 						num_to_enq);
2791 			} while (unlikely(num_to_enq != enq));
2792 			enqueued += enq;
2793 
2794 			/* Write the number of descriptors enqueued so far to
2795 			 * the thread's burst_sz. This ensures that the proper
2796 			 * number of descriptors is dequeued in the callback
2797 			 * function, which is needed for the last batch when
2798 			 * the number of operations is not a multiple of the
2799 			 * burst size.
2800 			 */
2801 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2802 
2803 			/* Wait until processing of previous batch is
2804 			 * completed
2805 			 */
2806 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2807 					(int16_t) enqueued)
2808 				rte_pause();
2809 		}
2810 		if (j != TEST_REPETITIONS - 1)
2811 			rte_atomic16_clear(&tp->nb_dequeued);
2812 	}
2813 
2814 	return TEST_SUCCESS;
2815 }
2816 
2817 static int
2818 throughput_intr_lcore_enc(void *arg)
2819 {
2820 	struct thread_params *tp = arg;
2821 	unsigned int enqueued;
2822 	const uint16_t queue_id = tp->queue_id;
2823 	const uint16_t burst_sz = tp->op_params->burst_sz;
2824 	const uint16_t num_to_process = tp->op_params->num_to_process;
2825 	struct rte_bbdev_enc_op *ops[num_to_process];
2826 	struct test_buffers *bufs = NULL;
2827 	struct rte_bbdev_info info;
2828 	int ret, i, j;
2829 	uint16_t num_to_enq, enq;
2830 
2831 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2832 			"BURST_SIZE should be <= %u", MAX_BURST);
2833 
2834 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2835 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2836 			tp->dev_id, queue_id);
2837 
2838 	rte_bbdev_info_get(tp->dev_id, &info);
2839 
2840 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2841 			"NUM_OPS cannot exceed %u for this device",
2842 			info.drv.queue_size_lim);
2843 
2844 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2845 
2846 	rte_atomic16_clear(&tp->processing_status);
2847 	rte_atomic16_clear(&tp->nb_dequeued);
2848 
2849 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2850 		rte_pause();
2851 
2852 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2853 			num_to_process);
2854 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2855 			num_to_process);
2856 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2857 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2858 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2859 
2860 	/* Set counter to validate the ordering */
2861 	for (j = 0; j < num_to_process; ++j)
2862 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2863 
2864 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2865 		for (i = 0; i < num_to_process; ++i)
2866 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2867 
2868 		tp->start_time = rte_rdtsc_precise();
2869 		for (enqueued = 0; enqueued < num_to_process;) {
2870 			num_to_enq = burst_sz;
2871 
2872 			if (unlikely(num_to_process - enqueued < num_to_enq))
2873 				num_to_enq = num_to_process - enqueued;
2874 
2875 			enq = 0;
2876 			do {
2877 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2878 						queue_id, &ops[enqueued],
2879 						num_to_enq);
2880 			} while (unlikely(enq != num_to_enq));
2881 			enqueued += enq;
2882 
2883 			/* Write the number of descriptors enqueued so far to
2884 			 * the thread's burst_sz. This ensures that the proper
2885 			 * number of descriptors is dequeued in the callback
2886 			 * function, which is needed for the last batch when
2887 			 * the number of operations is not a multiple of the
2888 			 * burst size.
2889 			 */
2890 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2891 
2892 			/* Wait until processing of previous batch is
2893 			 * completed
2894 			 */
2895 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2896 					(int16_t) enqueued)
2897 				rte_pause();
2898 		}
2899 		if (j != TEST_REPETITIONS - 1)
2900 			rte_atomic16_clear(&tp->nb_dequeued);
2901 	}
2902 
2903 	return TEST_SUCCESS;
2904 }
2905 
2906 
2907 static int
2908 throughput_intr_lcore_ldpc_enc(void *arg)
2909 {
2910 	struct thread_params *tp = arg;
2911 	unsigned int enqueued;
2912 	const uint16_t queue_id = tp->queue_id;
2913 	const uint16_t burst_sz = tp->op_params->burst_sz;
2914 	const uint16_t num_to_process = tp->op_params->num_to_process;
2915 	struct rte_bbdev_enc_op *ops[num_to_process];
2916 	struct test_buffers *bufs = NULL;
2917 	struct rte_bbdev_info info;
2918 	int ret, i, j;
2919 	uint16_t num_to_enq, enq;
2920 
2921 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2922 			"BURST_SIZE should be <= %u", MAX_BURST);
2923 
2924 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2925 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2926 			tp->dev_id, queue_id);
2927 
2928 	rte_bbdev_info_get(tp->dev_id, &info);
2929 
2930 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2931 			"NUM_OPS cannot exceed %u for this device",
2932 			info.drv.queue_size_lim);
2933 
2934 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2935 
2936 	rte_atomic16_clear(&tp->processing_status);
2937 	rte_atomic16_clear(&tp->nb_dequeued);
2938 
2939 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2940 		rte_pause();
2941 
2942 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2943 			num_to_process);
2944 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2945 			num_to_process);
2946 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2947 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
2948 				bufs->inputs, bufs->hard_outputs,
2949 				tp->op_params->ref_enc_op);
2950 
2951 	/* Set counter to validate the ordering */
2952 	for (j = 0; j < num_to_process; ++j)
2953 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2954 
2955 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2956 		for (i = 0; i < num_to_process; ++i)
2957 			rte_pktmbuf_reset(ops[i]->ldpc_enc.output.data);
2958 
2959 		tp->start_time = rte_rdtsc_precise();
2960 		for (enqueued = 0; enqueued < num_to_process;) {
2961 			num_to_enq = burst_sz;
2962 
2963 			if (unlikely(num_to_process - enqueued < num_to_enq))
2964 				num_to_enq = num_to_process - enqueued;
2965 
2966 			enq = 0;
2967 			do {
2968 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
2969 						tp->dev_id,
2970 						queue_id, &ops[enqueued],
2971 						num_to_enq);
2972 			} while (unlikely(enq != num_to_enq));
2973 			enqueued += enq;
2974 
2975 			/* Write the number of descriptors enqueued so far to
2976 			 * the thread's burst_sz. This ensures that the proper
2977 			 * number of descriptors is dequeued in the callback
2978 			 * function, which is needed for the last batch when
2979 			 * the number of operations is not a multiple of the
2980 			 * burst size.
2981 			 */
2982 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2983 
2984 			/* Wait until processing of previous batch is
2985 			 * completed
2986 			 */
2987 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2988 					(int16_t) enqueued)
2989 				rte_pause();
2990 		}
2991 		if (j != TEST_REPETITIONS - 1)
2992 			rte_atomic16_clear(&tp->nb_dequeued);
2993 	}
2994 
2995 	return TEST_SUCCESS;
2996 }
2997 
2998 static int
2999 throughput_pmd_lcore_dec(void *arg)
3000 {
3001 	struct thread_params *tp = arg;
3002 	uint16_t enq, deq;
3003 	uint64_t total_time = 0, start_time;
3004 	const uint16_t queue_id = tp->queue_id;
3005 	const uint16_t burst_sz = tp->op_params->burst_sz;
3006 	const uint16_t num_ops = tp->op_params->num_to_process;
3007 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3008 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3009 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3010 	struct test_buffers *bufs = NULL;
3011 	int i, j, ret;
3012 	struct rte_bbdev_info info;
3013 	uint16_t num_to_enq;
3014 
3015 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3016 			"BURST_SIZE should be <= %u", MAX_BURST);
3017 
3018 	rte_bbdev_info_get(tp->dev_id, &info);
3019 
3020 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3021 			"NUM_OPS cannot exceed %u for this device",
3022 			info.drv.queue_size_lim);
3023 
3024 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3025 
3026 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3027 		rte_pause();
3028 
3029 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3030 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3031 
3032 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3033 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3034 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3035 
3036 	/* Set counter to validate the ordering */
3037 	for (j = 0; j < num_ops; ++j)
3038 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3039 
3040 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3041 
3042 		for (j = 0; j < num_ops; ++j)
3043 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3044 
3045 		start_time = rte_rdtsc_precise();
3046 
3047 		for (enq = 0, deq = 0; enq < num_ops;) {
3048 			num_to_enq = burst_sz;
3049 
3050 			if (unlikely(num_ops - enq < num_to_enq))
3051 				num_to_enq = num_ops - enq;
3052 
3053 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3054 					queue_id, &ops_enq[enq], num_to_enq);
3055 
3056 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3057 					queue_id, &ops_deq[deq], enq - deq);
3058 		}
3059 
3060 		/* dequeue the remaining */
3061 		while (deq < enq) {
3062 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3063 					queue_id, &ops_deq[deq], enq - deq);
3064 		}
3065 
3066 		total_time += rte_rdtsc_precise() - start_time;
3067 	}
3068 
3069 	tp->iter_count = 0;
3070 	/* get the max of iter_count for all dequeued ops */
3071 	for (i = 0; i < num_ops; ++i) {
3072 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3073 				tp->iter_count);
3074 	}
3075 
3076 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3077 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3078 				tp->op_params->vector_mask);
3079 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3080 	}
3081 
3082 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3083 
3084 	double tb_len_bits = calc_dec_TB_size(ref_op);
3085 
3086 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3087 			((double)total_time / (double)rte_get_tsc_hz());
3088 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3089 			1000000.0) / ((double)total_time /
3090 			(double)rte_get_tsc_hz());
3091 
3092 	return TEST_SUCCESS;
3093 }
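/*
 * As in the interrupt callback, the rate reported by this lcore is
 *   mbps = (num_ops * TEST_REPETITIONS * tb_len_bits / 1e6) / elapsed_seconds
 * e.g. (assumed values) 512 ops repeated 1000 times at 6144 bits per TB over
 * 2 s give 512 * 1000 * 6144 / 1e6 / 2, i.e. roughly 1573 Mbps.
 */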
3094 
3095 static int
3096 bler_pmd_lcore_ldpc_dec(void *arg)
3097 {
3098 	struct thread_params *tp = arg;
3099 	uint16_t enq, deq;
3100 	uint64_t total_time = 0, start_time;
3101 	const uint16_t queue_id = tp->queue_id;
3102 	const uint16_t burst_sz = tp->op_params->burst_sz;
3103 	const uint16_t num_ops = tp->op_params->num_to_process;
3104 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3105 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3106 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3107 	struct test_buffers *bufs = NULL;
3108 	int i, j, ret;
3109 	float parity_bler = 0;
3110 	struct rte_bbdev_info info;
3111 	uint16_t num_to_enq;
3112 	bool extDdr = check_bit(ldpc_cap_flags,
3113 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3114 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3115 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3116 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3117 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3118 
3119 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3120 			"BURST_SIZE should be <= %u", MAX_BURST);
3121 
3122 	rte_bbdev_info_get(tp->dev_id, &info);
3123 
3124 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3125 			"NUM_OPS cannot exceed %u for this device",
3126 			info.drv.queue_size_lim);
3127 
3128 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3129 
3130 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3131 		rte_pause();
3132 
3133 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3134 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3135 
3136 	/* For BLER tests we need to enable early termination */
3137 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3138 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3139 		ref_op->ldpc_dec.op_flags +=
3140 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3141 	ref_op->ldpc_dec.iter_max = get_iter_max();
3142 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3143 
3144 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3145 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3146 				bufs->hard_outputs, bufs->soft_outputs,
3147 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3148 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3149 
3150 	/* Set counter to validate the ordering */
3151 	for (j = 0; j < num_ops; ++j)
3152 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3153 
3154 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3155 		for (j = 0; j < num_ops; ++j) {
3156 			if (!loopback)
3157 				mbuf_reset(
3158 				ops_enq[j]->ldpc_dec.hard_output.data);
3159 			if (hc_out || loopback)
3160 				mbuf_reset(
3161 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3162 		}
3163 		if (extDdr) {
3164 			bool preload = i == (TEST_REPETITIONS - 1);
3165 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3166 					num_ops, preload);
3167 		}
3168 		start_time = rte_rdtsc_precise();
3169 
3170 		for (enq = 0, deq = 0; enq < num_ops;) {
3171 			num_to_enq = burst_sz;
3172 
3173 			if (unlikely(num_ops - enq < num_to_enq))
3174 				num_to_enq = num_ops - enq;
3175 
3176 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3177 					queue_id, &ops_enq[enq], num_to_enq);
3178 
3179 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3180 					queue_id, &ops_deq[deq], enq - deq);
3181 		}
3182 
3183 		/* dequeue the remaining */
3184 		while (deq < enq) {
3185 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3186 					queue_id, &ops_deq[deq], enq - deq);
3187 		}
3188 
3189 		total_time += rte_rdtsc_precise() - start_time;
3190 	}
3191 
3192 	tp->iter_count = 0;
3193 	tp->iter_average = 0;
3194 	/* get the max of iter_count for all dequeued ops */
3195 	for (i = 0; i < num_ops; ++i) {
3196 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3197 				tp->iter_count);
3198 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3199 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3200 			parity_bler += 1.0;
3201 	}
3202 
3203 	parity_bler /= num_ops; /* This one is based on SYND */
3204 	tp->iter_average /= num_ops;
3205 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3206 
3207 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3208 			&& tp->bler == 0
3209 			&& parity_bler == 0
3210 			&& !hc_out) {
3211 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3212 				tp->op_params->vector_mask);
3213 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3214 	}
3215 
3216 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3217 
3218 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3219 	tp->ops_per_sec = ((double)num_ops * 1) /
3220 			((double)total_time / (double)rte_get_tsc_hz());
3221 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3222 			1000000.0) / ((double)total_time /
3223 			(double)rte_get_tsc_hz());
3224 
3225 	return TEST_SUCCESS;
3226 }
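/*
 * Note on the BLER metrics computed above: tp->bler is the fraction of code
 * blocks whose hard output differs from the reference data (see
 * validate_ldpc_bler()), while parity_bler is the fraction of operations
 * reporting RTE_BBDEV_SYNDROME_ERROR. For example (assumed values), 8
 * mismatching blocks out of num_ops = 512 give tp->bler = 8 / 512 = 0.015625.
 */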
3227 
3228 static int
3229 throughput_pmd_lcore_ldpc_dec(void *arg)
3230 {
3231 	struct thread_params *tp = arg;
3232 	uint16_t enq, deq;
3233 	uint64_t total_time = 0, start_time;
3234 	const uint16_t queue_id = tp->queue_id;
3235 	const uint16_t burst_sz = tp->op_params->burst_sz;
3236 	const uint16_t num_ops = tp->op_params->num_to_process;
3237 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3238 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3239 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3240 	struct test_buffers *bufs = NULL;
3241 	int i, j, ret;
3242 	struct rte_bbdev_info info;
3243 	uint16_t num_to_enq;
3244 	bool extDdr = check_bit(ldpc_cap_flags,
3245 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3246 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3247 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3248 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3249 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3250 
3251 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3252 			"BURST_SIZE should be <= %u", MAX_BURST);
3253 
3254 	rte_bbdev_info_get(tp->dev_id, &info);
3255 
3256 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3257 			"NUM_OPS cannot exceed %u for this device",
3258 			info.drv.queue_size_lim);
3259 
3260 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3261 
3262 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3263 		rte_pause();
3264 
3265 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3266 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3267 
3268 	/* For throughput tests we need to disable early termination */
3269 	if (check_bit(ref_op->ldpc_dec.op_flags,
3270 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3271 		ref_op->ldpc_dec.op_flags -=
3272 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3273 	ref_op->ldpc_dec.iter_max = get_iter_max();
3274 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3275 
3276 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3277 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3278 				bufs->hard_outputs, bufs->soft_outputs,
3279 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3280 
3281 	/* Set counter to validate the ordering */
3282 	for (j = 0; j < num_ops; ++j)
3283 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3284 
3285 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3286 		for (j = 0; j < num_ops; ++j) {
3287 			if (!loopback)
3288 				mbuf_reset(
3289 				ops_enq[j]->ldpc_dec.hard_output.data);
3290 			if (hc_out || loopback)
3291 				mbuf_reset(
3292 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3293 		}
3294 		if (extDdr) {
3295 			bool preload = i == (TEST_REPETITIONS - 1);
3296 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3297 					num_ops, preload);
3298 		}
3299 		start_time = rte_rdtsc_precise();
3300 
3301 		for (enq = 0, deq = 0; enq < num_ops;) {
3302 			num_to_enq = burst_sz;
3303 
3304 			if (unlikely(num_ops - enq < num_to_enq))
3305 				num_to_enq = num_ops - enq;
3306 
3307 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3308 					queue_id, &ops_enq[enq], num_to_enq);
3309 
3310 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3311 					queue_id, &ops_deq[deq], enq - deq);
3312 		}
3313 
3314 		/* dequeue the remaining */
3315 		while (deq < enq) {
3316 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3317 					queue_id, &ops_deq[deq], enq - deq);
3318 		}
3319 
3320 		total_time += rte_rdtsc_precise() - start_time;
3321 	}
3322 
3323 	tp->iter_count = 0;
3324 	/* get the max of iter_count for all dequeued ops */
3325 	for (i = 0; i < num_ops; ++i) {
3326 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3327 				tp->iter_count);
3328 	}
3329 	if (extDdr) {
3330 		/* Read loopback is not thread safe */
3331 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3332 	}
3333 
3334 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3335 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3336 				tp->op_params->vector_mask);
3337 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3338 	}
3339 
3340 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3341 
3342 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3343 
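	/* Per-lcore throughput derived from the TB size in bits:
	 *   ops_per_sec = num_ops * TEST_REPETITIONS / elapsed_seconds
	 *   mbps = (num_ops * TEST_REPETITIONS * tb_len_bits / 1e6)
	 *          / elapsed_seconds
	 * where elapsed_seconds = total_time / rte_get_tsc_hz().
	 */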
3344 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3345 			((double)total_time / (double)rte_get_tsc_hz());
3346 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3347 			1000000.0) / ((double)total_time /
3348 			(double)rte_get_tsc_hz());
3349 
3350 	return TEST_SUCCESS;
3351 }
3352 
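/* Throughput test worker: enqueue/dequeue Turbo encode operations on a
 * single queue from one lcore in PMD (polling) mode.
 */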
3353 static int
3354 throughput_pmd_lcore_enc(void *arg)
3355 {
3356 	struct thread_params *tp = arg;
3357 	uint16_t enq, deq;
3358 	uint64_t total_time = 0, start_time;
3359 	const uint16_t queue_id = tp->queue_id;
3360 	const uint16_t burst_sz = tp->op_params->burst_sz;
3361 	const uint16_t num_ops = tp->op_params->num_to_process;
3362 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3363 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3364 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3365 	struct test_buffers *bufs = NULL;
3366 	int i, j, ret;
3367 	struct rte_bbdev_info info;
3368 	uint16_t num_to_enq;
3369 
3370 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3371 			"BURST_SIZE should be <= %u", MAX_BURST);
3372 
3373 	rte_bbdev_info_get(tp->dev_id, &info);
3374 
3375 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3376 			"NUM_OPS cannot exceed %u for this device",
3377 			info.drv.queue_size_lim);
3378 
3379 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3380 
3381 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3382 		rte_pause();
3383 
3384 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3385 			num_ops);
3386 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3387 			num_ops);
3388 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3389 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3390 				bufs->hard_outputs, ref_op);
3391 
3392 	/* Set counter to validate the ordering */
3393 	for (j = 0; j < num_ops; ++j)
3394 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3395 
3396 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3397 
3398 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3399 			for (j = 0; j < num_ops; ++j)
3400 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3401 
3402 		start_time = rte_rdtsc_precise();
3403 
3404 		for (enq = 0, deq = 0; enq < num_ops;) {
3405 			num_to_enq = burst_sz;
3406 
3407 			if (unlikely(num_ops - enq < num_to_enq))
3408 				num_to_enq = num_ops - enq;
3409 
3410 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3411 					queue_id, &ops_enq[enq], num_to_enq);
3412 
3413 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3414 					queue_id, &ops_deq[deq], enq - deq);
3415 		}
3416 
3417 		/* dequeue the remaining */
3418 		while (deq < enq) {
3419 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3420 					queue_id, &ops_deq[deq], enq - deq);
3421 		}
3422 
3423 		total_time += rte_rdtsc_precise() - start_time;
3424 	}
3425 
3426 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3427 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3428 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3429 	}
3430 
3431 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3432 
3433 	double tb_len_bits = calc_enc_TB_size(ref_op);
3434 
3435 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3436 			((double)total_time / (double)rte_get_tsc_hz());
3437 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3438 			/ 1000000.0) / ((double)total_time /
3439 			(double)rte_get_tsc_hz());
3440 
3441 	return TEST_SUCCESS;
3442 }
3443 
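/* Throughput test worker: enqueue/dequeue LDPC encode operations on a
 * single queue from one lcore in PMD (polling) mode.
 */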
3444 static int
3445 throughput_pmd_lcore_ldpc_enc(void *arg)
3446 {
3447 	struct thread_params *tp = arg;
3448 	uint16_t enq, deq;
3449 	uint64_t total_time = 0, start_time;
3450 	const uint16_t queue_id = tp->queue_id;
3451 	const uint16_t burst_sz = tp->op_params->burst_sz;
3452 	const uint16_t num_ops = tp->op_params->num_to_process;
3453 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3454 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3455 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3456 	struct test_buffers *bufs = NULL;
3457 	int i, j, ret;
3458 	struct rte_bbdev_info info;
3459 	uint16_t num_to_enq;
3460 
3461 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3462 			"BURST_SIZE should be <= %u", MAX_BURST);
3463 
3464 	rte_bbdev_info_get(tp->dev_id, &info);
3465 
3466 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3467 			"NUM_OPS cannot exceed %u for this device",
3468 			info.drv.queue_size_lim);
3469 
3470 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3471 
3472 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3473 		rte_pause();
3474 
3475 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3476 			num_ops);
3477 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3478 			num_ops);
3479 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3480 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3481 				bufs->hard_outputs, ref_op);
3482 
3483 	/* Set counter to validate the ordering */
3484 	for (j = 0; j < num_ops; ++j)
3485 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3486 
3487 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3488 
3489 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3490 			for (j = 0; j < num_ops; ++j)
3491 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3492 
3493 		start_time = rte_rdtsc_precise();
3494 
3495 		for (enq = 0, deq = 0; enq < num_ops;) {
3496 			num_to_enq = burst_sz;
3497 
3498 			if (unlikely(num_ops - enq < num_to_enq))
3499 				num_to_enq = num_ops - enq;
3500 
3501 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3502 					queue_id, &ops_enq[enq], num_to_enq);
3503 
3504 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3505 					queue_id, &ops_deq[deq], enq - deq);
3506 		}
3507 
3508 		/* dequeue the remaining */
3509 		while (deq < enq) {
3510 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3511 					queue_id, &ops_deq[deq], enq - deq);
3512 		}
3513 
3514 		total_time += rte_rdtsc_precise() - start_time;
3515 	}
3516 
3517 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3518 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3519 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3520 	}
3521 
3522 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3523 
3524 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3525 
3526 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3527 			((double)total_time / (double)rte_get_tsc_hz());
3528 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3529 			/ 1000000.0) / ((double)total_time /
3530 			(double)rte_get_tsc_hz());
3531 
3532 	return TEST_SUCCESS;
3533 }
3534 
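/* Aggregate the encode performance results over the number of cores used */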
3535 static void
3536 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3537 {
3538 	unsigned int iter = 0;
3539 	double total_mops = 0, total_mbps = 0;
3540 
3541 	for (iter = 0; iter < used_cores; iter++) {
3542 		printf(
3543 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3544 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3545 			t_params[iter].mbps);
3546 		total_mops += t_params[iter].ops_per_sec;
3547 		total_mbps += t_params[iter].mbps;
3548 	}
3549 	printf(
3550 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3551 		used_cores, total_mops, total_mbps);
3552 }
3553 
3554 /* Aggregate the performance results over the number of cores used */
3555 static void
3556 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3557 {
3558 	unsigned int core_idx = 0;
3559 	double total_mops = 0, total_mbps = 0;
3560 	uint8_t iter_count = 0;
3561 
3562 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3563 		printf(
3564 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3565 			t_params[core_idx].lcore_id,
3566 			t_params[core_idx].ops_per_sec,
3567 			t_params[core_idx].mbps,
3568 			t_params[core_idx].iter_count);
3569 		total_mops += t_params[core_idx].ops_per_sec;
3570 		total_mbps += t_params[core_idx].mbps;
3571 		iter_count = RTE_MAX(iter_count,
3572 				t_params[core_idx].iter_count);
3573 	}
3574 	printf(
3575 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3576 		used_cores, total_mops, total_mbps, iter_count);
3577 }
3578 
3579 /* Aggregate the BLER results over the number of cores used */
3580 static void
3581 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3582 {
3583 	unsigned int core_idx = 0;
3584 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3585 	double snr = get_snr();
3586 
3587 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3588 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3589 				t_params[core_idx].lcore_id,
3590 				t_params[core_idx].bler * 100,
3591 				t_params[core_idx].iter_average,
3592 				t_params[core_idx].mbps,
3593 				get_vector_filename());
3594 		total_mbps += t_params[core_idx].mbps;
3595 		total_bler += t_params[core_idx].bler;
3596 		total_iter += t_params[core_idx].iter_average;
3597 	}
3598 	total_bler /= used_cores;
3599 	total_iter /= used_cores;
3600 
3601 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f (max %d) - Tp %.1f Mbps %s\n",
3602 			snr, total_bler * 100, total_iter, get_iter_max(),
3603 			total_mbps, get_vector_filename());
3604 }
3605 
3606 /*
3607  * Test function that determines BLER wireless performance
3608  */
3609 static int
3610 bler_test(struct active_device *ad,
3611 		struct test_op_params *op_params)
3612 {
3613 	int ret;
3614 	unsigned int lcore_id, used_cores = 0;
3615 	struct thread_params *t_params;
3616 	struct rte_bbdev_info info;
3617 	lcore_function_t *bler_function;
3618 	uint16_t num_lcores;
3619 	const char *op_type_str;
3620 
3621 	rte_bbdev_info_get(ad->dev_id, &info);
3622 
3623 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3624 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3625 			test_vector.op_type);
3626 
3627 	printf("+ ------------------------------------------------------- +\n");
3628 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3629 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3630 			op_params->num_to_process, op_params->num_lcores,
3631 			op_type_str,
3632 			intr_enabled ? "Interrupt mode" : "PMD mode",
3633 			(double)rte_get_tsc_hz() / 1000000000.0);
3634 
3635 	/* Set number of lcores */
3636 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3637 			? ad->nb_queues
3638 			: op_params->num_lcores;
3639 
3640 	/* Allocate memory for thread parameters structure */
3641 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3642 			RTE_CACHE_LINE_SIZE);
3643 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3644 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3645 				RTE_CACHE_LINE_SIZE));
3646 
3647 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3648 		bler_function = bler_pmd_lcore_ldpc_dec;
3649 	else
3650 		return TEST_SKIPPED;
3651 
3652 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3653 
3654 	/* Master core is set at first entry */
3655 	t_params[0].dev_id = ad->dev_id;
3656 	t_params[0].lcore_id = rte_lcore_id();
3657 	t_params[0].op_params = op_params;
3658 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3659 	t_params[0].iter_count = 0;
3660 
3661 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3662 		if (used_cores >= num_lcores)
3663 			break;
3664 
3665 		t_params[used_cores].dev_id = ad->dev_id;
3666 		t_params[used_cores].lcore_id = lcore_id;
3667 		t_params[used_cores].op_params = op_params;
3668 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3669 		t_params[used_cores].iter_count = 0;
3670 
3671 		rte_eal_remote_launch(bler_function,
3672 				&t_params[used_cores++], lcore_id);
3673 	}
3674 
3675 	rte_atomic16_set(&op_params->sync, SYNC_START);
3676 	ret = bler_function(&t_params[0]);
3677 
3678 	/* Master core is always used */
3679 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3680 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3681 
3682 	print_dec_bler(t_params, num_lcores);
3683 
3684 	/* Return if test failed */
3685 	if (ret) {
3686 		rte_free(t_params);
3687 		return ret;
3688 	}
3689 
3690 	/* Free the thread parameters and return the test result */
3691 	rte_free(t_params);
3692 	return ret;
3693 }
3694 
3695 /*
3696  * Test function that determines how long an enqueue + dequeue of a burst
3697  * takes on available lcores.
3698  */
3699 static int
3700 throughput_test(struct active_device *ad,
3701 		struct test_op_params *op_params)
3702 {
3703 	int ret;
3704 	unsigned int lcore_id, used_cores = 0;
3705 	struct thread_params *t_params, *tp;
3706 	struct rte_bbdev_info info;
3707 	lcore_function_t *throughput_function;
3708 	uint16_t num_lcores;
3709 	const char *op_type_str;
3710 
3711 	rte_bbdev_info_get(ad->dev_id, &info);
3712 
3713 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3714 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3715 			test_vector.op_type);
3716 
3717 	printf("+ ------------------------------------------------------- +\n");
3718 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3719 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3720 			op_params->num_to_process, op_params->num_lcores,
3721 			op_type_str,
3722 			intr_enabled ? "Interrupt mode" : "PMD mode",
3723 			(double)rte_get_tsc_hz() / 1000000000.0);
3724 
3725 	/* Set number of lcores */
3726 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3727 			? ad->nb_queues
3728 			: op_params->num_lcores;
3729 
3730 	/* Allocate memory for thread parameters structure */
3731 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3732 			RTE_CACHE_LINE_SIZE);
3733 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3734 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3735 				RTE_CACHE_LINE_SIZE));
3736 
3737 	if (intr_enabled) {
3738 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3739 			throughput_function = throughput_intr_lcore_dec;
3740 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3741 			throughput_function = throughput_intr_lcore_ldpc_dec;
3742 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3743 			throughput_function = throughput_intr_lcore_enc;
3744 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3745 			throughput_function = throughput_intr_lcore_ldpc_enc;
3746 		else
3747 			throughput_function = throughput_intr_lcore_enc;
3748 
3749 		/* Dequeue interrupt callback registration */
3750 		ret = rte_bbdev_callback_register(ad->dev_id,
3751 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3752 				t_params);
3753 		if (ret < 0) {
3754 			rte_free(t_params);
3755 			return ret;
3756 		}
3757 	} else {
3758 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3759 			throughput_function = throughput_pmd_lcore_dec;
3760 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3761 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3762 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3763 			throughput_function = throughput_pmd_lcore_enc;
3764 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3765 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3766 		else
3767 			throughput_function = throughput_pmd_lcore_enc;
3768 	}
3769 
3770 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3771 
3772 	/* Master core is set at first entry */
3773 	t_params[0].dev_id = ad->dev_id;
3774 	t_params[0].lcore_id = rte_lcore_id();
3775 	t_params[0].op_params = op_params;
3776 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3777 	t_params[0].iter_count = 0;
3778 
3779 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3780 		if (used_cores >= num_lcores)
3781 			break;
3782 
3783 		t_params[used_cores].dev_id = ad->dev_id;
3784 		t_params[used_cores].lcore_id = lcore_id;
3785 		t_params[used_cores].op_params = op_params;
3786 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3787 		t_params[used_cores].iter_count = 0;
3788 
3789 		rte_eal_remote_launch(throughput_function,
3790 				&t_params[used_cores++], lcore_id);
3791 	}
3792 
3793 	rte_atomic16_set(&op_params->sync, SYNC_START);
3794 	ret = throughput_function(&t_params[0]);
3795 
3796 	/* Master core is always used */
3797 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3798 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3799 
3800 	/* Return if test failed */
3801 	if (ret) {
3802 		rte_free(t_params);
3803 		return ret;
3804 	}
3805 
3806 	/* Print throughput if interrupts are disabled and test passed */
3807 	if (!intr_enabled) {
3808 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3809 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3810 			print_dec_throughput(t_params, num_lcores);
3811 		else
3812 			print_enc_throughput(t_params, num_lcores);
3813 		rte_free(t_params);
3814 		return ret;
3815 	}
3816 
3817 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
3818 	 * all pending operations. Skip waiting for queues which reported an
3819 	 * error via the processing_status variable.
3820 	 * Wait for the master lcore operations first.
3821 	 */
3822 	tp = &t_params[0];
3823 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3824 			op_params->num_to_process) &&
3825 			(rte_atomic16_read(&tp->processing_status) !=
3826 			TEST_FAILED))
3827 		rte_pause();
3828 
3829 	tp->ops_per_sec /= TEST_REPETITIONS;
3830 	tp->mbps /= TEST_REPETITIONS;
3831 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3832 
3833 	/* Wait for the slave lcore operations */
3834 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3835 		tp = &t_params[used_cores];
3836 
3837 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3838 				op_params->num_to_process) &&
3839 				(rte_atomic16_read(&tp->processing_status) !=
3840 				TEST_FAILED))
3841 			rte_pause();
3842 
3843 		tp->ops_per_sec /= TEST_REPETITIONS;
3844 		tp->mbps /= TEST_REPETITIONS;
3845 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3846 	}
3847 
3848 	/* Print throughput if test passed */
3849 	if (!ret) {
3850 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3851 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3852 			print_dec_throughput(t_params, num_lcores);
3853 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3854 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3855 			print_enc_throughput(t_params, num_lcores);
3856 	}
3857 
3858 	rte_free(t_params);
3859 	return ret;
3860 }
3861 
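/* Measure the per-burst latency of Turbo decode operations, from just
 * before the enqueue until the first successful dequeue of the burst.
 */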
3862 static int
3863 latency_test_dec(struct rte_mempool *mempool,
3864 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3865 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3866 		const uint16_t num_to_process, uint16_t burst_sz,
3867 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3868 {
3869 	int ret = TEST_SUCCESS;
3870 	uint16_t i, j, dequeued;
3871 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3872 	uint64_t start_time = 0, last_time = 0;
3873 
3874 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3875 		uint16_t enq = 0, deq = 0;
3876 		bool first_time = true;
3877 		last_time = 0;
3878 
3879 		if (unlikely(num_to_process - dequeued < burst_sz))
3880 			burst_sz = num_to_process - dequeued;
3881 
3882 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3883 		TEST_ASSERT_SUCCESS(ret,
3884 				"rte_bbdev_dec_op_alloc_bulk() failed");
3885 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3886 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3887 					bufs->inputs,
3888 					bufs->hard_outputs,
3889 					bufs->soft_outputs,
3890 					ref_op);
3891 
3892 		/* Set counter to validate the ordering */
3893 		for (j = 0; j < burst_sz; ++j)
3894 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3895 
3896 		start_time = rte_rdtsc_precise();
3897 
3898 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3899 				burst_sz);
3900 		TEST_ASSERT(enq == burst_sz,
3901 				"Error enqueueing burst, expected %u, got %u",
3902 				burst_sz, enq);
3903 
3904 		/* Dequeue */
3905 		do {
3906 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3907 					&ops_deq[deq], burst_sz - deq);
3908 			if (likely(first_time && (deq > 0))) {
3909 				last_time = rte_rdtsc_precise() - start_time;
3910 				first_time = false;
3911 			}
3912 		} while (unlikely(burst_sz != deq));
3913 
3914 		*max_time = RTE_MAX(*max_time, last_time);
3915 		*min_time = RTE_MIN(*min_time, last_time);
3916 		*total_time += last_time;
3917 
3918 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3919 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3920 					vector_mask);
3921 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3922 		}
3923 
3924 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3925 		dequeued += deq;
3926 	}
3927 
3928 	return i;
3929 }
3930 
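/* Measure the per-burst latency of LDPC decode operations. Early
 * termination is disabled and, when external HARQ memory is used, the
 * HARQ data is preloaded to DDR before the measurement.
 */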
3931 static int
3932 latency_test_ldpc_dec(struct rte_mempool *mempool,
3933 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3934 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3935 		const uint16_t num_to_process, uint16_t burst_sz,
3936 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3937 {
3938 	int ret = TEST_SUCCESS;
3939 	uint16_t i, j, dequeued;
3940 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3941 	uint64_t start_time = 0, last_time = 0;
3942 	bool extDdr = ldpc_cap_flags &
3943 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3944 
3945 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3946 		uint16_t enq = 0, deq = 0;
3947 		bool first_time = true;
3948 		last_time = 0;
3949 
3950 		if (unlikely(num_to_process - dequeued < burst_sz))
3951 			burst_sz = num_to_process - dequeued;
3952 
3953 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3954 		TEST_ASSERT_SUCCESS(ret,
3955 				"rte_bbdev_dec_op_alloc_bulk() failed");
3956 
3957 		/* For latency tests we need to disable early termination */
3958 		if (check_bit(ref_op->ldpc_dec.op_flags,
3959 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3960 			ref_op->ldpc_dec.op_flags -=
3961 			ref_op->ldpc_dec.op_flags &=
3962 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3963 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3964 
3965 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3966 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3967 					bufs->inputs,
3968 					bufs->hard_outputs,
3969 					bufs->soft_outputs,
3970 					bufs->harq_inputs,
3971 					bufs->harq_outputs,
3972 					ref_op);
3973 
3974 		if (extDdr)
3975 			preload_harq_ddr(dev_id, queue_id, ops_enq,
3976 					burst_sz, true);
3977 
3978 		/* Set counter to validate the ordering */
3979 		for (j = 0; j < burst_sz; ++j)
3980 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3981 
3982 		start_time = rte_rdtsc_precise();
3983 
3984 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3985 				&ops_enq[enq], burst_sz);
3986 		TEST_ASSERT(enq == burst_sz,
3987 				"Error enqueueing burst, expected %u, got %u",
3988 				burst_sz, enq);
3989 
3990 		/* Dequeue */
3991 		do {
3992 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3993 					&ops_deq[deq], burst_sz - deq);
3994 			if (likely(first_time && (deq > 0))) {
3995 				last_time = rte_rdtsc_precise() - start_time;
3996 				first_time = false;
3997 			}
3998 		} while (unlikely(burst_sz != deq));
3999 
4000 		*max_time = RTE_MAX(*max_time, last_time);
4001 		*min_time = RTE_MIN(*min_time, last_time);
4002 		*total_time += last_time;
4003 
4004 		if (extDdr)
4005 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4006 
4007 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4008 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4009 					vector_mask);
4010 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4011 		}
4012 
4013 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4014 		dequeued += deq;
4015 	}
4016 	return i;
4017 }
4018 
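/* Measure the per-burst latency of Turbo encode operations. */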
4019 static int
4020 latency_test_enc(struct rte_mempool *mempool,
4021 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4022 		uint16_t dev_id, uint16_t queue_id,
4023 		const uint16_t num_to_process, uint16_t burst_sz,
4024 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4025 {
4026 	int ret = TEST_SUCCESS;
4027 	uint16_t i, j, dequeued;
4028 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4029 	uint64_t start_time = 0, last_time = 0;
4030 
4031 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4032 		uint16_t enq = 0, deq = 0;
4033 		bool first_time = true;
4034 		last_time = 0;
4035 
4036 		if (unlikely(num_to_process - dequeued < burst_sz))
4037 			burst_sz = num_to_process - dequeued;
4038 
4039 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4040 		TEST_ASSERT_SUCCESS(ret,
4041 				"rte_bbdev_enc_op_alloc_bulk() failed");
4042 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4043 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4044 					bufs->inputs,
4045 					bufs->hard_outputs,
4046 					ref_op);
4047 
4048 		/* Set counter to validate the ordering */
4049 		for (j = 0; j < burst_sz; ++j)
4050 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4051 
4052 		start_time = rte_rdtsc_precise();
4053 
4054 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4055 				burst_sz);
4056 		TEST_ASSERT(enq == burst_sz,
4057 				"Error enqueueing burst, expected %u, got %u",
4058 				burst_sz, enq);
4059 
4060 		/* Dequeue */
4061 		do {
4062 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4063 					&ops_deq[deq], burst_sz - deq);
4064 			if (likely(first_time && (deq > 0))) {
4065 				last_time = rte_rdtsc_precise() - start_time;
4066 				first_time = false;
4067 			}
4068 		} while (unlikely(burst_sz != deq));
4069 
4070 		*max_time = RTE_MAX(*max_time, last_time);
4071 		*min_time = RTE_MIN(*min_time, last_time);
4072 		*total_time += last_time;
4073 
4074 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4075 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4076 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4077 		}
4078 
4079 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4080 		dequeued += deq;
4081 	}
4082 
4083 	return i;
4084 }
4085 
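/* Measure the per-burst latency of LDPC encode operations. */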
4086 static int
4087 latency_test_ldpc_enc(struct rte_mempool *mempool,
4088 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4089 		uint16_t dev_id, uint16_t queue_id,
4090 		const uint16_t num_to_process, uint16_t burst_sz,
4091 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4092 {
4093 	int ret = TEST_SUCCESS;
4094 	uint16_t i, j, dequeued;
4095 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4096 	uint64_t start_time = 0, last_time = 0;
4097 
4098 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4099 		uint16_t enq = 0, deq = 0;
4100 		bool first_time = true;
4101 		last_time = 0;
4102 
4103 		if (unlikely(num_to_process - dequeued < burst_sz))
4104 			burst_sz = num_to_process - dequeued;
4105 
4106 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4107 		TEST_ASSERT_SUCCESS(ret,
4108 				"rte_bbdev_enc_op_alloc_bulk() failed");
4109 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4110 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4111 					bufs->inputs,
4112 					bufs->hard_outputs,
4113 					ref_op);
4114 
4115 		/* Set counter to validate the ordering */
4116 		for (j = 0; j < burst_sz; ++j)
4117 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4118 
4119 		start_time = rte_rdtsc_precise();
4120 
4121 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4122 				&ops_enq[enq], burst_sz);
4123 		TEST_ASSERT(enq == burst_sz,
4124 				"Error enqueueing burst, expected %u, got %u",
4125 				burst_sz, enq);
4126 
4127 		/* Dequeue */
4128 		do {
4129 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4130 					&ops_deq[deq], burst_sz - deq);
4131 			if (likely(first_time && (deq > 0))) {
4132 				last_time = rte_rdtsc_precise() - start_time;
4133 				first_time = false;
4134 			}
4135 		} while (unlikely(burst_sz != deq));
4136 
4137 		*max_time = RTE_MAX(*max_time, last_time);
4138 		*min_time = RTE_MIN(*min_time, last_time);
4139 		*total_time += last_time;
4140 
4141 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4142 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4143 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4144 		}
4145 
4146 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4147 		dequeued += deq;
4148 	}
4149 
4150 	return i;
4151 }
4152 
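/* Validation/latency test: dispatch to the latency worker matching the
 * vector op type and report the average, minimum and maximum latency.
 */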
4153 static int
4154 latency_test(struct active_device *ad,
4155 		struct test_op_params *op_params)
4156 {
4157 	int iter;
4158 	uint16_t burst_sz = op_params->burst_sz;
4159 	const uint16_t num_to_process = op_params->num_to_process;
4160 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4161 	const uint16_t queue_id = ad->queue_ids[0];
4162 	struct test_buffers *bufs = NULL;
4163 	struct rte_bbdev_info info;
4164 	uint64_t total_time, min_time, max_time;
4165 	const char *op_type_str;
4166 
4167 	total_time = max_time = 0;
4168 	min_time = UINT64_MAX;
4169 
4170 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4171 			"BURST_SIZE should be <= %u", MAX_BURST);
4172 
4173 	rte_bbdev_info_get(ad->dev_id, &info);
4174 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4175 
4176 	op_type_str = rte_bbdev_op_type_str(op_type);
4177 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4178 
4179 	printf("+ ------------------------------------------------------- +\n");
4180 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4181 			info.dev_name, burst_sz, num_to_process, op_type_str);
4182 
4183 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4184 		iter = latency_test_dec(op_params->mp, bufs,
4185 				op_params->ref_dec_op, op_params->vector_mask,
4186 				ad->dev_id, queue_id, num_to_process,
4187 				burst_sz, &total_time, &min_time, &max_time);
4188 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4189 		iter = latency_test_enc(op_params->mp, bufs,
4190 				op_params->ref_enc_op, ad->dev_id, queue_id,
4191 				num_to_process, burst_sz, &total_time,
4192 				&min_time, &max_time);
4193 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4194 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4195 				op_params->ref_enc_op, ad->dev_id, queue_id,
4196 				num_to_process, burst_sz, &total_time,
4197 				&min_time, &max_time);
4198 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4199 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4200 				op_params->ref_dec_op, op_params->vector_mask,
4201 				ad->dev_id, queue_id, num_to_process,
4202 				burst_sz, &total_time, &min_time, &max_time);
4203 	else
4204 		iter = latency_test_enc(op_params->mp, bufs,
4205 					op_params->ref_enc_op,
4206 					ad->dev_id, queue_id,
4207 					num_to_process, burst_sz, &total_time,
4208 					&min_time, &max_time);
4209 
4210 	if (iter <= 0)
4211 		return TEST_FAILED;
4212 
4213 	printf("Operation latency:\n"
4214 			"\tavg: %lg cycles, %lg us\n"
4215 			"\tmin: %lg cycles, %lg us\n"
4216 			"\tmax: %lg cycles, %lg us\n",
4217 			(double)total_time / (double)iter,
4218 			(double)(total_time * 1000000) / (double)iter /
4219 			(double)rte_get_tsc_hz(), (double)min_time,
4220 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4221 			(double)max_time, (double)(max_time * 1000000) /
4222 			(double)rte_get_tsc_hz());
4223 
4224 	return TEST_SUCCESS;
4225 }
4226 
4227 #ifdef RTE_BBDEV_OFFLOAD_COST
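/* Copy a snapshot of the per-queue statistics straight from the device's
 * internal queue data so that acc_offload_cycles can be read per burst.
 */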
4228 static int
4229 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4230 		struct rte_bbdev_stats *stats)
4231 {
4232 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4233 	struct rte_bbdev_stats *q_stats;
4234 
4235 	if (queue_id >= dev->data->num_queues)
4236 		return -1;
4237 
4238 	q_stats = &dev->data->queues[queue_id].queue_stats;
4239 
4240 	stats->enqueued_count = q_stats->enqueued_count;
4241 	stats->dequeued_count = q_stats->dequeued_count;
4242 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4243 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4244 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4245 
4246 	return 0;
4247 }
4248 
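/* Measure the enqueue and dequeue offload cost for Turbo decode. The
 * software enqueue cost is the measured enqueue time minus the
 * accelerator cycles reported in the queue stats:
 *   enq_sw_time = (rdtsc_after_enq - rdtsc_before_enq) - acc_offload_cycles
 */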
4249 static int
4250 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4251 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4252 		uint16_t queue_id, const uint16_t num_to_process,
4253 		uint16_t burst_sz, struct test_time_stats *time_st)
4254 {
4255 	int i, dequeued, ret;
4256 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4257 	uint64_t enq_start_time, deq_start_time;
4258 	uint64_t enq_sw_last_time, deq_last_time;
4259 	struct rte_bbdev_stats stats;
4260 
4261 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4262 		uint16_t enq = 0, deq = 0;
4263 
4264 		if (unlikely(num_to_process - dequeued < burst_sz))
4265 			burst_sz = num_to_process - dequeued;
4266 
4267 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4268 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4269 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4270 					bufs->inputs,
4271 					bufs->hard_outputs,
4272 					bufs->soft_outputs,
4273 					ref_op);
4274 
4275 		/* Start time meas for enqueue function offload latency */
4276 		enq_start_time = rte_rdtsc_precise();
4277 		do {
4278 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4279 					&ops_enq[enq], burst_sz - enq);
4280 		} while (unlikely(burst_sz != enq));
4281 
4282 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4283 		TEST_ASSERT_SUCCESS(ret,
4284 				"Failed to get stats for queue (%u) of device (%u)",
4285 				queue_id, dev_id);
4286 
4287 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4288 				stats.acc_offload_cycles;
4289 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4290 				enq_sw_last_time);
4291 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4292 				enq_sw_last_time);
4293 		time_st->enq_sw_total_time += enq_sw_last_time;
4294 
4295 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4296 				stats.acc_offload_cycles);
4297 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4298 				stats.acc_offload_cycles);
4299 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4300 
4301 		/* give time for device to process ops */
4302 		rte_delay_us(200);
4303 
4304 		/* Start time meas for dequeue function offload latency */
4305 		deq_start_time = rte_rdtsc_precise();
4306 		/* Dequeue one operation */
4307 		do {
4308 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4309 					&ops_deq[deq], 1);
4310 		} while (unlikely(deq != 1));
4311 
4312 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4313 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4314 				deq_last_time);
4315 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4316 				deq_last_time);
4317 		time_st->deq_total_time += deq_last_time;
4318 
4319 		/* Dequeue remaining operations if needed */
4320 		while (burst_sz != deq)
4321 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4322 					&ops_deq[deq], burst_sz - deq);
4323 
4324 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4325 		dequeued += deq;
4326 	}
4327 
4328 	return i;
4329 }
4330 
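/* Measure the enqueue and dequeue offload cost for LDPC decode, including
 * the optional external HARQ DDR preload and retrieval steps.
 */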
4331 static int
4332 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4333 		struct test_buffers *bufs,
4334 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4335 		uint16_t queue_id, const uint16_t num_to_process,
4336 		uint16_t burst_sz, struct test_time_stats *time_st)
4337 {
4338 	int i, dequeued, ret;
4339 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4340 	uint64_t enq_start_time, deq_start_time;
4341 	uint64_t enq_sw_last_time, deq_last_time;
4342 	struct rte_bbdev_stats stats;
4343 	bool extDdr = ldpc_cap_flags &
4344 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4345 
4346 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4347 		uint16_t enq = 0, deq = 0;
4348 
4349 		if (unlikely(num_to_process - dequeued < burst_sz))
4350 			burst_sz = num_to_process - dequeued;
4351 
4352 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4353 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4354 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4355 					bufs->inputs,
4356 					bufs->hard_outputs,
4357 					bufs->soft_outputs,
4358 					bufs->harq_inputs,
4359 					bufs->harq_outputs,
4360 					ref_op);
4361 
4362 		if (extDdr)
4363 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4364 					burst_sz, true);
4365 
4366 		/* Start time meas for enqueue function offload latency */
4367 		enq_start_time = rte_rdtsc_precise();
4368 		do {
4369 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4370 					&ops_enq[enq], burst_sz - enq);
4371 		} while (unlikely(burst_sz != enq));
4372 
4373 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4374 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4375 		TEST_ASSERT_SUCCESS(ret,
4376 				"Failed to get stats for queue (%u) of device (%u)",
4377 				queue_id, dev_id);
4378 
4379 		enq_sw_last_time -= stats.acc_offload_cycles;
4380 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4381 				enq_sw_last_time);
4382 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4383 				enq_sw_last_time);
4384 		time_st->enq_sw_total_time += enq_sw_last_time;
4385 
4386 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4387 				stats.acc_offload_cycles);
4388 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4389 				stats.acc_offload_cycles);
4390 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4391 
4392 		/* give time for device to process ops */
4393 		rte_delay_us(200);
4394 
4395 		/* Start time meas for dequeue function offload latency */
4396 		deq_start_time = rte_rdtsc_precise();
4397 		/* Dequeue one operation */
4398 		do {
4399 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4400 					&ops_deq[deq], 1);
4401 		} while (unlikely(deq != 1));
4402 
4403 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4404 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4405 				deq_last_time);
4406 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4407 				deq_last_time);
4408 		time_st->deq_total_time += deq_last_time;
4409 
4410 		/* Dequeue remaining operations if needed */
4411 		while (burst_sz != deq)
4412 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4413 					&ops_deq[deq], burst_sz - deq);
4414 
4415 		if (extDdr) {
4416 			/* Read loopback is not thread safe */
4417 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4418 		}
4419 
4420 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4421 		dequeued += deq;
4422 	}
4423 
4424 	return i;
4425 }
4426 
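/* Measure the enqueue and dequeue offload cost for Turbo encode. */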
4427 static int
4428 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4429 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4430 		uint16_t queue_id, const uint16_t num_to_process,
4431 		uint16_t burst_sz, struct test_time_stats *time_st)
4432 {
4433 	int i, dequeued, ret;
4434 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4435 	uint64_t enq_start_time, deq_start_time;
4436 	uint64_t enq_sw_last_time, deq_last_time;
4437 	struct rte_bbdev_stats stats;
4438 
4439 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4440 		uint16_t enq = 0, deq = 0;
4441 
4442 		if (unlikely(num_to_process - dequeued < burst_sz))
4443 			burst_sz = num_to_process - dequeued;
4444 
4445 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4446 		TEST_ASSERT_SUCCESS(ret,
4447 				"rte_bbdev_enc_op_alloc_bulk() failed");
4448 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4449 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4450 					bufs->inputs,
4451 					bufs->hard_outputs,
4452 					ref_op);
4453 
4454 		/* Start time meas for enqueue function offload latency */
4455 		enq_start_time = rte_rdtsc_precise();
4456 		do {
4457 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4458 					&ops_enq[enq], burst_sz - enq);
4459 		} while (unlikely(burst_sz != enq));
4460 
4461 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4462 
4463 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4464 		TEST_ASSERT_SUCCESS(ret,
4465 				"Failed to get stats for queue (%u) of device (%u)",
4466 				queue_id, dev_id);
4467 		enq_sw_last_time -= stats.acc_offload_cycles;
4468 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4469 				enq_sw_last_time);
4470 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4471 				enq_sw_last_time);
4472 		time_st->enq_sw_total_time += enq_sw_last_time;
4473 
4474 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4475 				stats.acc_offload_cycles);
4476 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4477 				stats.acc_offload_cycles);
4478 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4479 
4480 		/* give time for device to process ops */
4481 		rte_delay_us(200);
4482 
4483 		/* Start time meas for dequeue function offload latency */
4484 		deq_start_time = rte_rdtsc_precise();
4485 		/* Dequeue one operation */
4486 		do {
4487 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4488 					&ops_deq[deq], 1);
4489 		} while (unlikely(deq != 1));
4490 
4491 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4492 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4493 				deq_last_time);
4494 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4495 				deq_last_time);
4496 		time_st->deq_total_time += deq_last_time;
4497 
4498 		while (burst_sz != deq)
4499 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4500 					&ops_deq[deq], burst_sz - deq);
4501 
4502 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4503 		dequeued += deq;
4504 	}
4505 
4506 	return i;
4507 }
4508 
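/* Measure the enqueue and dequeue offload cost for LDPC encode. */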
4509 static int
4510 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4511 		struct test_buffers *bufs,
4512 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4513 		uint16_t queue_id, const uint16_t num_to_process,
4514 		uint16_t burst_sz, struct test_time_stats *time_st)
4515 {
4516 	int i, dequeued, ret;
4517 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4518 	uint64_t enq_start_time, deq_start_time;
4519 	uint64_t enq_sw_last_time, deq_last_time;
4520 	struct rte_bbdev_stats stats;
4521 
4522 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4523 		uint16_t enq = 0, deq = 0;
4524 
4525 		if (unlikely(num_to_process - dequeued < burst_sz))
4526 			burst_sz = num_to_process - dequeued;
4527 
4528 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4529 		TEST_ASSERT_SUCCESS(ret,
4530 				"rte_bbdev_enc_op_alloc_bulk() failed");
4531 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4532 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4533 					bufs->inputs,
4534 					bufs->hard_outputs,
4535 					ref_op);
4536 
4537 		/* Start time meas for enqueue function offload latency */
4538 		enq_start_time = rte_rdtsc_precise();
4539 		do {
4540 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4541 					&ops_enq[enq], burst_sz - enq);
4542 		} while (unlikely(burst_sz != enq));
4543 
4544 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4545 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4546 		TEST_ASSERT_SUCCESS(ret,
4547 				"Failed to get stats for queue (%u) of device (%u)",
4548 				queue_id, dev_id);
4549 
4550 		enq_sw_last_time -= stats.acc_offload_cycles;
4551 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4552 				enq_sw_last_time);
4553 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4554 				enq_sw_last_time);
4555 		time_st->enq_sw_total_time += enq_sw_last_time;
4556 
4557 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4558 				stats.acc_offload_cycles);
4559 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4560 				stats.acc_offload_cycles);
4561 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4562 
4563 		/* give time for device to process ops */
4564 		rte_delay_us(200);
4565 
4566 		/* Start time meas for dequeue function offload latency */
4567 		deq_start_time = rte_rdtsc_precise();
4568 		/* Dequeue one operation */
4569 		do {
4570 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4571 					&ops_deq[deq], 1);
4572 		} while (unlikely(deq != 1));
4573 
4574 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4575 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4576 				deq_last_time);
4577 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4578 				deq_last_time);
4579 		time_st->deq_total_time += deq_last_time;
4580 
4581 		while (burst_sz != deq)
4582 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4583 					&ops_deq[deq], burst_sz - deq);
4584 
4585 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4586 		dequeued += deq;
4587 	}
4588 
4589 	return i;
4590 }
4591 #endif
4592 
4593 static int
4594 offload_cost_test(struct active_device *ad,
4595 		struct test_op_params *op_params)
4596 {
4597 #ifndef RTE_BBDEV_OFFLOAD_COST
4598 	RTE_SET_USED(ad);
4599 	RTE_SET_USED(op_params);
4600 	printf("Offload latency test is disabled.\n");
4601 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4602 	return TEST_SKIPPED;
4603 #else
4604 	int iter;
4605 	uint16_t burst_sz = op_params->burst_sz;
4606 	const uint16_t num_to_process = op_params->num_to_process;
4607 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4608 	const uint16_t queue_id = ad->queue_ids[0];
4609 	struct test_buffers *bufs = NULL;
4610 	struct rte_bbdev_info info;
4611 	const char *op_type_str;
4612 	struct test_time_stats time_st;
4613 
4614 	memset(&time_st, 0, sizeof(struct test_time_stats));
4615 	time_st.enq_sw_min_time = UINT64_MAX;
4616 	time_st.enq_acc_min_time = UINT64_MAX;
4617 	time_st.deq_min_time = UINT64_MAX;
4618 
4619 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4620 			"BURST_SIZE should be <= %u", MAX_BURST);
4621 
4622 	rte_bbdev_info_get(ad->dev_id, &info);
4623 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4624 
4625 	op_type_str = rte_bbdev_op_type_str(op_type);
4626 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4627 
4628 	printf("+ ------------------------------------------------------- +\n");
4629 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4630 			info.dev_name, burst_sz, num_to_process, op_type_str);
4631 
4632 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4633 		iter = offload_latency_test_dec(op_params->mp, bufs,
4634 				op_params->ref_dec_op, ad->dev_id, queue_id,
4635 				num_to_process, burst_sz, &time_st);
4636 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4637 		iter = offload_latency_test_enc(op_params->mp, bufs,
4638 				op_params->ref_enc_op, ad->dev_id, queue_id,
4639 				num_to_process, burst_sz, &time_st);
4640 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4641 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4642 				op_params->ref_enc_op, ad->dev_id, queue_id,
4643 				num_to_process, burst_sz, &time_st);
4644 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4645 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4646 			op_params->ref_dec_op, ad->dev_id, queue_id,
4647 			num_to_process, burst_sz, &time_st);
4648 	else
4649 		iter = offload_latency_test_enc(op_params->mp, bufs,
4650 				op_params->ref_enc_op, ad->dev_id, queue_id,
4651 				num_to_process, burst_sz, &time_st);
4652 
4653 	if (iter <= 0)
4654 		return TEST_FAILED;
4655 
4656 	printf("Enqueue driver offload cost latency:\n"
4657 			"\tavg: %lg cycles, %lg us\n"
4658 			"\tmin: %lg cycles, %lg us\n"
4659 			"\tmax: %lg cycles, %lg us\n"
4660 			"Enqueue accelerator offload cost latency:\n"
4661 			"\tavg: %lg cycles, %lg us\n"
4662 			"\tmin: %lg cycles, %lg us\n"
4663 			"\tmax: %lg cycles, %lg us\n",
4664 			(double)time_st.enq_sw_total_time / (double)iter,
4665 			(double)(time_st.enq_sw_total_time * 1000000) /
4666 			(double)iter / (double)rte_get_tsc_hz(),
4667 			(double)time_st.enq_sw_min_time,
4668 			(double)(time_st.enq_sw_min_time * 1000000) /
4669 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4670 			(double)(time_st.enq_sw_max_time * 1000000) /
4671 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4672 			(double)iter,
4673 			(double)(time_st.enq_acc_total_time * 1000000) /
4674 			(double)iter / (double)rte_get_tsc_hz(),
4675 			(double)time_st.enq_acc_min_time,
4676 			(double)(time_st.enq_acc_min_time * 1000000) /
4677 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4678 			(double)(time_st.enq_acc_max_time * 1000000) /
4679 			rte_get_tsc_hz());
4680 
4681 	printf("Dequeue offload cost latency - one op:\n"
4682 			"\tavg: %lg cycles, %lg us\n"
4683 			"\tmin: %lg cycles, %lg us\n"
4684 			"\tmax: %lg cycles, %lg us\n",
4685 			(double)time_st.deq_total_time / (double)iter,
4686 			(double)(time_st.deq_total_time * 1000000) /
4687 			(double)iter / (double)rte_get_tsc_hz(),
4688 			(double)time_st.deq_min_time,
4689 			(double)(time_st.deq_min_time * 1000000) /
4690 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4691 			(double)(time_st.deq_max_time * 1000000) /
4692 			rte_get_tsc_hz());
4693 
4694 	return TEST_SUCCESS;
4695 #endif
4696 }
4697 
4698 #ifdef RTE_BBDEV_OFFLOAD_COST
4699 static int
4700 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4701 		const uint16_t num_to_process, uint16_t burst_sz,
4702 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4703 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4704 {
4705 	int i, deq_total;
4706 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4707 	uint64_t deq_start_time, deq_last_time;
4708 
4709 	/* Test deq offload latency from an empty queue */
4710 
4711 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4712 			++i, deq_total += burst_sz) {
4713 		deq_start_time = rte_rdtsc_precise();
4714 
4715 		if (unlikely(num_to_process - deq_total < burst_sz))
4716 			burst_sz = num_to_process - deq_total;
4717 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4718 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4719 					burst_sz);
4720 		else
4721 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4722 					burst_sz);
4723 
4724 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4725 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4726 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4727 		*deq_total_time += deq_last_time;
4728 	}
4729 
4730 	return i;
4731 }
4732 
4733 static int
4734 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4735 		const uint16_t num_to_process, uint16_t burst_sz,
4736 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4737 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4738 {
4739 	int i, deq_total;
4740 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4741 	uint64_t deq_start_time, deq_last_time;
4742 
4743 	/* Test deq offload latency from an empty queue */
4744 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4745 			++i, deq_total += burst_sz) {
4746 		deq_start_time = rte_rdtsc_precise();
4747 
4748 		if (unlikely(num_to_process - deq_total < burst_sz))
4749 			burst_sz = num_to_process - deq_total;
4750 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4751 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4752 					burst_sz);
4753 		else
4754 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4755 					burst_sz);
4756 
4757 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4758 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4759 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4760 		*deq_total_time += deq_last_time;
4761 	}
4762 
4763 	return i;
4764 }
4765 
4766 #endif
4767 
4768 static int
4769 offload_latency_empty_q_test(struct active_device *ad,
4770 		struct test_op_params *op_params)
4771 {
4772 #ifndef RTE_BBDEV_OFFLOAD_COST
4773 	RTE_SET_USED(ad);
4774 	RTE_SET_USED(op_params);
4775 	printf("Offload latency empty dequeue test is disabled.\n");
4776 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4777 	return TEST_SKIPPED;
4778 #else
4779 	int iter;
4780 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4781 	uint16_t burst_sz = op_params->burst_sz;
4782 	const uint16_t num_to_process = op_params->num_to_process;
4783 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4784 	const uint16_t queue_id = ad->queue_ids[0];
4785 	struct rte_bbdev_info info;
4786 	const char *op_type_str;
4787 
4788 	deq_total_time = deq_max_time = 0;
4789 	deq_min_time = UINT64_MAX;
4790 
4791 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4792 			"BURST_SIZE should be <= %u", MAX_BURST);
4793 
4794 	rte_bbdev_info_get(ad->dev_id, &info);
4795 
4796 	op_type_str = rte_bbdev_op_type_str(op_type);
4797 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4798 
4799 	printf("+ ------------------------------------------------------- +\n");
4800 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4801 			info.dev_name, burst_sz, num_to_process, op_type_str);
4802 
4803 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4804 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4805 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4806 				num_to_process, burst_sz, &deq_total_time,
4807 				&deq_min_time, &deq_max_time, op_type);
4808 	else
4809 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4810 				num_to_process, burst_sz, &deq_total_time,
4811 				&deq_min_time, &deq_max_time, op_type);
4812 
4813 	if (iter <= 0)
4814 		return TEST_FAILED;
4815 
4816 	printf("Empty dequeue offload:\n"
4817 			"\tavg: %lg cycles, %lg us\n"
4818 			"\tmin: %lg cycles, %lg us\n"
4819 			"\tmax: %lg cycles, %lg us\n",
4820 			(double)deq_total_time / (double)iter,
4821 			(double)(deq_total_time * 1000000) / (double)iter /
4822 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4823 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4824 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4825 			rte_get_tsc_hz());
4826 
4827 	return TEST_SUCCESS;
4828 #endif
4829 }
4830 
4831 static int
4832 bler_tc(void)
4833 {
4834 	return run_test_case(bler_test);
4835 }
4836 
4837 static int
4838 throughput_tc(void)
4839 {
4840 	return run_test_case(throughput_test);
4841 }
4842 
4843 static int
4844 offload_cost_tc(void)
4845 {
4846 	return run_test_case(offload_cost_test);
4847 }
4848 
4849 static int
4850 offload_latency_empty_q_tc(void)
4851 {
4852 	return run_test_case(offload_latency_empty_q_test);
4853 }
4854 
4855 static int
4856 latency_tc(void)
4857 {
4858 	return run_test_case(latency_test);
4859 }
4860 
4861 static int
4862 interrupt_tc(void)
4863 {
4864 	return run_test_case(throughput_test);
4865 }
4866 
4867 static struct unit_test_suite bbdev_bler_testsuite = {
4868 	.suite_name = "BBdev BLER Tests",
4869 	.setup = testsuite_setup,
4870 	.teardown = testsuite_teardown,
4871 	.unit_test_cases = {
4872 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4873 		TEST_CASES_END() /**< NULL terminate unit test array */
4874 	}
4875 };
4876 
4877 static struct unit_test_suite bbdev_throughput_testsuite = {
4878 	.suite_name = "BBdev Throughput Tests",
4879 	.setup = testsuite_setup,
4880 	.teardown = testsuite_teardown,
4881 	.unit_test_cases = {
4882 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4883 		TEST_CASES_END() /**< NULL terminate unit test array */
4884 	}
4885 };
4886 
4887 static struct unit_test_suite bbdev_validation_testsuite = {
4888 	.suite_name = "BBdev Validation Tests",
4889 	.setup = testsuite_setup,
4890 	.teardown = testsuite_teardown,
4891 	.unit_test_cases = {
4892 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4893 		TEST_CASES_END() /**< NULL terminate unit test array */
4894 	}
4895 };
4896 
4897 static struct unit_test_suite bbdev_latency_testsuite = {
4898 	.suite_name = "BBdev Latency Tests",
4899 	.setup = testsuite_setup,
4900 	.teardown = testsuite_teardown,
4901 	.unit_test_cases = {
4902 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4903 		TEST_CASES_END() /**< NULL terminate unit test array */
4904 	}
4905 };
4906 
4907 static struct unit_test_suite bbdev_offload_cost_testsuite = {
4908 	.suite_name = "BBdev Offload Cost Tests",
4909 	.setup = testsuite_setup,
4910 	.teardown = testsuite_teardown,
4911 	.unit_test_cases = {
4912 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
4913 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
4914 		TEST_CASES_END() /**< NULL terminate unit test array */
4915 	}
4916 };
4917 
4918 static struct unit_test_suite bbdev_interrupt_testsuite = {
4919 	.suite_name = "BBdev Interrupt Tests",
4920 	.setup = interrupt_testsuite_setup,
4921 	.teardown = testsuite_teardown,
4922 	.unit_test_cases = {
4923 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
4924 		TEST_CASES_END() /**< NULL terminate unit test array */
4925 	}
4926 };
4927 
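/* Register each test suite as a named test command with the test
 * application.
 */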
4928 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
4929 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
4930 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
4931 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
4932 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
4933 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
4934