xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision a1598e90f353b609df41b077c59dbd1f378d23c0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28 
29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41 
42 #define OPS_CACHE_SIZE 256U
43 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
44 
45 #define SYNC_WAIT 0
46 #define SYNC_START 1
47 #define INVALID_OPAQUE -1
48 
49 #define INVALID_QUEUE_ID -1
50 /* Increment for next code block in external HARQ memory */
51 #define HARQ_INCR 32768
52 /* Headroom for filler LLRs insertion in HARQ buffer */
53 #define FILLER_HEADROOM 1024
54 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
55 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
56 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
57 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
58 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
59 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
60 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
61 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
62 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
63 
64 static struct test_bbdev_vector test_vector;
65 
66 /* Switch between PMD and Interrupt for throughput TC */
67 static bool intr_enabled;
68 
69 /* LLR arithmetic representation for numerical conversion */
70 static int ldpc_llr_decimals;
71 static int ldpc_llr_size;
72 /* Keep track of the LDPC decoder device capability flag */
73 static uint32_t ldpc_cap_flags;
74 
75 /* Represents tested active devices */
76 static struct active_device {
77 	const char *driver_name;
78 	uint8_t dev_id;
79 	uint16_t supported_ops;
80 	uint16_t queue_ids[MAX_QUEUES];
81 	uint16_t nb_queues;
82 	struct rte_mempool *ops_mempool;
83 	struct rte_mempool *in_mbuf_pool;
84 	struct rte_mempool *hard_out_mbuf_pool;
85 	struct rte_mempool *soft_out_mbuf_pool;
86 	struct rte_mempool *harq_in_mbuf_pool;
87 	struct rte_mempool *harq_out_mbuf_pool;
88 } active_devs[RTE_BBDEV_MAX_DEVS];
89 
90 static uint8_t nb_active_devs;
91 
92 /* Data buffers used by BBDEV ops */
93 struct test_buffers {
94 	struct rte_bbdev_op_data *inputs;
95 	struct rte_bbdev_op_data *hard_outputs;
96 	struct rte_bbdev_op_data *soft_outputs;
97 	struct rte_bbdev_op_data *harq_inputs;
98 	struct rte_bbdev_op_data *harq_outputs;
99 };
100 
101 /* Operation parameters specific for given test case */
102 struct test_op_params {
103 	struct rte_mempool *mp;
104 	struct rte_bbdev_dec_op *ref_dec_op;
105 	struct rte_bbdev_enc_op *ref_enc_op;
106 	uint16_t burst_sz;
107 	uint16_t num_to_process;
108 	uint16_t num_lcores;
109 	int vector_mask;
110 	rte_atomic16_t sync;
111 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
112 };
113 
114 /* Contains per lcore params */
115 struct thread_params {
116 	uint8_t dev_id;
117 	uint16_t queue_id;
118 	uint32_t lcore_id;
119 	uint64_t start_time;
120 	double ops_per_sec;
121 	double mbps;
122 	uint8_t iter_count;
123 	double iter_average;
124 	double bler;
125 	rte_atomic16_t nb_dequeued;
126 	rte_atomic16_t processing_status;
127 	rte_atomic16_t burst_sz;
128 	struct test_op_params *op_params;
129 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
130 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
131 };
132 
133 #ifdef RTE_BBDEV_OFFLOAD_COST
134 /* Stores time statistics */
135 struct test_time_stats {
136 	/* Stores software enqueue total working time */
137 	uint64_t enq_sw_total_time;
138 	/* Stores minimum value of software enqueue working time */
139 	uint64_t enq_sw_min_time;
140 	/* Stores maximum value of software enqueue working time */
141 	uint64_t enq_sw_max_time;
142 	/* Stores turbo enqueue total working time */
143 	uint64_t enq_acc_total_time;
144 	/* Stores minimum value of accelerator enqueue working time */
145 	uint64_t enq_acc_min_time;
146 	/* Stores maximum value of accelerator enqueue working time */
147 	uint64_t enq_acc_max_time;
148 	/* Stores dequeue total working time */
149 	uint64_t deq_total_time;
150 	/* Stores minimum value of dequeue working time */
151 	uint64_t deq_min_time;
152 	/* Stores maximum value of dequeue working time */
153 	uint64_t deq_max_time;
154 };
155 #endif
156 
157 typedef int (test_case_function)(struct active_device *ad,
158 		struct test_op_params *op_params);
159 
160 static inline void
161 mbuf_reset(struct rte_mbuf *m)
162 {
163 	m->pkt_len = 0;
164 
165 	do {
166 		m->data_len = 0;
167 		m = m->next;
168 	} while (m != NULL);
169 }
170 
171 /* Read flag value 0/1 from bitmap */
172 static inline bool
173 check_bit(uint32_t bitmap, uint32_t bitmask)
174 {
175 	return bitmap & bitmask;
176 }
177 
178 static inline void
179 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
180 {
181 	ad->supported_ops |= (1 << op_type);
182 }
183 
184 static inline bool
185 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
186 {
187 	return ad->supported_ops & (1 << op_type);
188 }
189 
190 static inline bool
191 flags_match(uint32_t flags_req, uint32_t flags_present)
192 {
193 	return (flags_req & flags_present) == flags_req;
194 }
195 
196 static void
197 clear_soft_out_cap(uint32_t *op_flags)
198 {
199 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
200 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
201 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
202 }
203 
204 static int
205 check_dev_cap(const struct rte_bbdev_info *dev_info)
206 {
207 	unsigned int i;
208 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
209 		nb_harq_inputs, nb_harq_outputs;
210 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
211 
212 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
213 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
214 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
215 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
216 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
217 
218 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
219 		if (op_cap->type != test_vector.op_type)
220 			continue;
221 
222 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
223 			const struct rte_bbdev_op_cap_turbo_dec *cap =
224 					&op_cap->cap.turbo_dec;
225 			/* Ignore lack of soft output capability, just skip
226 			 * checking if soft output is valid.
227 			 */
228 			if ((test_vector.turbo_dec.op_flags &
229 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
230 					!(cap->capability_flags &
231 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
232 				printf(
233 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
234 					dev_info->dev_name);
235 				clear_soft_out_cap(
236 					&test_vector.turbo_dec.op_flags);
237 			}
238 
239 			if (!flags_match(test_vector.turbo_dec.op_flags,
240 					cap->capability_flags))
241 				return TEST_FAILED;
242 			if (nb_inputs > cap->num_buffers_src) {
243 				printf("Too many inputs defined: %u, max: %u\n",
244 					nb_inputs, cap->num_buffers_src);
245 				return TEST_FAILED;
246 			}
247 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
248 					(test_vector.turbo_dec.op_flags &
249 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
250 				printf(
251 					"Too many soft outputs defined: %u, max: %u\n",
252 						nb_soft_outputs,
253 						cap->num_buffers_soft_out);
254 				return TEST_FAILED;
255 			}
256 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
257 				printf(
258 					"Too many hard outputs defined: %u, max: %u\n",
259 						nb_hard_outputs,
260 						cap->num_buffers_hard_out);
261 				return TEST_FAILED;
262 			}
263 			if (intr_enabled && !(cap->capability_flags &
264 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
265 				printf(
266 					"Dequeue interrupts are not supported!\n");
267 				return TEST_FAILED;
268 			}
269 
270 			return TEST_SUCCESS;
271 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
272 			const struct rte_bbdev_op_cap_turbo_enc *cap =
273 					&op_cap->cap.turbo_enc;
274 
275 			if (!flags_match(test_vector.turbo_enc.op_flags,
276 					cap->capability_flags))
277 				return TEST_FAILED;
278 			if (nb_inputs > cap->num_buffers_src) {
279 				printf("Too many inputs defined: %u, max: %u\n",
280 					nb_inputs, cap->num_buffers_src);
281 				return TEST_FAILED;
282 			}
283 			if (nb_hard_outputs > cap->num_buffers_dst) {
284 				printf(
285 					"Too many hard outputs defined: %u, max: %u\n",
286 					nb_hard_outputs, cap->num_buffers_dst);
287 				return TEST_FAILED;
288 			}
289 			if (intr_enabled && !(cap->capability_flags &
290 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
291 				printf(
292 					"Dequeue interrupts are not supported!\n");
293 				return TEST_FAILED;
294 			}
295 
296 			return TEST_SUCCESS;
297 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
298 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
299 					&op_cap->cap.ldpc_enc;
300 
301 			if (!flags_match(test_vector.ldpc_enc.op_flags,
302 					cap->capability_flags)){
303 				printf("Flag Mismatch\n");
304 				return TEST_FAILED;
305 			}
306 			if (nb_inputs > cap->num_buffers_src) {
307 				printf("Too many inputs defined: %u, max: %u\n",
308 					nb_inputs, cap->num_buffers_src);
309 				return TEST_FAILED;
310 			}
311 			if (nb_hard_outputs > cap->num_buffers_dst) {
312 				printf(
313 					"Too many hard outputs defined: %u, max: %u\n",
314 					nb_hard_outputs, cap->num_buffers_dst);
315 				return TEST_FAILED;
316 			}
317 			if (intr_enabled && !(cap->capability_flags &
318 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
319 				printf(
320 					"Dequeue interrupts are not supported!\n");
321 				return TEST_FAILED;
322 			}
323 
324 			return TEST_SUCCESS;
325 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
326 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
327 					&op_cap->cap.ldpc_dec;
328 
329 			if (!flags_match(test_vector.ldpc_dec.op_flags,
330 					cap->capability_flags)){
331 				printf("Flag Mismatch\n");
332 				return TEST_FAILED;
333 			}
334 			if (nb_inputs > cap->num_buffers_src) {
335 				printf("Too many inputs defined: %u, max: %u\n",
336 					nb_inputs, cap->num_buffers_src);
337 				return TEST_FAILED;
338 			}
339 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
340 				printf(
341 					"Too many hard outputs defined: %u, max: %u\n",
342 					nb_hard_outputs,
343 					cap->num_buffers_hard_out);
344 				return TEST_FAILED;
345 			}
346 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
347 				printf(
348 					"Too many HARQ inputs defined: %u, max: %u\n",
349 					nb_hard_outputs,
350 					cap->num_buffers_hard_out);
351 				return TEST_FAILED;
352 			}
353 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
354 				printf(
355 					"Too many HARQ outputs defined: %u, max: %u\n",
356 					nb_hard_outputs,
357 					cap->num_buffers_hard_out);
358 				return TEST_FAILED;
359 			}
360 			if (intr_enabled && !(cap->capability_flags &
361 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
362 				printf(
363 					"Dequeue interrupts are not supported!\n");
364 				return TEST_FAILED;
365 			}
366 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
367 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
368 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
369 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
370 					))) {
371 				printf("Skip loop-back with interrupt\n");
372 				return TEST_FAILED;
373 			}
374 			return TEST_SUCCESS;
375 		}
376 	}
377 
378 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
379 		return TEST_SUCCESS; /* Special case for NULL device */
380 
381 	return TEST_FAILED;
382 }
383 
384 /* calculates optimal mempool size not smaller than the val */
385 static unsigned int
386 optimal_mempool_size(unsigned int val)
387 {
388 	return rte_align32pow2(val + 1) - 1;
389 }
390 
391 /* allocates mbuf mempool for inputs and outputs */
392 static struct rte_mempool *
393 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
394 		int socket_id, unsigned int mbuf_pool_size,
395 		const char *op_type_str)
396 {
397 	unsigned int i;
398 	uint32_t max_seg_sz = 0;
399 	char pool_name[RTE_MEMPOOL_NAMESIZE];
400 
401 	/* find max input segment size */
402 	for (i = 0; i < entries->nb_segments; ++i)
403 		if (entries->segments[i].length > max_seg_sz)
404 			max_seg_sz = entries->segments[i].length;
405 
406 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
407 			dev_id);
408 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
409 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
410 					+ FILLER_HEADROOM,
411 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
412 }
413 
414 static int
415 create_mempools(struct active_device *ad, int socket_id,
416 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
417 {
418 	struct rte_mempool *mp;
419 	unsigned int ops_pool_size, mbuf_pool_size = 0;
420 	char pool_name[RTE_MEMPOOL_NAMESIZE];
421 	const char *op_type_str;
422 	enum rte_bbdev_op_type op_type = org_op_type;
423 
424 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
425 	struct op_data_entries *hard_out =
426 			&test_vector.entries[DATA_HARD_OUTPUT];
427 	struct op_data_entries *soft_out =
428 			&test_vector.entries[DATA_SOFT_OUTPUT];
429 	struct op_data_entries *harq_in =
430 			&test_vector.entries[DATA_HARQ_INPUT];
431 	struct op_data_entries *harq_out =
432 			&test_vector.entries[DATA_HARQ_OUTPUT];
433 
434 	/* allocate ops mempool */
435 	ops_pool_size = optimal_mempool_size(RTE_MAX(
436 			/* Ops used plus 1 reference op */
437 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
438 			/* Minimal cache size plus 1 reference op */
439 			(unsigned int)(1.5 * rte_lcore_count() *
440 					OPS_CACHE_SIZE + 1)),
441 			OPS_POOL_SIZE_MIN));
442 
443 	if (org_op_type == RTE_BBDEV_OP_NONE)
444 		op_type = RTE_BBDEV_OP_TURBO_ENC;
445 
446 	op_type_str = rte_bbdev_op_type_str(op_type);
447 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
448 
449 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
450 			ad->dev_id);
451 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
452 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
453 	TEST_ASSERT_NOT_NULL(mp,
454 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
455 			ops_pool_size,
456 			ad->dev_id,
457 			socket_id);
458 	ad->ops_mempool = mp;
459 
460 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
461 	if (org_op_type == RTE_BBDEV_OP_NONE)
462 		return TEST_SUCCESS;
463 
464 	/* Inputs */
465 	if (in->nb_segments > 0) {
466 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
467 				in->nb_segments);
468 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
469 				mbuf_pool_size, "in");
470 		TEST_ASSERT_NOT_NULL(mp,
471 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
472 				mbuf_pool_size,
473 				ad->dev_id,
474 				socket_id);
475 		ad->in_mbuf_pool = mp;
476 	}
477 
478 	/* Hard outputs */
479 	if (hard_out->nb_segments > 0) {
480 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
481 				hard_out->nb_segments);
482 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
483 				mbuf_pool_size,
484 				"hard_out");
485 		TEST_ASSERT_NOT_NULL(mp,
486 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
487 				mbuf_pool_size,
488 				ad->dev_id,
489 				socket_id);
490 		ad->hard_out_mbuf_pool = mp;
491 	}
492 
493 	/* Soft outputs */
494 	if (soft_out->nb_segments > 0) {
495 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
496 				soft_out->nb_segments);
497 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
498 				mbuf_pool_size,
499 				"soft_out");
500 		TEST_ASSERT_NOT_NULL(mp,
501 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
502 				mbuf_pool_size,
503 				ad->dev_id,
504 				socket_id);
505 		ad->soft_out_mbuf_pool = mp;
506 	}
507 
508 	/* HARQ inputs */
509 	if (harq_in->nb_segments > 0) {
510 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
511 				harq_in->nb_segments);
512 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
513 				mbuf_pool_size,
514 				"harq_in");
515 		TEST_ASSERT_NOT_NULL(mp,
516 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
517 				mbuf_pool_size,
518 				ad->dev_id,
519 				socket_id);
520 		ad->harq_in_mbuf_pool = mp;
521 	}
522 
523 	/* HARQ outputs */
524 	if (harq_out->nb_segments > 0) {
525 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
526 				harq_out->nb_segments);
527 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
528 				mbuf_pool_size,
529 				"harq_out");
530 		TEST_ASSERT_NOT_NULL(mp,
531 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
532 				mbuf_pool_size,
533 				ad->dev_id,
534 				socket_id);
535 		ad->harq_out_mbuf_pool = mp;
536 	}
537 
538 	return TEST_SUCCESS;
539 }
540 
541 static int
542 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
543 		struct test_bbdev_vector *vector)
544 {
545 	int ret;
546 	unsigned int queue_id;
547 	struct rte_bbdev_queue_conf qconf;
548 	struct active_device *ad = &active_devs[nb_active_devs];
549 	unsigned int nb_queues;
550 	enum rte_bbdev_op_type op_type = vector->op_type;
551 
552 /* Configure fpga lte fec with PF & VF values
553  * if '-i' flag is set and using fpga device
554  */
555 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
556 	if ((get_init_device() == true) &&
557 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
558 		struct fpga_lte_fec_conf conf;
559 		unsigned int i;
560 
561 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
562 				info->drv.driver_name);
563 
564 		/* clear default configuration before initialization */
565 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
566 
567 		/* Set PF mode :
568 		 * true if PF is used for data plane
569 		 * false for VFs
570 		 */
571 		conf.pf_mode_en = true;
572 
573 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
574 			/* Number of UL queues per VF (fpga supports 8 VFs) */
575 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
576 			/* Number of DL queues per VF (fpga supports 8 VFs) */
577 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
578 		}
579 
580 		/* UL bandwidth. Needed for schedule algorithm */
581 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
582 		/* DL bandwidth */
583 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
584 
585 		/* UL & DL load Balance Factor to 64 */
586 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
587 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
588 
589 		/**< FLR timeout value */
590 		conf.flr_time_out = FLR_4G_TIMEOUT;
591 
592 		/* setup FPGA PF with configuration information */
593 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
594 		TEST_ASSERT_SUCCESS(ret,
595 				"Failed to configure 4G FPGA PF for bbdev %s",
596 				info->dev_name);
597 	}
598 #endif
599 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
600 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
601 
602 	/* setup device */
603 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
604 	if (ret < 0) {
605 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
606 				dev_id, nb_queues, info->socket_id, ret);
607 		return TEST_FAILED;
608 	}
609 
610 	/* configure interrupts if needed */
611 	if (intr_enabled) {
612 		ret = rte_bbdev_intr_enable(dev_id);
613 		if (ret < 0) {
614 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
615 					ret);
616 			return TEST_FAILED;
617 		}
618 	}
619 
620 	/* setup device queues */
621 	qconf.socket = info->socket_id;
622 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
623 	qconf.priority = 0;
624 	qconf.deferred_start = 0;
625 	qconf.op_type = op_type;
626 
627 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
628 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
629 		if (ret != 0) {
630 			printf(
631 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
632 					queue_id, qconf.priority, dev_id);
633 			qconf.priority++;
634 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
635 					&qconf);
636 		}
637 		if (ret != 0) {
638 			printf("All queues on dev %u allocated: %u\n",
639 					dev_id, queue_id);
640 			break;
641 		}
642 		ad->queue_ids[queue_id] = queue_id;
643 	}
644 	TEST_ASSERT(queue_id != 0,
645 			"ERROR Failed to configure any queues on dev %u",
646 			dev_id);
647 	ad->nb_queues = queue_id;
648 
649 	set_avail_op(ad, op_type);
650 
651 	return TEST_SUCCESS;
652 }
653 
654 static int
655 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
656 		struct test_bbdev_vector *vector)
657 {
658 	int ret;
659 
660 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
661 	active_devs[nb_active_devs].dev_id = dev_id;
662 
663 	ret = add_bbdev_dev(dev_id, info, vector);
664 	if (ret == TEST_SUCCESS)
665 		++nb_active_devs;
666 	return ret;
667 }
668 
669 static uint8_t
670 populate_active_devices(void)
671 {
672 	int ret;
673 	uint8_t dev_id;
674 	uint8_t nb_devs_added = 0;
675 	struct rte_bbdev_info info;
676 
677 	RTE_BBDEV_FOREACH(dev_id) {
678 		rte_bbdev_info_get(dev_id, &info);
679 
680 		if (check_dev_cap(&info)) {
681 			printf(
682 				"Device %d (%s) does not support specified capabilities\n",
683 					dev_id, info.dev_name);
684 			continue;
685 		}
686 
687 		ret = add_active_device(dev_id, &info, &test_vector);
688 		if (ret != 0) {
689 			printf("Adding active bbdev %s skipped\n",
690 					info.dev_name);
691 			continue;
692 		}
693 		nb_devs_added++;
694 	}
695 
696 	return nb_devs_added;
697 }
698 
699 static int
700 read_test_vector(void)
701 {
702 	int ret;
703 
704 	memset(&test_vector, 0, sizeof(test_vector));
705 	printf("Test vector file = %s\n", get_vector_filename());
706 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
707 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
708 			get_vector_filename());
709 
710 	return TEST_SUCCESS;
711 }
712 
713 static int
714 testsuite_setup(void)
715 {
716 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
717 
718 	if (populate_active_devices() == 0) {
719 		printf("No suitable devices found!\n");
720 		return TEST_SKIPPED;
721 	}
722 
723 	return TEST_SUCCESS;
724 }
725 
726 static int
727 interrupt_testsuite_setup(void)
728 {
729 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
730 
731 	/* Enable interrupts */
732 	intr_enabled = true;
733 
734 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
735 	if (populate_active_devices() == 0 ||
736 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
737 		intr_enabled = false;
738 		printf("No suitable devices found!\n");
739 		return TEST_SKIPPED;
740 	}
741 
742 	return TEST_SUCCESS;
743 }
744 
745 static void
746 testsuite_teardown(void)
747 {
748 	uint8_t dev_id;
749 
750 	/* Unconfigure devices */
751 	RTE_BBDEV_FOREACH(dev_id)
752 		rte_bbdev_close(dev_id);
753 
754 	/* Clear active devices structs. */
755 	memset(active_devs, 0, sizeof(active_devs));
756 	nb_active_devs = 0;
757 
758 	/* Disable interrupts */
759 	intr_enabled = false;
760 }
761 
762 static int
763 ut_setup(void)
764 {
765 	uint8_t i, dev_id;
766 
767 	for (i = 0; i < nb_active_devs; i++) {
768 		dev_id = active_devs[i].dev_id;
769 		/* reset bbdev stats */
770 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
771 				"Failed to reset stats of bbdev %u", dev_id);
772 		/* start the device */
773 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
774 				"Failed to start bbdev %u", dev_id);
775 	}
776 
777 	return TEST_SUCCESS;
778 }
779 
780 static void
781 ut_teardown(void)
782 {
783 	uint8_t i, dev_id;
784 	struct rte_bbdev_stats stats;
785 
786 	for (i = 0; i < nb_active_devs; i++) {
787 		dev_id = active_devs[i].dev_id;
788 		/* read stats and print */
789 		rte_bbdev_stats_get(dev_id, &stats);
790 		/* Stop the device */
791 		rte_bbdev_stop(dev_id);
792 	}
793 }
794 
795 static int
796 init_op_data_objs(struct rte_bbdev_op_data *bufs,
797 		struct op_data_entries *ref_entries,
798 		struct rte_mempool *mbuf_pool, const uint16_t n,
799 		enum op_data_type op_type, uint16_t min_alignment)
800 {
801 	int ret;
802 	unsigned int i, j;
803 	bool large_input = false;
804 
805 	for (i = 0; i < n; ++i) {
806 		char *data;
807 		struct op_data_buf *seg = &ref_entries->segments[0];
808 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
809 		TEST_ASSERT_NOT_NULL(m_head,
810 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
811 				op_type, n * ref_entries->nb_segments,
812 				mbuf_pool->size);
813 
814 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
815 			/*
816 			 * Special case when DPDK mbuf cannot handle
817 			 * the required input size
818 			 */
819 			printf("Warning: Larger input size than DPDK mbuf %d\n",
820 					seg->length);
821 			large_input = true;
822 		}
823 		bufs[i].data = m_head;
824 		bufs[i].offset = 0;
825 		bufs[i].length = 0;
826 
827 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
828 			if ((op_type == DATA_INPUT) && large_input) {
829 				/* Allocate a fake overused mbuf */
830 				data = rte_malloc(NULL, seg->length, 0);
831 				memcpy(data, seg->addr, seg->length);
832 				m_head->buf_addr = data;
833 				m_head->buf_iova = rte_malloc_virt2iova(data);
834 				m_head->data_off = 0;
835 				m_head->data_len = seg->length;
836 			} else {
837 				data = rte_pktmbuf_append(m_head, seg->length);
838 				TEST_ASSERT_NOT_NULL(data,
839 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
840 					seg->length, op_type);
841 
842 				TEST_ASSERT(data == RTE_PTR_ALIGN(
843 						data, min_alignment),
844 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
845 					data, min_alignment);
846 				rte_memcpy(data, seg->addr, seg->length);
847 			}
848 
849 			bufs[i].length += seg->length;
850 
851 			for (j = 1; j < ref_entries->nb_segments; ++j) {
852 				struct rte_mbuf *m_tail =
853 						rte_pktmbuf_alloc(mbuf_pool);
854 				TEST_ASSERT_NOT_NULL(m_tail,
855 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
856 						op_type,
857 						n * ref_entries->nb_segments,
858 						mbuf_pool->size);
859 				seg += 1;
860 
861 				data = rte_pktmbuf_append(m_tail, seg->length);
862 				TEST_ASSERT_NOT_NULL(data,
863 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
864 						seg->length, op_type);
865 
866 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
867 						min_alignment),
868 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
869 						data, min_alignment);
870 				rte_memcpy(data, seg->addr, seg->length);
871 				bufs[i].length += seg->length;
872 
873 				ret = rte_pktmbuf_chain(m_head, m_tail);
874 				TEST_ASSERT_SUCCESS(ret,
875 						"Couldn't chain mbufs from %d data type mbuf pool",
876 						op_type);
877 			}
878 		} else {
879 
880 			/* allocate chained-mbuf for output buffer */
881 			for (j = 1; j < ref_entries->nb_segments; ++j) {
882 				struct rte_mbuf *m_tail =
883 						rte_pktmbuf_alloc(mbuf_pool);
884 				TEST_ASSERT_NOT_NULL(m_tail,
885 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
886 						op_type,
887 						n * ref_entries->nb_segments,
888 						mbuf_pool->size);
889 
890 				ret = rte_pktmbuf_chain(m_head, m_tail);
891 				TEST_ASSERT_SUCCESS(ret,
892 						"Couldn't chain mbufs from %d data type mbuf pool",
893 						op_type);
894 			}
895 		}
896 	}
897 
898 	return 0;
899 }
900 
901 static int
902 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
903 		const int socket)
904 {
905 	int i;
906 
907 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
908 	if (*buffers == NULL) {
909 		printf("WARNING: Failed to allocate op_data on socket %d\n",
910 				socket);
911 		/* try to allocate memory on other detected sockets */
912 		for (i = 0; i < socket; i++) {
913 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
914 			if (*buffers != NULL)
915 				break;
916 		}
917 	}
918 
919 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
920 }
921 
922 static void
923 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
924 		const uint16_t n, const int8_t max_llr_modulus)
925 {
926 	uint16_t i, byte_idx;
927 
928 	for (i = 0; i < n; ++i) {
929 		struct rte_mbuf *m = input_ops[i].data;
930 		while (m != NULL) {
931 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
932 					input_ops[i].offset);
933 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
934 					++byte_idx)
935 				llr[byte_idx] = round((double)max_llr_modulus *
936 						llr[byte_idx] / INT8_MAX);
937 
938 			m = m->next;
939 		}
940 	}
941 }
942 
943 /*
944  * We may have to insert filler bits
945  * when they are required by the HARQ assumption
946  */
947 static void
948 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
949 		const uint16_t n, struct test_op_params *op_params)
950 {
951 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
952 
953 	if (input_ops == NULL)
954 		return;
955 	/* No need to add filler if not required by device */
956 	if (!(ldpc_cap_flags &
957 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
958 		return;
959 	/* No need to add filler for loopback operation */
960 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
961 		return;
962 
963 	uint16_t i, j, parity_offset;
964 	for (i = 0; i < n; ++i) {
965 		struct rte_mbuf *m = input_ops[i].data;
966 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
967 				input_ops[i].offset);
968 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
969 				* dec.z_c - dec.n_filler;
970 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
971 		m->data_len = new_hin_size;
972 		input_ops[i].length = new_hin_size;
973 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
974 				j--)
975 			llr[j] = llr[j - dec.n_filler];
976 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
977 		for (j = 0; j < dec.n_filler; j++)
978 			llr[parity_offset + j] = llr_max_pre_scaling;
979 	}
980 }
981 
982 static void
983 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
984 		const uint16_t n, const int8_t llr_size,
985 		const int8_t llr_decimals)
986 {
987 	if (input_ops == NULL)
988 		return;
989 
990 	uint16_t i, byte_idx;
991 
992 	int16_t llr_max, llr_min, llr_tmp;
993 	llr_max = (1 << (llr_size - 1)) - 1;
994 	llr_min = -llr_max;
995 	for (i = 0; i < n; ++i) {
996 		struct rte_mbuf *m = input_ops[i].data;
997 		while (m != NULL) {
998 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
999 					input_ops[i].offset);
1000 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1001 					++byte_idx) {
1002 
1003 				llr_tmp = llr[byte_idx];
1004 				if (llr_decimals == 4)
1005 					llr_tmp *= 8;
1006 				else if (llr_decimals == 2)
1007 					llr_tmp *= 2;
1008 				else if (llr_decimals == 0)
1009 					llr_tmp /= 2;
1010 				llr_tmp = RTE_MIN(llr_max,
1011 						RTE_MAX(llr_min, llr_tmp));
1012 				llr[byte_idx] = (int8_t) llr_tmp;
1013 			}
1014 
1015 			m = m->next;
1016 		}
1017 	}
1018 }
1019 
1020 
1021 
1022 static int
1023 fill_queue_buffers(struct test_op_params *op_params,
1024 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1025 		struct rte_mempool *soft_out_mp,
1026 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1027 		uint16_t queue_id,
1028 		const struct rte_bbdev_op_cap *capabilities,
1029 		uint16_t min_alignment, const int socket_id)
1030 {
1031 	int ret;
1032 	enum op_data_type type;
1033 	const uint16_t n = op_params->num_to_process;
1034 
1035 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1036 		in_mp,
1037 		soft_out_mp,
1038 		hard_out_mp,
1039 		harq_in_mp,
1040 		harq_out_mp,
1041 	};
1042 
1043 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1044 		&op_params->q_bufs[socket_id][queue_id].inputs,
1045 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1046 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1047 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1048 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1049 	};
1050 
1051 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1052 		struct op_data_entries *ref_entries =
1053 				&test_vector.entries[type];
1054 		if (ref_entries->nb_segments == 0)
1055 			continue;
1056 
1057 		ret = allocate_buffers_on_socket(queue_ops[type],
1058 				n * sizeof(struct rte_bbdev_op_data),
1059 				socket_id);
1060 		TEST_ASSERT_SUCCESS(ret,
1061 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1062 
1063 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1064 				mbuf_pools[type], n, type, min_alignment);
1065 		TEST_ASSERT_SUCCESS(ret,
1066 				"Couldn't init rte_bbdev_op_data structs");
1067 	}
1068 
1069 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1070 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1071 			capabilities->cap.turbo_dec.max_llr_modulus);
1072 
1073 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1074 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1075 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1076 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1077 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1078 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1079 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1080 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1081 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1082 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1083 		if (!loopback && !llr_comp)
1084 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1085 					ldpc_llr_size, ldpc_llr_decimals);
1086 		if (!loopback && !harq_comp)
1087 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1088 					ldpc_llr_size, ldpc_llr_decimals);
1089 		if (!loopback)
1090 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1091 					op_params);
1092 	}
1093 
1094 	return 0;
1095 }
1096 
1097 static void
1098 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1099 {
1100 	unsigned int i, j;
1101 
1102 	rte_mempool_free(ad->ops_mempool);
1103 	rte_mempool_free(ad->in_mbuf_pool);
1104 	rte_mempool_free(ad->hard_out_mbuf_pool);
1105 	rte_mempool_free(ad->soft_out_mbuf_pool);
1106 	rte_mempool_free(ad->harq_in_mbuf_pool);
1107 	rte_mempool_free(ad->harq_out_mbuf_pool);
1108 
1109 	for (i = 0; i < rte_lcore_count(); ++i) {
1110 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1111 			rte_free(op_params->q_bufs[j][i].inputs);
1112 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1113 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1114 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1115 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1116 		}
1117 	}
1118 }
1119 
1120 static void
1121 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1122 		unsigned int start_idx,
1123 		struct rte_bbdev_op_data *inputs,
1124 		struct rte_bbdev_op_data *hard_outputs,
1125 		struct rte_bbdev_op_data *soft_outputs,
1126 		struct rte_bbdev_dec_op *ref_op)
1127 {
1128 	unsigned int i;
1129 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1130 
1131 	for (i = 0; i < n; ++i) {
1132 		if (turbo_dec->code_block_mode == 0) {
1133 			ops[i]->turbo_dec.tb_params.ea =
1134 					turbo_dec->tb_params.ea;
1135 			ops[i]->turbo_dec.tb_params.eb =
1136 					turbo_dec->tb_params.eb;
1137 			ops[i]->turbo_dec.tb_params.k_pos =
1138 					turbo_dec->tb_params.k_pos;
1139 			ops[i]->turbo_dec.tb_params.k_neg =
1140 					turbo_dec->tb_params.k_neg;
1141 			ops[i]->turbo_dec.tb_params.c =
1142 					turbo_dec->tb_params.c;
1143 			ops[i]->turbo_dec.tb_params.c_neg =
1144 					turbo_dec->tb_params.c_neg;
1145 			ops[i]->turbo_dec.tb_params.cab =
1146 					turbo_dec->tb_params.cab;
1147 			ops[i]->turbo_dec.tb_params.r =
1148 					turbo_dec->tb_params.r;
1149 		} else {
1150 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1151 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1152 		}
1153 
1154 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1155 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1156 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1157 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1158 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1159 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1160 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1161 
1162 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1163 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1164 		if (soft_outputs != NULL)
1165 			ops[i]->turbo_dec.soft_output =
1166 				soft_outputs[start_idx + i];
1167 	}
1168 }
1169 
1170 static void
1171 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1172 		unsigned int start_idx,
1173 		struct rte_bbdev_op_data *inputs,
1174 		struct rte_bbdev_op_data *outputs,
1175 		struct rte_bbdev_enc_op *ref_op)
1176 {
1177 	unsigned int i;
1178 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1179 	for (i = 0; i < n; ++i) {
1180 		if (turbo_enc->code_block_mode == 0) {
1181 			ops[i]->turbo_enc.tb_params.ea =
1182 					turbo_enc->tb_params.ea;
1183 			ops[i]->turbo_enc.tb_params.eb =
1184 					turbo_enc->tb_params.eb;
1185 			ops[i]->turbo_enc.tb_params.k_pos =
1186 					turbo_enc->tb_params.k_pos;
1187 			ops[i]->turbo_enc.tb_params.k_neg =
1188 					turbo_enc->tb_params.k_neg;
1189 			ops[i]->turbo_enc.tb_params.c =
1190 					turbo_enc->tb_params.c;
1191 			ops[i]->turbo_enc.tb_params.c_neg =
1192 					turbo_enc->tb_params.c_neg;
1193 			ops[i]->turbo_enc.tb_params.cab =
1194 					turbo_enc->tb_params.cab;
1195 			ops[i]->turbo_enc.tb_params.ncb_pos =
1196 					turbo_enc->tb_params.ncb_pos;
1197 			ops[i]->turbo_enc.tb_params.ncb_neg =
1198 					turbo_enc->tb_params.ncb_neg;
1199 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1200 		} else {
1201 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1202 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1203 			ops[i]->turbo_enc.cb_params.ncb =
1204 					turbo_enc->cb_params.ncb;
1205 		}
1206 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1207 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1208 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1209 
1210 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1211 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1212 	}
1213 }
1214 
1215 
1216 /* Returns a random number drawn from a normal distribution
1217  * with mean of 0 and variance of 1
1218  * Marsaglia algorithm
1219  */
1220 static double
1221 randn(int n)
1222 {
1223 	double S, Z, U1, U2, u, v, fac;
1224 
1225 	do {
1226 		U1 = (double)rand() / RAND_MAX;
1227 		U2 = (double)rand() / RAND_MAX;
1228 		u = 2. * U1 - 1.;
1229 		v = 2. * U2 - 1.;
1230 		S = u * u + v * v;
1231 	} while (S >= 1 || S == 0);
1232 	fac = sqrt(-2. * log(S) / S);
1233 	Z = (n % 2) ? u * fac : v * fac;
1234 	return Z;
1235 }
1236 
1237 static inline double
1238 maxstar(double A, double B)
1239 {
1240 	if (fabs(A - B) > 5)
1241 		return RTE_MAX(A, B);
1242 	else
1243 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1244 }
1245 
1246 /*
1247  * Generate Qm LLRS for Qm==8
1248  * Modulation, AWGN and LLR estimation from max log development
1249  */
1250 static void
1251 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1252 {
1253 	int qm = 8;
1254 	int qam = 256;
1255 	int m, k;
1256 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1257 	/* 5.1.4 of TS38.211 */
1258 	const double symbols_I[256] = {
1259 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1260 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1261 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1262 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1263 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1264 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1265 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1266 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1267 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1268 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1269 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1270 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1271 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1272 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1273 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1274 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1275 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1276 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1277 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1278 			-13, -13, -15, -15, -13, -13, -15, -15};
1279 	const double symbols_Q[256] = {
1280 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1281 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1282 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1283 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1284 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1285 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1286 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1287 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1288 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1289 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1290 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1291 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1292 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1293 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1294 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1295 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1296 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1297 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1298 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1299 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1300 	/* Average constellation point energy */
1301 	N0 *= 170.0;
1302 	for (k = 0; k < qm; k++)
1303 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1304 	/* 5.1.4 of TS38.211 */
1305 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1306 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1307 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1308 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1309 	/* AWGN channel */
1310 	I += sqrt(N0 / 2) * randn(0);
1311 	Q += sqrt(N0 / 2) * randn(1);
1312 	/*
1313 	 * Calculate the log of the probability that each of
1314 	 * the constellation points was transmitted
1315 	 */
1316 	for (m = 0; m < qam; m++)
1317 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1318 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1319 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1320 	for (k = 0; k < qm; k++) {
1321 		p0 = -999999;
1322 		p1 = -999999;
1323 		/* For each constellation point */
1324 		for (m = 0; m < qam; m++) {
1325 			if ((m >> (qm - k - 1)) & 1)
1326 				p1 = maxstar(p1, log_syml_prob[m]);
1327 			else
1328 				p0 = maxstar(p0, log_syml_prob[m]);
1329 		}
1330 		/* Calculate the LLR */
1331 		llr_ = p0 - p1;
1332 		llr_ *= (1 << ldpc_llr_decimals);
1333 		llr_ = round(llr_);
1334 		if (llr_ > llr_max)
1335 			llr_ = llr_max;
1336 		if (llr_ < -llr_max)
1337 			llr_ = -llr_max;
1338 		llrs[qm * i + k] = (int8_t) llr_;
1339 	}
1340 }
1341 
1342 
1343 /*
1344  * Generate Qm LLRS for Qm==6
1345  * Modulation, AWGN and LLR estimation from max log development
1346  */
1347 static void
1348 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1349 {
1350 	int qm = 6;
1351 	int qam = 64;
1352 	int m, k;
1353 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1354 	/* 5.1.4 of TS38.211 */
1355 	const double symbols_I[64] = {
1356 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1357 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1358 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1359 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1360 			-5, -5, -7, -7, -5, -5, -7, -7};
1361 	const double symbols_Q[64] = {
1362 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1363 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1364 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1365 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1366 			-3, -1, -3, -1, -5, -7, -5, -7};
1367 	/* Average constellation point energy */
1368 	N0 *= 42.0;
1369 	for (k = 0; k < qm; k++)
1370 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1371 	/* 5.1.4 of TS38.211 */
1372 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1373 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1374 	/* AWGN channel */
1375 	I += sqrt(N0 / 2) * randn(0);
1376 	Q += sqrt(N0 / 2) * randn(1);
1377 	/*
1378 	 * Calculate the log of the probability that each of
1379 	 * the constellation points was transmitted
1380 	 */
1381 	for (m = 0; m < qam; m++)
1382 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1383 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1384 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1385 	for (k = 0; k < qm; k++) {
1386 		p0 = -999999;
1387 		p1 = -999999;
1388 		/* For each constellation point */
1389 		for (m = 0; m < qam; m++) {
1390 			if ((m >> (qm - k - 1)) & 1)
1391 				p1 = maxstar(p1, log_syml_prob[m]);
1392 			else
1393 				p0 = maxstar(p0, log_syml_prob[m]);
1394 		}
1395 		/* Calculate the LLR */
1396 		llr_ = p0 - p1;
1397 		llr_ *= (1 << ldpc_llr_decimals);
1398 		llr_ = round(llr_);
1399 		if (llr_ > llr_max)
1400 			llr_ = llr_max;
1401 		if (llr_ < -llr_max)
1402 			llr_ = -llr_max;
1403 		llrs[qm * i + k] = (int8_t) llr_;
1404 	}
1405 }
1406 
1407 /*
1408  * Generate Qm LLRS for Qm==4
1409  * Modulation, AWGN and LLR estimation from max log development
1410  */
1411 static void
1412 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1413 {
1414 	int qm = 4;
1415 	int qam = 16;
1416 	int m, k;
1417 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1418 	/* 5.1.4 of TS38.211 */
1419 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1420 			-1, -1, -3, -3, -1, -1, -3, -3};
1421 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1422 			1, 3, 1, 3, -1, -3, -1, -3};
1423 	/* Average constellation point energy */
1424 	N0 *= 10.0;
1425 	for (k = 0; k < qm; k++)
1426 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1427 	/* 5.1.4 of TS38.211 */
1428 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1429 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1430 	/* AWGN channel */
1431 	I += sqrt(N0 / 2) * randn(0);
1432 	Q += sqrt(N0 / 2) * randn(1);
1433 	/*
1434 	 * Calculate the log of the probability that each of
1435 	 * the constellation points was transmitted
1436 	 */
1437 	for (m = 0; m < qam; m++)
1438 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1439 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1440 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1441 	for (k = 0; k < qm; k++) {
1442 		p0 = -999999;
1443 		p1 = -999999;
1444 		/* For each constellation point */
1445 		for (m = 0; m < qam; m++) {
1446 			if ((m >> (qm - k - 1)) & 1)
1447 				p1 = maxstar(p1, log_syml_prob[m]);
1448 			else
1449 				p0 = maxstar(p0, log_syml_prob[m]);
1450 		}
1451 		/* Calculate the LLR */
1452 		llr_ = p0 - p1;
1453 		llr_ *= (1 << ldpc_llr_decimals);
1454 		llr_ = round(llr_);
1455 		if (llr_ > llr_max)
1456 			llr_ = llr_max;
1457 		if (llr_ < -llr_max)
1458 			llr_ = -llr_max;
1459 		llrs[qm * i + k] = (int8_t) llr_;
1460 	}
1461 }
1462 
1463 static void
1464 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1465 {
1466 	double b, b1, n;
1467 	double coeff = 2.0 * sqrt(N0);
1468 
1469 	/* Ignore in vectors rare quasi null LLRs not to be saturated */
1470 	if (llrs[j] < 8 && llrs[j] > -8)
1471 		return;
1472 
1473 	/* Note don't change sign here */
1474 	n = randn(j % 2);
1475 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1476 			+ coeff * n) / N0;
1477 	b = b1 * (1 << ldpc_llr_decimals);
1478 	b = round(b);
1479 	if (b > llr_max)
1480 		b = llr_max;
1481 	if (b < -llr_max)
1482 		b = -llr_max;
1483 	llrs[j] = (int8_t) b;
1484 }
1485 
1486 /* Generate LLR for a given SNR */
1487 static void
1488 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1489 		struct rte_bbdev_dec_op *ref_op)
1490 {
1491 	struct rte_mbuf *m;
1492 	uint16_t qm;
1493 	uint32_t i, j, e, range;
1494 	double N0, llr_max;
1495 
1496 	e = ref_op->ldpc_dec.cb_params.e;
1497 	qm = ref_op->ldpc_dec.q_m;
1498 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1499 	range = e / qm;
1500 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1501 
1502 	for (i = 0; i < n; ++i) {
1503 		m = inputs[i].data;
1504 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1505 		if (qm == 8) {
1506 			for (j = 0; j < range; ++j)
1507 				gen_qm8_llr(llrs, j, N0, llr_max);
1508 		} else if (qm == 6) {
1509 			for (j = 0; j < range; ++j)
1510 				gen_qm6_llr(llrs, j, N0, llr_max);
1511 		} else if (qm == 4) {
1512 			for (j = 0; j < range; ++j)
1513 				gen_qm4_llr(llrs, j, N0, llr_max);
1514 		} else {
1515 			for (j = 0; j < e; ++j)
1516 				gen_qm2_llr(llrs, j, N0, llr_max);
1517 		}
1518 	}
1519 }
1520 
1521 static void
1522 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1523 		unsigned int start_idx,
1524 		struct rte_bbdev_op_data *inputs,
1525 		struct rte_bbdev_op_data *hard_outputs,
1526 		struct rte_bbdev_op_data *soft_outputs,
1527 		struct rte_bbdev_op_data *harq_inputs,
1528 		struct rte_bbdev_op_data *harq_outputs,
1529 		struct rte_bbdev_dec_op *ref_op)
1530 {
1531 	unsigned int i;
1532 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1533 
1534 	for (i = 0; i < n; ++i) {
1535 		if (ldpc_dec->code_block_mode == 0) {
1536 			ops[i]->ldpc_dec.tb_params.ea =
1537 					ldpc_dec->tb_params.ea;
1538 			ops[i]->ldpc_dec.tb_params.eb =
1539 					ldpc_dec->tb_params.eb;
1540 			ops[i]->ldpc_dec.tb_params.c =
1541 					ldpc_dec->tb_params.c;
1542 			ops[i]->ldpc_dec.tb_params.cab =
1543 					ldpc_dec->tb_params.cab;
1544 			ops[i]->ldpc_dec.tb_params.r =
1545 					ldpc_dec->tb_params.r;
1546 		} else {
1547 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1548 		}
1549 
1550 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1551 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1552 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1553 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1554 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1555 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1556 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1557 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1558 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1559 
1560 		if (hard_outputs != NULL)
1561 			ops[i]->ldpc_dec.hard_output =
1562 					hard_outputs[start_idx + i];
1563 		if (inputs != NULL)
1564 			ops[i]->ldpc_dec.input =
1565 					inputs[start_idx + i];
1566 		if (soft_outputs != NULL)
1567 			ops[i]->ldpc_dec.soft_output =
1568 					soft_outputs[start_idx + i];
1569 		if (harq_inputs != NULL)
1570 			ops[i]->ldpc_dec.harq_combined_input =
1571 					harq_inputs[start_idx + i];
1572 		if (harq_outputs != NULL)
1573 			ops[i]->ldpc_dec.harq_combined_output =
1574 					harq_outputs[start_idx + i];
1575 	}
1576 }
1577 
1578 
1579 static void
1580 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1581 		unsigned int start_idx,
1582 		struct rte_bbdev_op_data *inputs,
1583 		struct rte_bbdev_op_data *outputs,
1584 		struct rte_bbdev_enc_op *ref_op)
1585 {
1586 	unsigned int i;
1587 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1588 	for (i = 0; i < n; ++i) {
1589 		if (ldpc_enc->code_block_mode == 0) {
1590 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1591 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1592 			ops[i]->ldpc_enc.tb_params.cab =
1593 					ldpc_enc->tb_params.cab;
1594 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1595 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1596 		} else {
1597 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1598 		}
1599 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1600 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1601 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1602 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1603 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1604 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1605 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1606 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1607 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1608 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1609 	}
1610 }
1611 
1612 static int
1613 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1614 		unsigned int order_idx, const int expected_status)
1615 {
1616 	int status = op->status;
1617 	/* ignore parity mismatch false alarms for long iterations */
1618 	if (get_iter_max() >= 10) {
1619 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1620 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1621 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1622 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1623 		}
1624 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1625 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1626 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1627 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1628 		}
1629 	}
1630 
1631 	TEST_ASSERT(status == expected_status,
1632 			"op_status (%d) != expected_status (%d)",
1633 			op->status, expected_status);
1634 
1635 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1636 			"Ordering error, expected %p, got %p",
1637 			(void *)(uintptr_t)order_idx, op->opaque_data);
1638 
1639 	return TEST_SUCCESS;
1640 }
1641 
1642 static int
1643 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1644 		unsigned int order_idx, const int expected_status)
1645 {
1646 	TEST_ASSERT(op->status == expected_status,
1647 			"op_status (%d) != expected_status (%d)",
1648 			op->status, expected_status);
1649 
1650 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1651 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1652 				"Ordering error, expected %p, got %p",
1653 				(void *)(uintptr_t)order_idx, op->opaque_data);
1654 
1655 	return TEST_SUCCESS;
1656 }
1657 
1658 static inline int
1659 validate_op_chain(struct rte_bbdev_op_data *op,
1660 		struct op_data_entries *orig_op)
1661 {
1662 	uint8_t i;
1663 	struct rte_mbuf *m = op->data;
1664 	uint8_t nb_dst_segments = orig_op->nb_segments;
1665 	uint32_t total_data_size = 0;
1666 
1667 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1668 			"Number of segments differ in original (%u) and filled (%u) op",
1669 			nb_dst_segments, m->nb_segs);
1670 
1671 	/* Validate each mbuf segment length */
1672 	for (i = 0; i < nb_dst_segments; ++i) {
1673 		/* Apply offset to the first mbuf segment */
1674 		uint16_t offset = (i == 0) ? op->offset : 0;
1675 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1676 		total_data_size += orig_op->segments[i].length;
1677 
1678 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1679 				"Length of segment differ in original (%u) and filled (%u) op",
1680 				orig_op->segments[i].length, data_len);
1681 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1682 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1683 				data_len,
1684 				"Output buffers (CB=%u) are not equal", i);
1685 		m = m->next;
1686 	}
1687 
1688 	/* Validate total mbuf pkt length */
1689 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1690 	TEST_ASSERT(total_data_size == pkt_len,
1691 			"Length of data differ in original (%u) and filled (%u) op",
1692 			total_data_size, pkt_len);
1693 
1694 	return TEST_SUCCESS;
1695 }
1696 
1697 /*
1698  * Compute K0 for a given configuration for HARQ output length computation
1699  * As per definition in 3GPP 38.212 Table 5.4.2.1-2
1700  */
1701 static inline uint16_t
1702 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1703 {
1704 	if (rv_index == 0)
1705 		return 0;
1706 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1707 	if (n_cb == n) {
1708 		if (rv_index == 1)
1709 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1710 		else if (rv_index == 2)
1711 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1712 		else
1713 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1714 	}
1715 	/* LBRM case - includes a division by N */
1716 	if (rv_index == 1)
1717 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1718 				/ n) * z_c;
1719 	else if (rv_index == 2)
1720 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1721 				/ n) * z_c;
1722 	else
1723 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1724 				/ n) * z_c;
1725 }
1726 
1727 /* HARQ output length including the Filler bits */
1728 static inline uint16_t
1729 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1730 {
1731 	uint16_t k0 = 0;
1732 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1733 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1734 	/* Compute RM out size and number of rows */
1735 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1736 			* ops_ld->z_c - ops_ld->n_filler;
1737 	uint16_t deRmOutSize = RTE_MIN(
1738 			k0 + ops_ld->cb_params.e +
1739 			((k0 > parity_offset) ?
1740 					0 : ops_ld->n_filler),
1741 					ops_ld->n_cb);
1742 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1743 			/ ops_ld->z_c);
1744 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1745 	return harq_output_len;
1746 }
1747 
1748 static inline int
1749 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1750 		struct op_data_entries *orig_op,
1751 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1752 {
1753 	uint8_t i;
1754 	uint32_t j, jj, k;
1755 	struct rte_mbuf *m = op->data;
1756 	uint8_t nb_dst_segments = orig_op->nb_segments;
1757 	uint32_t total_data_size = 0;
1758 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1759 	uint32_t byte_error = 0, cum_error = 0, error;
1760 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1761 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1762 	uint16_t parity_offset;
1763 
1764 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1765 			"Number of segments differ in original (%u) and filled (%u) op",
1766 			nb_dst_segments, m->nb_segs);
1767 
1768 	/* Validate each mbuf segment length */
1769 	for (i = 0; i < nb_dst_segments; ++i) {
1770 		/* Apply offset to the first mbuf segment */
1771 		uint16_t offset = (i == 0) ? op->offset : 0;
1772 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1773 		total_data_size += orig_op->segments[i].length;
1774 
1775 		TEST_ASSERT(orig_op->segments[i].length <
1776 				(uint32_t)(data_len + 64),
1777 				"Length of segment differ in original (%u) and filled (%u) op",
1778 				orig_op->segments[i].length, data_len);
1779 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1780 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1781 
1782 		if (!(ldpc_cap_flags &
1783 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1784 				) || (ops_ld->op_flags &
1785 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1786 			data_len -= ops_ld->z_c;
1787 			parity_offset = data_len;
1788 		} else {
1789 			/* Compute RM out size and number of rows */
1790 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1791 					* ops_ld->z_c - ops_ld->n_filler;
1792 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1793 					ops_ld->n_filler;
1794 			if (data_len > deRmOutSize)
1795 				data_len = deRmOutSize;
1796 			if (data_len > orig_op->segments[i].length)
1797 				data_len = orig_op->segments[i].length;
1798 		}
1799 		/*
1800 		 * HARQ output can have minor differences
1801 		 * due to integer representation and related scaling
1802 		 */
1803 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1804 			if (j == parity_offset) {
1805 				/* Special Handling of the filler bits */
1806 				for (k = 0; k < ops_ld->n_filler; k++) {
1807 					if (harq_out[jj] !=
1808 							llr_max_pre_scaling) {
1809 						printf("HARQ Filler issue %d: %d %d\n",
1810 							jj, harq_out[jj],
1811 							llr_max);
1812 						byte_error++;
1813 					}
1814 					jj++;
1815 				}
1816 			}
1817 			if (!(ops_ld->op_flags &
1818 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1819 				if (ldpc_llr_decimals > 1)
1820 					harq_out[jj] = (harq_out[jj] + 1)
1821 						>> (ldpc_llr_decimals - 1);
1822 				/* Saturated to S7 */
1823 				if (harq_orig[j] > llr_max)
1824 					harq_orig[j] = llr_max;
1825 				if (harq_orig[j] < -llr_max)
1826 					harq_orig[j] = -llr_max;
1827 			}
1828 			if (harq_orig[j] != harq_out[jj]) {
1829 				error = (harq_orig[j] > harq_out[jj]) ?
1830 						harq_orig[j] - harq_out[jj] :
1831 						harq_out[jj] - harq_orig[j];
1832 				abs_harq_origin = harq_orig[j] > 0 ?
1833 							harq_orig[j] :
1834 							-harq_orig[j];
1835 				/* Residual quantization error */
1836 				if ((error > 8 && (abs_harq_origin <
1837 						(llr_max - 16))) ||
1838 						(error > 16)) {
1839 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1840 							j, harq_orig[j],
1841 							harq_out[jj], error);
1842 					byte_error++;
1843 					cum_error += error;
1844 				}
1845 			}
1846 		}
1847 		m = m->next;
1848 	}
1849 
1850 	if (byte_error)
1851 		TEST_ASSERT(byte_error <= 1,
1852 				"HARQ output mismatch (%d) %d",
1853 				byte_error, cum_error);
1854 
1855 	/* Validate total mbuf pkt length */
1856 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1857 	TEST_ASSERT(total_data_size < pkt_len + 64,
1858 			"Length of data differ in original (%u) and filled (%u) op",
1859 			total_data_size, pkt_len);
1860 
1861 	return TEST_SUCCESS;
1862 }
1863 
1864 static int
1865 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1866 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1867 {
1868 	unsigned int i;
1869 	int ret;
1870 	struct op_data_entries *hard_data_orig =
1871 			&test_vector.entries[DATA_HARD_OUTPUT];
1872 	struct op_data_entries *soft_data_orig =
1873 			&test_vector.entries[DATA_SOFT_OUTPUT];
1874 	struct rte_bbdev_op_turbo_dec *ops_td;
1875 	struct rte_bbdev_op_data *hard_output;
1876 	struct rte_bbdev_op_data *soft_output;
1877 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1878 
1879 	for (i = 0; i < n; ++i) {
1880 		ops_td = &ops[i]->turbo_dec;
1881 		hard_output = &ops_td->hard_output;
1882 		soft_output = &ops_td->soft_output;
1883 
1884 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1885 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1886 					"Returned iter_count (%d) > expected iter_count (%d)",
1887 					ops_td->iter_count, ref_td->iter_count);
1888 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1889 		TEST_ASSERT_SUCCESS(ret,
1890 				"Checking status and ordering for decoder failed");
1891 
1892 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1893 				hard_data_orig),
1894 				"Hard output buffers (CB=%u) are not equal",
1895 				i);
1896 
1897 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1898 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1899 					soft_data_orig),
1900 					"Soft output buffers (CB=%u) are not equal",
1901 					i);
1902 	}
1903 
1904 	return TEST_SUCCESS;
1905 }
1906 
1907 /* Check Number of code blocks errors */
1908 static int
1909 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
1910 {
1911 	unsigned int i;
1912 	struct op_data_entries *hard_data_orig =
1913 			&test_vector.entries[DATA_HARD_OUTPUT];
1914 	struct rte_bbdev_op_ldpc_dec *ops_td;
1915 	struct rte_bbdev_op_data *hard_output;
1916 	int errors = 0;
1917 	struct rte_mbuf *m;
1918 
1919 	for (i = 0; i < n; ++i) {
1920 		ops_td = &ops[i]->ldpc_dec;
1921 		hard_output = &ops_td->hard_output;
1922 		m = hard_output->data;
1923 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
1924 				hard_data_orig->segments[0].addr,
1925 				hard_data_orig->segments[0].length))
1926 			errors++;
1927 	}
1928 	return errors;
1929 }
1930 
1931 static int
1932 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1933 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1934 {
1935 	unsigned int i;
1936 	int ret;
1937 	struct op_data_entries *hard_data_orig =
1938 			&test_vector.entries[DATA_HARD_OUTPUT];
1939 	struct op_data_entries *soft_data_orig =
1940 			&test_vector.entries[DATA_SOFT_OUTPUT];
1941 	struct op_data_entries *harq_data_orig =
1942 				&test_vector.entries[DATA_HARQ_OUTPUT];
1943 	struct rte_bbdev_op_ldpc_dec *ops_td;
1944 	struct rte_bbdev_op_data *hard_output;
1945 	struct rte_bbdev_op_data *harq_output;
1946 	struct rte_bbdev_op_data *soft_output;
1947 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1948 
1949 	for (i = 0; i < n; ++i) {
1950 		ops_td = &ops[i]->ldpc_dec;
1951 		hard_output = &ops_td->hard_output;
1952 		harq_output = &ops_td->harq_combined_output;
1953 		soft_output = &ops_td->soft_output;
1954 
1955 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1956 		TEST_ASSERT_SUCCESS(ret,
1957 				"Checking status and ordering for decoder failed");
1958 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1959 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1960 					"Returned iter_count (%d) > expected iter_count (%d)",
1961 					ops_td->iter_count, ref_td->iter_count);
1962 		/*
1963 		 * We can ignore output data when the decoding failed to
1964 		 * converge or for loop-back cases
1965 		 */
1966 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
1967 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
1968 				) && (
1969 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
1970 						)) == 0)
1971 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1972 					hard_data_orig),
1973 					"Hard output buffers (CB=%u) are not equal",
1974 					i);
1975 
1976 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1977 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1978 					soft_data_orig),
1979 					"Soft output buffers (CB=%u) are not equal",
1980 					i);
1981 		if (ref_op->ldpc_dec.op_flags &
1982 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1983 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1984 					harq_data_orig, ops_td),
1985 					"HARQ output buffers (CB=%u) are not equal",
1986 					i);
1987 		}
1988 		if (ref_op->ldpc_dec.op_flags &
1989 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1990 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1991 					harq_data_orig, ops_td),
1992 					"HARQ output buffers (CB=%u) are not equal",
1993 					i);
1994 
1995 	}
1996 
1997 	return TEST_SUCCESS;
1998 }
1999 
2000 
2001 static int
2002 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2003 		struct rte_bbdev_enc_op *ref_op)
2004 {
2005 	unsigned int i;
2006 	int ret;
2007 	struct op_data_entries *hard_data_orig =
2008 			&test_vector.entries[DATA_HARD_OUTPUT];
2009 
2010 	for (i = 0; i < n; ++i) {
2011 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2012 		TEST_ASSERT_SUCCESS(ret,
2013 				"Checking status and ordering for encoder failed");
2014 		TEST_ASSERT_SUCCESS(validate_op_chain(
2015 				&ops[i]->turbo_enc.output,
2016 				hard_data_orig),
2017 				"Output buffers (CB=%u) are not equal",
2018 				i);
2019 	}
2020 
2021 	return TEST_SUCCESS;
2022 }
2023 
2024 static int
2025 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2026 		struct rte_bbdev_enc_op *ref_op)
2027 {
2028 	unsigned int i;
2029 	int ret;
2030 	struct op_data_entries *hard_data_orig =
2031 			&test_vector.entries[DATA_HARD_OUTPUT];
2032 
2033 	for (i = 0; i < n; ++i) {
2034 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2035 		TEST_ASSERT_SUCCESS(ret,
2036 				"Checking status and ordering for encoder failed");
2037 		TEST_ASSERT_SUCCESS(validate_op_chain(
2038 				&ops[i]->ldpc_enc.output,
2039 				hard_data_orig),
2040 				"Output buffers (CB=%u) are not equal",
2041 				i);
2042 	}
2043 
2044 	return TEST_SUCCESS;
2045 }
2046 
2047 static void
2048 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2049 {
2050 	unsigned int i;
2051 	struct op_data_entries *entry;
2052 
2053 	op->turbo_dec = test_vector.turbo_dec;
2054 	entry = &test_vector.entries[DATA_INPUT];
2055 	for (i = 0; i < entry->nb_segments; ++i)
2056 		op->turbo_dec.input.length +=
2057 				entry->segments[i].length;
2058 }
2059 
2060 static void
2061 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2062 {
2063 	unsigned int i;
2064 	struct op_data_entries *entry;
2065 
2066 	op->ldpc_dec = test_vector.ldpc_dec;
2067 	entry = &test_vector.entries[DATA_INPUT];
2068 	for (i = 0; i < entry->nb_segments; ++i)
2069 		op->ldpc_dec.input.length +=
2070 				entry->segments[i].length;
2071 	if (test_vector.ldpc_dec.op_flags &
2072 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2073 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2074 		for (i = 0; i < entry->nb_segments; ++i)
2075 			op->ldpc_dec.harq_combined_input.length +=
2076 				entry->segments[i].length;
2077 	}
2078 }
2079 
2080 
2081 static void
2082 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2083 {
2084 	unsigned int i;
2085 	struct op_data_entries *entry;
2086 
2087 	op->turbo_enc = test_vector.turbo_enc;
2088 	entry = &test_vector.entries[DATA_INPUT];
2089 	for (i = 0; i < entry->nb_segments; ++i)
2090 		op->turbo_enc.input.length +=
2091 				entry->segments[i].length;
2092 }
2093 
2094 static void
2095 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2096 {
2097 	unsigned int i;
2098 	struct op_data_entries *entry;
2099 
2100 	op->ldpc_enc = test_vector.ldpc_enc;
2101 	entry = &test_vector.entries[DATA_INPUT];
2102 	for (i = 0; i < entry->nb_segments; ++i)
2103 		op->ldpc_enc.input.length +=
2104 				entry->segments[i].length;
2105 }
2106 
2107 static uint32_t
2108 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2109 {
2110 	uint8_t i;
2111 	uint32_t c, r, tb_size = 0;
2112 
2113 	if (op->turbo_dec.code_block_mode) {
2114 		tb_size = op->turbo_dec.tb_params.k_neg;
2115 	} else {
2116 		c = op->turbo_dec.tb_params.c;
2117 		r = op->turbo_dec.tb_params.r;
2118 		for (i = 0; i < c-r; i++)
2119 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2120 				op->turbo_dec.tb_params.k_neg :
2121 				op->turbo_dec.tb_params.k_pos;
2122 	}
2123 	return tb_size;
2124 }
2125 
2126 static uint32_t
2127 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2128 {
2129 	uint8_t i;
2130 	uint32_t c, r, tb_size = 0;
2131 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2132 
2133 	if (op->ldpc_dec.code_block_mode) {
2134 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2135 	} else {
2136 		c = op->ldpc_dec.tb_params.c;
2137 		r = op->ldpc_dec.tb_params.r;
2138 		for (i = 0; i < c-r; i++)
2139 			tb_size += sys_cols * op->ldpc_dec.z_c
2140 					- op->ldpc_dec.n_filler;
2141 	}
2142 	return tb_size;
2143 }
2144 
2145 static uint32_t
2146 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2147 {
2148 	uint8_t i;
2149 	uint32_t c, r, tb_size = 0;
2150 
2151 	if (op->turbo_enc.code_block_mode) {
2152 		tb_size = op->turbo_enc.tb_params.k_neg;
2153 	} else {
2154 		c = op->turbo_enc.tb_params.c;
2155 		r = op->turbo_enc.tb_params.r;
2156 		for (i = 0; i < c-r; i++)
2157 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2158 				op->turbo_enc.tb_params.k_neg :
2159 				op->turbo_enc.tb_params.k_pos;
2160 	}
2161 	return tb_size;
2162 }
2163 
2164 static uint32_t
2165 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2166 {
2167 	uint8_t i;
2168 	uint32_t c, r, tb_size = 0;
2169 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2170 
2171 	if (op->turbo_enc.code_block_mode) {
2172 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2173 	} else {
2174 		c = op->turbo_enc.tb_params.c;
2175 		r = op->turbo_enc.tb_params.r;
2176 		for (i = 0; i < c-r; i++)
2177 			tb_size += sys_cols * op->ldpc_enc.z_c
2178 					- op->ldpc_enc.n_filler;
2179 	}
2180 	return tb_size;
2181 }
2182 
2183 
2184 static int
2185 init_test_op_params(struct test_op_params *op_params,
2186 		enum rte_bbdev_op_type op_type, const int expected_status,
2187 		const int vector_mask, struct rte_mempool *ops_mp,
2188 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2189 {
2190 	int ret = 0;
2191 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2192 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2193 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2194 				&op_params->ref_dec_op, 1);
2195 	else
2196 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2197 				&op_params->ref_enc_op, 1);
2198 
2199 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2200 
2201 	op_params->mp = ops_mp;
2202 	op_params->burst_sz = burst_sz;
2203 	op_params->num_to_process = num_to_process;
2204 	op_params->num_lcores = num_lcores;
2205 	op_params->vector_mask = vector_mask;
2206 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2207 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2208 		op_params->ref_dec_op->status = expected_status;
2209 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2210 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2211 		op_params->ref_enc_op->status = expected_status;
2212 	return 0;
2213 }
2214 
2215 static int
2216 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2217 		struct test_op_params *op_params)
2218 {
2219 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2220 	unsigned int i;
2221 	struct active_device *ad;
2222 	unsigned int burst_sz = get_burst_sz();
2223 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2224 	const struct rte_bbdev_op_cap *capabilities = NULL;
2225 
2226 	ad = &active_devs[dev_id];
2227 
2228 	/* Check if device supports op_type */
2229 	if (!is_avail_op(ad, test_vector.op_type))
2230 		return TEST_SUCCESS;
2231 
2232 	struct rte_bbdev_info info;
2233 	rte_bbdev_info_get(ad->dev_id, &info);
2234 	socket_id = GET_SOCKET(info.socket_id);
2235 
2236 	f_ret = create_mempools(ad, socket_id, op_type,
2237 			get_num_ops());
2238 	if (f_ret != TEST_SUCCESS) {
2239 		printf("Couldn't create mempools");
2240 		goto fail;
2241 	}
2242 	if (op_type == RTE_BBDEV_OP_NONE)
2243 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2244 
2245 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2246 			test_vector.expected_status,
2247 			test_vector.mask,
2248 			ad->ops_mempool,
2249 			burst_sz,
2250 			get_num_ops(),
2251 			get_num_lcores());
2252 	if (f_ret != TEST_SUCCESS) {
2253 		printf("Couldn't init test op params");
2254 		goto fail;
2255 	}
2256 
2257 
2258 	/* Find capabilities */
2259 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2260 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2261 		if (cap->type == test_vector.op_type) {
2262 			capabilities = cap;
2263 			break;
2264 		}
2265 		cap++;
2266 	}
2267 	TEST_ASSERT_NOT_NULL(capabilities,
2268 			"Couldn't find capabilities");
2269 
2270 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2271 		create_reference_dec_op(op_params->ref_dec_op);
2272 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2273 		create_reference_enc_op(op_params->ref_enc_op);
2274 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2275 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2276 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2277 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2278 
2279 	for (i = 0; i < ad->nb_queues; ++i) {
2280 		f_ret = fill_queue_buffers(op_params,
2281 				ad->in_mbuf_pool,
2282 				ad->hard_out_mbuf_pool,
2283 				ad->soft_out_mbuf_pool,
2284 				ad->harq_in_mbuf_pool,
2285 				ad->harq_out_mbuf_pool,
2286 				ad->queue_ids[i],
2287 				capabilities,
2288 				info.drv.min_alignment,
2289 				socket_id);
2290 		if (f_ret != TEST_SUCCESS) {
2291 			printf("Couldn't init queue buffers");
2292 			goto fail;
2293 		}
2294 	}
2295 
2296 	/* Run test case function */
2297 	t_ret = test_case_func(ad, op_params);
2298 
2299 	/* Free active device resources and return */
2300 	free_buffers(ad, op_params);
2301 	return t_ret;
2302 
2303 fail:
2304 	free_buffers(ad, op_params);
2305 	return TEST_FAILED;
2306 }
2307 
2308 /* Run given test function per active device per supported op type
2309  * per burst size.
2310  */
2311 static int
2312 run_test_case(test_case_function *test_case_func)
2313 {
2314 	int ret = 0;
2315 	uint8_t dev;
2316 
2317 	/* Alloc op_params */
2318 	struct test_op_params *op_params = rte_zmalloc(NULL,
2319 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2320 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2321 			RTE_ALIGN(sizeof(struct test_op_params),
2322 				RTE_CACHE_LINE_SIZE));
2323 
2324 	/* For each device run test case function */
2325 	for (dev = 0; dev < nb_active_devs; ++dev)
2326 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2327 
2328 	rte_free(op_params);
2329 
2330 	return ret;
2331 }
2332 
2333 
2334 /* Push back the HARQ output from DDR to host */
2335 static void
2336 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2337 		struct rte_bbdev_dec_op **ops,
2338 		const uint16_t n)
2339 {
2340 	uint16_t j;
2341 	int save_status, ret;
2342 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2343 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2344 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2345 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2346 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2347 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2348 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2349 	for (j = 0; j < n; ++j) {
2350 		if ((loopback && mem_out) || hc_out) {
2351 			save_status = ops[j]->status;
2352 			ops[j]->ldpc_dec.op_flags =
2353 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2354 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2355 			if (h_comp)
2356 				ops[j]->ldpc_dec.op_flags +=
2357 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2358 			ops[j]->ldpc_dec.harq_combined_input.offset =
2359 					harq_offset;
2360 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2361 			harq_offset += HARQ_INCR;
2362 			if (!loopback)
2363 				ops[j]->ldpc_dec.harq_combined_input.length =
2364 				ops[j]->ldpc_dec.harq_combined_output.length;
2365 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2366 					&ops[j], 1);
2367 			ret = 0;
2368 			while (ret == 0)
2369 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2370 						dev_id, queue_id,
2371 						&ops_deq[j], 1);
2372 			ops[j]->ldpc_dec.op_flags = flags;
2373 			ops[j]->status = save_status;
2374 		}
2375 	}
2376 }
2377 
2378 /*
2379  * Push back the HARQ output from HW DDR to Host
2380  * Preload HARQ memory input and adjust HARQ offset
2381  */
2382 static void
2383 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2384 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2385 		bool preload)
2386 {
2387 	uint16_t j;
2388 	int ret;
2389 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2390 	struct rte_bbdev_op_data save_hc_in, save_hc_out;
2391 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2392 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2393 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2394 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2395 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2396 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2397 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2398 	for (j = 0; j < n; ++j) {
2399 		if ((mem_in || hc_in) && preload) {
2400 			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
2401 			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
2402 			ops[j]->ldpc_dec.op_flags =
2403 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2404 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2405 			if (h_comp)
2406 				ops[j]->ldpc_dec.op_flags +=
2407 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2408 			ops[j]->ldpc_dec.harq_combined_output.offset =
2409 					harq_offset;
2410 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2411 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2412 					&ops[j], 1);
2413 			ret = 0;
2414 			while (ret == 0)
2415 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2416 					dev_id, queue_id, &ops_deq[j], 1);
2417 			ops[j]->ldpc_dec.op_flags = flags;
2418 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
2419 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
2420 		}
2421 		/* Adjust HARQ offset when we reach external DDR */
2422 		if (mem_in || hc_in)
2423 			ops[j]->ldpc_dec.harq_combined_input.offset
2424 				= harq_offset;
2425 		if (mem_out || hc_out)
2426 			ops[j]->ldpc_dec.harq_combined_output.offset
2427 				= harq_offset;
2428 		harq_offset += HARQ_INCR;
2429 	}
2430 }
2431 
2432 static void
2433 dequeue_event_callback(uint16_t dev_id,
2434 		enum rte_bbdev_event_type event, void *cb_arg,
2435 		void *ret_param)
2436 {
2437 	int ret;
2438 	uint16_t i;
2439 	uint64_t total_time;
2440 	uint16_t deq, burst_sz, num_ops;
2441 	uint16_t queue_id = *(uint16_t *) ret_param;
2442 	struct rte_bbdev_info info;
2443 	double tb_len_bits;
2444 	struct thread_params *tp = cb_arg;
2445 
2446 	/* Find matching thread params using queue_id */
2447 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2448 		if (tp->queue_id == queue_id)
2449 			break;
2450 
2451 	if (i == MAX_QUEUES) {
2452 		printf("%s: Queue_id from interrupt details was not found!\n",
2453 				__func__);
2454 		return;
2455 	}
2456 
2457 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2458 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2459 		printf(
2460 			"Dequeue interrupt handler called for incorrect event!\n");
2461 		return;
2462 	}
2463 
2464 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2465 	num_ops = tp->op_params->num_to_process;
2466 
2467 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2468 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2469 				&tp->dec_ops[
2470 					rte_atomic16_read(&tp->nb_dequeued)],
2471 				burst_sz);
2472 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2473 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2474 				&tp->dec_ops[
2475 					rte_atomic16_read(&tp->nb_dequeued)],
2476 				burst_sz);
2477 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2478 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2479 				&tp->enc_ops[
2480 					rte_atomic16_read(&tp->nb_dequeued)],
2481 				burst_sz);
2482 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2483 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2484 				&tp->enc_ops[
2485 					rte_atomic16_read(&tp->nb_dequeued)],
2486 				burst_sz);
2487 
2488 	if (deq < burst_sz) {
2489 		printf(
2490 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2491 			burst_sz, deq);
2492 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2493 		return;
2494 	}
2495 
2496 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2497 		rte_atomic16_add(&tp->nb_dequeued, deq);
2498 		return;
2499 	}
2500 
2501 	total_time = rte_rdtsc_precise() - tp->start_time;
2502 
2503 	rte_bbdev_info_get(dev_id, &info);
2504 
2505 	ret = TEST_SUCCESS;
2506 
2507 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2508 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2509 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2510 				tp->op_params->vector_mask);
2511 		/* get the max of iter_count for all dequeued ops */
2512 		for (i = 0; i < num_ops; ++i)
2513 			tp->iter_count = RTE_MAX(
2514 					tp->dec_ops[i]->turbo_dec.iter_count,
2515 					tp->iter_count);
2516 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2517 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2518 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2519 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2520 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2521 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2522 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2523 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2524 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2525 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2526 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2527 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2528 				tp->op_params->vector_mask);
2529 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2530 	}
2531 
2532 	if (ret) {
2533 		printf("Buffers validation failed\n");
2534 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2535 	}
2536 
2537 	switch (test_vector.op_type) {
2538 	case RTE_BBDEV_OP_TURBO_DEC:
2539 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2540 		break;
2541 	case RTE_BBDEV_OP_TURBO_ENC:
2542 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2543 		break;
2544 	case RTE_BBDEV_OP_LDPC_DEC:
2545 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2546 		break;
2547 	case RTE_BBDEV_OP_LDPC_ENC:
2548 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2549 		break;
2550 	case RTE_BBDEV_OP_NONE:
2551 		tb_len_bits = 0.0;
2552 		break;
2553 	default:
2554 		printf("Unknown op type: %d\n", test_vector.op_type);
2555 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2556 		return;
2557 	}
2558 
2559 	tp->ops_per_sec += ((double)num_ops) /
2560 			((double)total_time / (double)rte_get_tsc_hz());
2561 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2562 			((double)total_time / (double)rte_get_tsc_hz());
2563 
2564 	rte_atomic16_add(&tp->nb_dequeued, deq);
2565 }
2566 
2567 static int
2568 throughput_intr_lcore_ldpc_dec(void *arg)
2569 {
2570 	struct thread_params *tp = arg;
2571 	unsigned int enqueued;
2572 	const uint16_t queue_id = tp->queue_id;
2573 	const uint16_t burst_sz = tp->op_params->burst_sz;
2574 	const uint16_t num_to_process = tp->op_params->num_to_process;
2575 	struct rte_bbdev_dec_op *ops[num_to_process];
2576 	struct test_buffers *bufs = NULL;
2577 	struct rte_bbdev_info info;
2578 	int ret, i, j;
2579 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2580 	uint16_t num_to_enq, enq;
2581 
2582 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2583 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2584 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2585 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2586 
2587 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2588 			"BURST_SIZE should be <= %u", MAX_BURST);
2589 
2590 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2591 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2592 			tp->dev_id, queue_id);
2593 
2594 	rte_bbdev_info_get(tp->dev_id, &info);
2595 
2596 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2597 			"NUM_OPS cannot exceed %u for this device",
2598 			info.drv.queue_size_lim);
2599 
2600 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2601 
2602 	rte_atomic16_clear(&tp->processing_status);
2603 	rte_atomic16_clear(&tp->nb_dequeued);
2604 
2605 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2606 		rte_pause();
2607 
2608 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2609 				num_to_process);
2610 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2611 			num_to_process);
2612 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2613 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2614 				bufs->hard_outputs, bufs->soft_outputs,
2615 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2616 
2617 	/* Set counter to validate the ordering */
2618 	for (j = 0; j < num_to_process; ++j)
2619 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2620 
2621 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2622 		for (i = 0; i < num_to_process; ++i) {
2623 			if (!loopback)
2624 				rte_pktmbuf_reset(
2625 					ops[i]->ldpc_dec.hard_output.data);
2626 			if (hc_out || loopback)
2627 				mbuf_reset(
2628 				ops[i]->ldpc_dec.harq_combined_output.data);
2629 		}
2630 
2631 		tp->start_time = rte_rdtsc_precise();
2632 		for (enqueued = 0; enqueued < num_to_process;) {
2633 			num_to_enq = burst_sz;
2634 
2635 			if (unlikely(num_to_process - enqueued < num_to_enq))
2636 				num_to_enq = num_to_process - enqueued;
2637 
2638 			enq = 0;
2639 			do {
2640 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2641 						tp->dev_id,
2642 						queue_id, &ops[enqueued],
2643 						num_to_enq);
2644 			} while (unlikely(num_to_enq != enq));
2645 			enqueued += enq;
2646 
2647 			/* Write to thread burst_sz current number of enqueued
2648 			 * descriptors. It ensures that proper number of
2649 			 * descriptors will be dequeued in callback
2650 			 * function - needed for last batch in case where
2651 			 * the number of operations is not a multiple of
2652 			 * burst size.
2653 			 */
2654 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2655 
2656 			/* Wait until processing of previous batch is
2657 			 * completed
2658 			 */
2659 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2660 					(int16_t) enqueued)
2661 				rte_pause();
2662 		}
2663 		if (j != TEST_REPETITIONS - 1)
2664 			rte_atomic16_clear(&tp->nb_dequeued);
2665 	}
2666 
2667 	return TEST_SUCCESS;
2668 }
2669 
2670 static int
2671 throughput_intr_lcore_dec(void *arg)
2672 {
2673 	struct thread_params *tp = arg;
2674 	unsigned int enqueued;
2675 	const uint16_t queue_id = tp->queue_id;
2676 	const uint16_t burst_sz = tp->op_params->burst_sz;
2677 	const uint16_t num_to_process = tp->op_params->num_to_process;
2678 	struct rte_bbdev_dec_op *ops[num_to_process];
2679 	struct test_buffers *bufs = NULL;
2680 	struct rte_bbdev_info info;
2681 	int ret, i, j;
2682 	uint16_t num_to_enq, enq;
2683 
2684 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2685 			"BURST_SIZE should be <= %u", MAX_BURST);
2686 
2687 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2688 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2689 			tp->dev_id, queue_id);
2690 
2691 	rte_bbdev_info_get(tp->dev_id, &info);
2692 
2693 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2694 			"NUM_OPS cannot exceed %u for this device",
2695 			info.drv.queue_size_lim);
2696 
2697 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2698 
2699 	rte_atomic16_clear(&tp->processing_status);
2700 	rte_atomic16_clear(&tp->nb_dequeued);
2701 
2702 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2703 		rte_pause();
2704 
2705 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2706 				num_to_process);
2707 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2708 			num_to_process);
2709 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2710 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2711 				bufs->hard_outputs, bufs->soft_outputs,
2712 				tp->op_params->ref_dec_op);
2713 
2714 	/* Set counter to validate the ordering */
2715 	for (j = 0; j < num_to_process; ++j)
2716 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2717 
2718 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2719 		for (i = 0; i < num_to_process; ++i)
2720 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2721 
2722 		tp->start_time = rte_rdtsc_precise();
2723 		for (enqueued = 0; enqueued < num_to_process;) {
2724 			num_to_enq = burst_sz;
2725 
2726 			if (unlikely(num_to_process - enqueued < num_to_enq))
2727 				num_to_enq = num_to_process - enqueued;
2728 
2729 			enq = 0;
2730 			do {
2731 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2732 						queue_id, &ops[enqueued],
2733 						num_to_enq);
2734 			} while (unlikely(num_to_enq != enq));
2735 			enqueued += enq;
2736 
2737 			/* Write to thread burst_sz current number of enqueued
2738 			 * descriptors. It ensures that proper number of
2739 			 * descriptors will be dequeued in callback
2740 			 * function - needed for last batch in case where
2741 			 * the number of operations is not a multiple of
2742 			 * burst size.
2743 			 */
2744 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2745 
2746 			/* Wait until processing of previous batch is
2747 			 * completed
2748 			 */
2749 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2750 					(int16_t) enqueued)
2751 				rte_pause();
2752 		}
2753 		if (j != TEST_REPETITIONS - 1)
2754 			rte_atomic16_clear(&tp->nb_dequeued);
2755 	}
2756 
2757 	return TEST_SUCCESS;
2758 }
2759 
2760 static int
2761 throughput_intr_lcore_enc(void *arg)
2762 {
2763 	struct thread_params *tp = arg;
2764 	unsigned int enqueued;
2765 	const uint16_t queue_id = tp->queue_id;
2766 	const uint16_t burst_sz = tp->op_params->burst_sz;
2767 	const uint16_t num_to_process = tp->op_params->num_to_process;
2768 	struct rte_bbdev_enc_op *ops[num_to_process];
2769 	struct test_buffers *bufs = NULL;
2770 	struct rte_bbdev_info info;
2771 	int ret, i, j;
2772 	uint16_t num_to_enq, enq;
2773 
2774 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2775 			"BURST_SIZE should be <= %u", MAX_BURST);
2776 
2777 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2778 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2779 			tp->dev_id, queue_id);
2780 
2781 	rte_bbdev_info_get(tp->dev_id, &info);
2782 
2783 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2784 			"NUM_OPS cannot exceed %u for this device",
2785 			info.drv.queue_size_lim);
2786 
2787 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2788 
2789 	rte_atomic16_clear(&tp->processing_status);
2790 	rte_atomic16_clear(&tp->nb_dequeued);
2791 
2792 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2793 		rte_pause();
2794 
2795 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2796 			num_to_process);
2797 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2798 			num_to_process);
2799 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2800 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2801 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2802 
2803 	/* Set counter to validate the ordering */
2804 	for (j = 0; j < num_to_process; ++j)
2805 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2806 
2807 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2808 		for (i = 0; i < num_to_process; ++i)
2809 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2810 
2811 		tp->start_time = rte_rdtsc_precise();
2812 		for (enqueued = 0; enqueued < num_to_process;) {
2813 			num_to_enq = burst_sz;
2814 
2815 			if (unlikely(num_to_process - enqueued < num_to_enq))
2816 				num_to_enq = num_to_process - enqueued;
2817 
2818 			enq = 0;
2819 			do {
2820 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2821 						queue_id, &ops[enqueued],
2822 						num_to_enq);
2823 			} while (unlikely(enq != num_to_enq));
2824 			enqueued += enq;
2825 
2826 			/* Write to thread burst_sz current number of enqueued
2827 			 * descriptors. It ensures that proper number of
2828 			 * descriptors will be dequeued in callback
2829 			 * function - needed for last batch in case where
2830 			 * the number of operations is not a multiple of
2831 			 * burst size.
2832 			 */
2833 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2834 
2835 			/* Wait until processing of previous batch is
2836 			 * completed
2837 			 */
2838 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2839 					(int16_t) enqueued)
2840 				rte_pause();
2841 		}
2842 		if (j != TEST_REPETITIONS - 1)
2843 			rte_atomic16_clear(&tp->nb_dequeued);
2844 	}
2845 
2846 	return TEST_SUCCESS;
2847 }
2848 
2849 
2850 static int
2851 throughput_intr_lcore_ldpc_enc(void *arg)
2852 {
2853 	struct thread_params *tp = arg;
2854 	unsigned int enqueued;
2855 	const uint16_t queue_id = tp->queue_id;
2856 	const uint16_t burst_sz = tp->op_params->burst_sz;
2857 	const uint16_t num_to_process = tp->op_params->num_to_process;
2858 	struct rte_bbdev_enc_op *ops[num_to_process];
2859 	struct test_buffers *bufs = NULL;
2860 	struct rte_bbdev_info info;
2861 	int ret, i, j;
2862 	uint16_t num_to_enq, enq;
2863 
2864 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2865 			"BURST_SIZE should be <= %u", MAX_BURST);
2866 
2867 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2868 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2869 			tp->dev_id, queue_id);
2870 
2871 	rte_bbdev_info_get(tp->dev_id, &info);
2872 
2873 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2874 			"NUM_OPS cannot exceed %u for this device",
2875 			info.drv.queue_size_lim);
2876 
2877 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2878 
2879 	rte_atomic16_clear(&tp->processing_status);
2880 	rte_atomic16_clear(&tp->nb_dequeued);
2881 
2882 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2883 		rte_pause();
2884 
2885 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2886 			num_to_process);
2887 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2888 			num_to_process);
2889 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2890 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
2891 				bufs->inputs, bufs->hard_outputs,
2892 				tp->op_params->ref_enc_op);
2893 
2894 	/* Set counter to validate the ordering */
2895 	for (j = 0; j < num_to_process; ++j)
2896 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2897 
2898 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2899 		for (i = 0; i < num_to_process; ++i)
2900 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2901 
2902 		tp->start_time = rte_rdtsc_precise();
2903 		for (enqueued = 0; enqueued < num_to_process;) {
2904 			num_to_enq = burst_sz;
2905 
2906 			if (unlikely(num_to_process - enqueued < num_to_enq))
2907 				num_to_enq = num_to_process - enqueued;
2908 
2909 			enq = 0;
2910 			do {
2911 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
2912 						tp->dev_id,
2913 						queue_id, &ops[enqueued],
2914 						num_to_enq);
2915 			} while (unlikely(enq != num_to_enq));
2916 			enqueued += enq;
2917 
2918 			/* Write to thread burst_sz current number of enqueued
2919 			 * descriptors. It ensures that proper number of
2920 			 * descriptors will be dequeued in callback
2921 			 * function - needed for last batch in case where
2922 			 * the number of operations is not a multiple of
2923 			 * burst size.
2924 			 */
2925 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2926 
2927 			/* Wait until processing of previous batch is
2928 			 * completed
2929 			 */
2930 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2931 					(int16_t) enqueued)
2932 				rte_pause();
2933 		}
2934 		if (j != TEST_REPETITIONS - 1)
2935 			rte_atomic16_clear(&tp->nb_dequeued);
2936 	}
2937 
2938 	return TEST_SUCCESS;
2939 }
2940 
2941 static int
2942 throughput_pmd_lcore_dec(void *arg)
2943 {
2944 	struct thread_params *tp = arg;
2945 	uint16_t enq, deq;
2946 	uint64_t total_time = 0, start_time;
2947 	const uint16_t queue_id = tp->queue_id;
2948 	const uint16_t burst_sz = tp->op_params->burst_sz;
2949 	const uint16_t num_ops = tp->op_params->num_to_process;
2950 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2951 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2952 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2953 	struct test_buffers *bufs = NULL;
2954 	int i, j, ret;
2955 	struct rte_bbdev_info info;
2956 	uint16_t num_to_enq;
2957 
2958 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2959 			"BURST_SIZE should be <= %u", MAX_BURST);
2960 
2961 	rte_bbdev_info_get(tp->dev_id, &info);
2962 
2963 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2964 			"NUM_OPS cannot exceed %u for this device",
2965 			info.drv.queue_size_lim);
2966 
2967 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2968 
2969 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2970 		rte_pause();
2971 
2972 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2973 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2974 
2975 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2976 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2977 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
2978 
2979 	/* Set counter to validate the ordering */
2980 	for (j = 0; j < num_ops; ++j)
2981 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2982 
2983 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2984 
2985 		for (j = 0; j < num_ops; ++j)
2986 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2987 
2988 		start_time = rte_rdtsc_precise();
2989 
2990 		for (enq = 0, deq = 0; enq < num_ops;) {
2991 			num_to_enq = burst_sz;
2992 
2993 			if (unlikely(num_ops - enq < num_to_enq))
2994 				num_to_enq = num_ops - enq;
2995 
2996 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2997 					queue_id, &ops_enq[enq], num_to_enq);
2998 
2999 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3000 					queue_id, &ops_deq[deq], enq - deq);
3001 		}
3002 
3003 		/* dequeue the remaining */
3004 		while (deq < enq) {
3005 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3006 					queue_id, &ops_deq[deq], enq - deq);
3007 		}
3008 
3009 		total_time += rte_rdtsc_precise() - start_time;
3010 	}
3011 
3012 	tp->iter_count = 0;
3013 	/* get the max of iter_count for all dequeued ops */
3014 	for (i = 0; i < num_ops; ++i) {
3015 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3016 				tp->iter_count);
3017 	}
3018 
3019 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3020 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3021 				tp->op_params->vector_mask);
3022 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3023 	}
3024 
3025 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3026 
3027 	double tb_len_bits = calc_dec_TB_size(ref_op);
3028 
3029 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3030 			((double)total_time / (double)rte_get_tsc_hz());
3031 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3032 			1000000.0) / ((double)total_time /
3033 			(double)rte_get_tsc_hz());
3034 
3035 	return TEST_SUCCESS;
3036 }
3037 
3038 static int
3039 bler_pmd_lcore_ldpc_dec(void *arg)
3040 {
3041 	struct thread_params *tp = arg;
3042 	uint16_t enq, deq;
3043 	uint64_t total_time = 0, start_time;
3044 	const uint16_t queue_id = tp->queue_id;
3045 	const uint16_t burst_sz = tp->op_params->burst_sz;
3046 	const uint16_t num_ops = tp->op_params->num_to_process;
3047 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3048 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3049 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3050 	struct test_buffers *bufs = NULL;
3051 	int i, j, ret;
3052 	float parity_bler = 0;
3053 	struct rte_bbdev_info info;
3054 	uint16_t num_to_enq;
3055 	bool extDdr = check_bit(ldpc_cap_flags,
3056 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3057 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3058 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3059 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3060 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3061 
3062 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3063 			"BURST_SIZE should be <= %u", MAX_BURST);
3064 
3065 	rte_bbdev_info_get(tp->dev_id, &info);
3066 
3067 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3068 			"NUM_OPS cannot exceed %u for this device",
3069 			info.drv.queue_size_lim);
3070 
3071 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3072 
3073 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3074 		rte_pause();
3075 
3076 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3077 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3078 
3079 	/* For BLER tests we need to enable early termination */
3080 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3081 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3082 		ref_op->ldpc_dec.op_flags +=
3083 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3084 	ref_op->ldpc_dec.iter_max = get_iter_max();
3085 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3086 
3087 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3088 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3089 				bufs->hard_outputs, bufs->soft_outputs,
3090 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3091 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3092 
3093 	/* Set counter to validate the ordering */
3094 	for (j = 0; j < num_ops; ++j)
3095 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3096 
3097 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3098 		for (j = 0; j < num_ops; ++j) {
3099 			if (!loopback)
3100 				mbuf_reset(
3101 				ops_enq[j]->ldpc_dec.hard_output.data);
3102 			if (hc_out || loopback)
3103 				mbuf_reset(
3104 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3105 		}
3106 		if (extDdr) {
3107 			bool preload = i == (TEST_REPETITIONS - 1);
3108 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3109 					num_ops, preload);
3110 		}
3111 		start_time = rte_rdtsc_precise();
3112 
3113 		for (enq = 0, deq = 0; enq < num_ops;) {
3114 			num_to_enq = burst_sz;
3115 
3116 			if (unlikely(num_ops - enq < num_to_enq))
3117 				num_to_enq = num_ops - enq;
3118 
3119 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3120 					queue_id, &ops_enq[enq], num_to_enq);
3121 
3122 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3123 					queue_id, &ops_deq[deq], enq - deq);
3124 		}
3125 
3126 		/* dequeue the remaining */
3127 		while (deq < enq) {
3128 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3129 					queue_id, &ops_deq[deq], enq - deq);
3130 		}
3131 
3132 		total_time += rte_rdtsc_precise() - start_time;
3133 	}
3134 
3135 	tp->iter_count = 0;
3136 	tp->iter_average = 0;
3137 	/* get the max of iter_count for all dequeued ops */
3138 	for (i = 0; i < num_ops; ++i) {
3139 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3140 				tp->iter_count);
3141 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3142 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3143 			parity_bler += 1.0;
3144 	}
3145 
3146 	parity_bler /= num_ops; /* This one is based on SYND */
3147 	tp->iter_average /= num_ops;
3148 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3149 
3150 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3151 			&& tp->bler == 0
3152 			&& parity_bler == 0
3153 			&& !hc_out) {
3154 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3155 				tp->op_params->vector_mask);
3156 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3157 	}
3158 
3159 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3160 
3161 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3162 	tp->ops_per_sec = ((double)num_ops * 1) /
3163 			((double)total_time / (double)rte_get_tsc_hz());
3164 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3165 			1000000.0) / ((double)total_time /
3166 			(double)rte_get_tsc_hz());
3167 
3168 	return TEST_SUCCESS;
3169 }
3170 
3171 static int
3172 throughput_pmd_lcore_ldpc_dec(void *arg)
3173 {
3174 	struct thread_params *tp = arg;
3175 	uint16_t enq, deq;
3176 	uint64_t total_time = 0, start_time;
3177 	const uint16_t queue_id = tp->queue_id;
3178 	const uint16_t burst_sz = tp->op_params->burst_sz;
3179 	const uint16_t num_ops = tp->op_params->num_to_process;
3180 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3181 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3182 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3183 	struct test_buffers *bufs = NULL;
3184 	int i, j, ret;
3185 	struct rte_bbdev_info info;
3186 	uint16_t num_to_enq;
3187 	bool extDdr = check_bit(ldpc_cap_flags,
3188 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3189 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3190 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3191 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3192 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3193 
3194 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3195 			"BURST_SIZE should be <= %u", MAX_BURST);
3196 
3197 	rte_bbdev_info_get(tp->dev_id, &info);
3198 
3199 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3200 			"NUM_OPS cannot exceed %u for this device",
3201 			info.drv.queue_size_lim);
3202 
3203 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3204 
3205 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3206 		rte_pause();
3207 
3208 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3209 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3210 
3211 	/* For throughput tests we need to disable early termination */
3212 	if (check_bit(ref_op->ldpc_dec.op_flags,
3213 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3214 		ref_op->ldpc_dec.op_flags -=
3215 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3216 	ref_op->ldpc_dec.iter_max = get_iter_max();
3217 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3218 
3219 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3220 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3221 				bufs->hard_outputs, bufs->soft_outputs,
3222 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3223 
3224 	/* Set counter to validate the ordering */
3225 	for (j = 0; j < num_ops; ++j)
3226 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3227 
3228 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3229 		for (j = 0; j < num_ops; ++j) {
3230 			if (!loopback)
3231 				mbuf_reset(
3232 				ops_enq[j]->ldpc_dec.hard_output.data);
3233 			if (hc_out || loopback)
3234 				mbuf_reset(
3235 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3236 		}
3237 		if (extDdr) {
3238 			bool preload = i == (TEST_REPETITIONS - 1);
3239 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3240 					num_ops, preload);
3241 		}
3242 		start_time = rte_rdtsc_precise();
3243 
3244 		for (enq = 0, deq = 0; enq < num_ops;) {
3245 			num_to_enq = burst_sz;
3246 
3247 			if (unlikely(num_ops - enq < num_to_enq))
3248 				num_to_enq = num_ops - enq;
3249 
3250 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3251 					queue_id, &ops_enq[enq], num_to_enq);
3252 
3253 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3254 					queue_id, &ops_deq[deq], enq - deq);
3255 		}
3256 
3257 		/* dequeue the remaining */
3258 		while (deq < enq) {
3259 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3260 					queue_id, &ops_deq[deq], enq - deq);
3261 		}
3262 
3263 		total_time += rte_rdtsc_precise() - start_time;
3264 	}
3265 
3266 	tp->iter_count = 0;
3267 	/* get the max of iter_count for all dequeued ops */
3268 	for (i = 0; i < num_ops; ++i) {
3269 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3270 				tp->iter_count);
3271 	}
3272 	if (extDdr) {
3273 		/* Read loopback is not thread safe */
3274 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3275 	}
3276 
3277 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3278 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3279 				tp->op_params->vector_mask);
3280 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3281 	}
3282 
3283 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3284 
3285 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3286 
3287 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3288 			((double)total_time / (double)rte_get_tsc_hz());
3289 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3290 			1000000.0) / ((double)total_time /
3291 			(double)rte_get_tsc_hz());
3292 
3293 	return TEST_SUCCESS;
3294 }
3295 
3296 static int
3297 throughput_pmd_lcore_enc(void *arg)
3298 {
3299 	struct thread_params *tp = arg;
3300 	uint16_t enq, deq;
3301 	uint64_t total_time = 0, start_time;
3302 	const uint16_t queue_id = tp->queue_id;
3303 	const uint16_t burst_sz = tp->op_params->burst_sz;
3304 	const uint16_t num_ops = tp->op_params->num_to_process;
3305 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3306 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3307 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3308 	struct test_buffers *bufs = NULL;
3309 	int i, j, ret;
3310 	struct rte_bbdev_info info;
3311 	uint16_t num_to_enq;
3312 
3313 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3314 			"BURST_SIZE should be <= %u", MAX_BURST);
3315 
3316 	rte_bbdev_info_get(tp->dev_id, &info);
3317 
3318 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3319 			"NUM_OPS cannot exceed %u for this device",
3320 			info.drv.queue_size_lim);
3321 
3322 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3323 
3324 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3325 		rte_pause();
3326 
3327 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3328 			num_ops);
3329 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3330 			num_ops);
3331 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3332 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3333 				bufs->hard_outputs, ref_op);
3334 
3335 	/* Set counter to validate the ordering */
3336 	for (j = 0; j < num_ops; ++j)
3337 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3338 
3339 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3340 
3341 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3342 			for (j = 0; j < num_ops; ++j)
3343 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3344 
3345 		start_time = rte_rdtsc_precise();
3346 
3347 		for (enq = 0, deq = 0; enq < num_ops;) {
3348 			num_to_enq = burst_sz;
3349 
3350 			if (unlikely(num_ops - enq < num_to_enq))
3351 				num_to_enq = num_ops - enq;
3352 
3353 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3354 					queue_id, &ops_enq[enq], num_to_enq);
3355 
3356 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3357 					queue_id, &ops_deq[deq], enq - deq);
3358 		}
3359 
3360 		/* dequeue the remaining */
3361 		while (deq < enq) {
3362 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3363 					queue_id, &ops_deq[deq], enq - deq);
3364 		}
3365 
3366 		total_time += rte_rdtsc_precise() - start_time;
3367 	}
3368 
3369 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3370 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3371 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3372 	}
3373 
3374 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3375 
3376 	double tb_len_bits = calc_enc_TB_size(ref_op);
3377 
3378 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3379 			((double)total_time / (double)rte_get_tsc_hz());
3380 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3381 			/ 1000000.0) / ((double)total_time /
3382 			(double)rte_get_tsc_hz());
3383 
3384 	return TEST_SUCCESS;
3385 }
3386 
3387 static int
3388 throughput_pmd_lcore_ldpc_enc(void *arg)
3389 {
3390 	struct thread_params *tp = arg;
3391 	uint16_t enq, deq;
3392 	uint64_t total_time = 0, start_time;
3393 	const uint16_t queue_id = tp->queue_id;
3394 	const uint16_t burst_sz = tp->op_params->burst_sz;
3395 	const uint16_t num_ops = tp->op_params->num_to_process;
3396 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3397 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3398 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3399 	struct test_buffers *bufs = NULL;
3400 	int i, j, ret;
3401 	struct rte_bbdev_info info;
3402 	uint16_t num_to_enq;
3403 
3404 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3405 			"BURST_SIZE should be <= %u", MAX_BURST);
3406 
3407 	rte_bbdev_info_get(tp->dev_id, &info);
3408 
3409 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3410 			"NUM_OPS cannot exceed %u for this device",
3411 			info.drv.queue_size_lim);
3412 
3413 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3414 
3415 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3416 		rte_pause();
3417 
3418 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3419 			num_ops);
3420 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3421 			num_ops);
3422 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3423 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3424 				bufs->hard_outputs, ref_op);
3425 
3426 	/* Set counter to validate the ordering */
3427 	for (j = 0; j < num_ops; ++j)
3428 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3429 
3430 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3431 
3432 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3433 			for (j = 0; j < num_ops; ++j)
3434 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3435 
3436 		start_time = rte_rdtsc_precise();
3437 
3438 		for (enq = 0, deq = 0; enq < num_ops;) {
3439 			num_to_enq = burst_sz;
3440 
3441 			if (unlikely(num_ops - enq < num_to_enq))
3442 				num_to_enq = num_ops - enq;
3443 
3444 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3445 					queue_id, &ops_enq[enq], num_to_enq);
3446 
3447 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3448 					queue_id, &ops_deq[deq], enq - deq);
3449 		}
3450 
3451 		/* dequeue the remaining */
3452 		while (deq < enq) {
3453 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3454 					queue_id, &ops_deq[deq], enq - deq);
3455 		}
3456 
3457 		total_time += rte_rdtsc_precise() - start_time;
3458 	}
3459 
3460 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3461 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3462 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3463 	}
3464 
3465 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3466 
3467 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3468 
3469 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3470 			((double)total_time / (double)rte_get_tsc_hz());
3471 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3472 			/ 1000000.0) / ((double)total_time /
3473 			(double)rte_get_tsc_hz());
3474 
3475 	return TEST_SUCCESS;
3476 }
3477 
3478 static void
3479 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3480 {
3481 	unsigned int iter = 0;
3482 	double total_mops = 0, total_mbps = 0;
3483 
3484 	for (iter = 0; iter < used_cores; iter++) {
3485 		printf(
3486 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3487 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3488 			t_params[iter].mbps);
3489 		total_mops += t_params[iter].ops_per_sec;
3490 		total_mbps += t_params[iter].mbps;
3491 	}
3492 	printf(
3493 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3494 		used_cores, total_mops, total_mbps);
3495 }
3496 
3497 /* Aggregate the performance results over the number of cores used */
3498 static void
3499 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3500 {
3501 	unsigned int core_idx = 0;
3502 	double total_mops = 0, total_mbps = 0;
3503 	uint8_t iter_count = 0;
3504 
3505 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3506 		printf(
3507 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3508 			t_params[core_idx].lcore_id,
3509 			t_params[core_idx].ops_per_sec,
3510 			t_params[core_idx].mbps,
3511 			t_params[core_idx].iter_count);
3512 		total_mops += t_params[core_idx].ops_per_sec;
3513 		total_mbps += t_params[core_idx].mbps;
3514 		iter_count = RTE_MAX(iter_count,
3515 				t_params[core_idx].iter_count);
3516 	}
3517 	printf(
3518 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3519 		used_cores, total_mops, total_mbps, iter_count);
3520 }
3521 
3522 /* Aggregate the performance results over the number of cores used */
3523 static void
3524 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3525 {
3526 	unsigned int core_idx = 0;
3527 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3528 	double snr = get_snr();
3529 
3530 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3531 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3532 				t_params[core_idx].lcore_id,
3533 				t_params[core_idx].bler * 100,
3534 				t_params[core_idx].iter_average,
3535 				t_params[core_idx].mbps,
3536 				get_vector_filename());
3537 		total_mbps += t_params[core_idx].mbps;
3538 		total_bler += t_params[core_idx].bler;
3539 		total_iter += t_params[core_idx].iter_average;
3540 	}
3541 	total_bler /= used_cores;
3542 	total_iter /= used_cores;
3543 
3544 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3545 			snr, total_bler * 100, total_iter, get_iter_max(),
3546 			total_mbps, get_vector_filename());
3547 }
3548 
3549 /*
3550  * Test function that determines BLER wireless performance
3551  */
3552 static int
3553 bler_test(struct active_device *ad,
3554 		struct test_op_params *op_params)
3555 {
3556 	int ret;
3557 	unsigned int lcore_id, used_cores = 0;
3558 	struct thread_params *t_params;
3559 	struct rte_bbdev_info info;
3560 	lcore_function_t *bler_function;
3561 	uint16_t num_lcores;
3562 	const char *op_type_str;
3563 
3564 	rte_bbdev_info_get(ad->dev_id, &info);
3565 
3566 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3567 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3568 			test_vector.op_type);
3569 
3570 	printf("+ ------------------------------------------------------- +\n");
3571 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3572 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3573 			op_params->num_to_process, op_params->num_lcores,
3574 			op_type_str,
3575 			intr_enabled ? "Interrupt mode" : "PMD mode",
3576 			(double)rte_get_tsc_hz() / 1000000000.0);
3577 
3578 	/* Set number of lcores */
3579 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3580 			? ad->nb_queues
3581 			: op_params->num_lcores;
3582 
3583 	/* Allocate memory for thread parameters structure */
3584 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3585 			RTE_CACHE_LINE_SIZE);
3586 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3587 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3588 				RTE_CACHE_LINE_SIZE));
3589 
3590 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3591 		bler_function = bler_pmd_lcore_ldpc_dec;
3592 	else
3593 		return TEST_SKIPPED;
3594 
3595 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3596 
3597 	/* Master core is set at first entry */
3598 	t_params[0].dev_id = ad->dev_id;
3599 	t_params[0].lcore_id = rte_lcore_id();
3600 	t_params[0].op_params = op_params;
3601 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3602 	t_params[0].iter_count = 0;
3603 
3604 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3605 		if (used_cores >= num_lcores)
3606 			break;
3607 
3608 		t_params[used_cores].dev_id = ad->dev_id;
3609 		t_params[used_cores].lcore_id = lcore_id;
3610 		t_params[used_cores].op_params = op_params;
3611 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3612 		t_params[used_cores].iter_count = 0;
3613 
3614 		rte_eal_remote_launch(bler_function,
3615 				&t_params[used_cores++], lcore_id);
3616 	}
3617 
3618 	rte_atomic16_set(&op_params->sync, SYNC_START);
3619 	ret = bler_function(&t_params[0]);
3620 
3621 	/* Master core is always used */
3622 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3623 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3624 
3625 	print_dec_bler(t_params, num_lcores);
3626 
3627 	/* Return if test failed */
3628 	if (ret) {
3629 		rte_free(t_params);
3630 		return ret;
3631 	}
3632 
3633 	/* Function to print something  here*/
3634 	rte_free(t_params);
3635 	return ret;
3636 }
3637 
3638 /*
3639  * Test function that determines how long an enqueue + dequeue of a burst
3640  * takes on available lcores.
3641  */
3642 static int
3643 throughput_test(struct active_device *ad,
3644 		struct test_op_params *op_params)
3645 {
3646 	int ret;
3647 	unsigned int lcore_id, used_cores = 0;
3648 	struct thread_params *t_params, *tp;
3649 	struct rte_bbdev_info info;
3650 	lcore_function_t *throughput_function;
3651 	uint16_t num_lcores;
3652 	const char *op_type_str;
3653 
3654 	rte_bbdev_info_get(ad->dev_id, &info);
3655 
3656 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3657 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3658 			test_vector.op_type);
3659 
3660 	printf("+ ------------------------------------------------------- +\n");
3661 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3662 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3663 			op_params->num_to_process, op_params->num_lcores,
3664 			op_type_str,
3665 			intr_enabled ? "Interrupt mode" : "PMD mode",
3666 			(double)rte_get_tsc_hz() / 1000000000.0);
3667 
3668 	/* Set number of lcores */
3669 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3670 			? ad->nb_queues
3671 			: op_params->num_lcores;
3672 
3673 	/* Allocate memory for thread parameters structure */
3674 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3675 			RTE_CACHE_LINE_SIZE);
3676 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3677 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3678 				RTE_CACHE_LINE_SIZE));
3679 
3680 	if (intr_enabled) {
3681 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3682 			throughput_function = throughput_intr_lcore_dec;
3683 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3684 			throughput_function = throughput_intr_lcore_ldpc_dec;
3685 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3686 			throughput_function = throughput_intr_lcore_enc;
3687 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3688 			throughput_function = throughput_intr_lcore_ldpc_enc;
3689 		else
3690 			throughput_function = throughput_intr_lcore_enc;
3691 
3692 		/* Dequeue interrupt callback registration */
3693 		ret = rte_bbdev_callback_register(ad->dev_id,
3694 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3695 				t_params);
3696 		if (ret < 0) {
3697 			rte_free(t_params);
3698 			return ret;
3699 		}
3700 	} else {
3701 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3702 			throughput_function = throughput_pmd_lcore_dec;
3703 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3704 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3705 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3706 			throughput_function = throughput_pmd_lcore_enc;
3707 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3708 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3709 		else
3710 			throughput_function = throughput_pmd_lcore_enc;
3711 	}
3712 
3713 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3714 
3715 	/* Master core is set at first entry */
3716 	t_params[0].dev_id = ad->dev_id;
3717 	t_params[0].lcore_id = rte_lcore_id();
3718 	t_params[0].op_params = op_params;
3719 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3720 	t_params[0].iter_count = 0;
3721 
3722 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3723 		if (used_cores >= num_lcores)
3724 			break;
3725 
3726 		t_params[used_cores].dev_id = ad->dev_id;
3727 		t_params[used_cores].lcore_id = lcore_id;
3728 		t_params[used_cores].op_params = op_params;
3729 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3730 		t_params[used_cores].iter_count = 0;
3731 
3732 		rte_eal_remote_launch(throughput_function,
3733 				&t_params[used_cores++], lcore_id);
3734 	}
3735 
3736 	rte_atomic16_set(&op_params->sync, SYNC_START);
3737 	ret = throughput_function(&t_params[0]);
3738 
3739 	/* Master core is always used */
3740 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3741 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3742 
3743 	/* Return if test failed */
3744 	if (ret) {
3745 		rte_free(t_params);
3746 		return ret;
3747 	}
3748 
3749 	/* Print throughput if interrupts are disabled and test passed */
3750 	if (!intr_enabled) {
3751 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3752 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3753 			print_dec_throughput(t_params, num_lcores);
3754 		else
3755 			print_enc_throughput(t_params, num_lcores);
3756 		rte_free(t_params);
3757 		return ret;
3758 	}
3759 
3760 	/* In interrupt TC we need to wait for the interrupt callback to deqeue
3761 	 * all pending operations. Skip waiting for queues which reported an
3762 	 * error using processing_status variable.
3763 	 * Wait for master lcore operations.
3764 	 */
3765 	tp = &t_params[0];
3766 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3767 			op_params->num_to_process) &&
3768 			(rte_atomic16_read(&tp->processing_status) !=
3769 			TEST_FAILED))
3770 		rte_pause();
3771 
3772 	tp->ops_per_sec /= TEST_REPETITIONS;
3773 	tp->mbps /= TEST_REPETITIONS;
3774 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3775 
3776 	/* Wait for slave lcores operations */
3777 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3778 		tp = &t_params[used_cores];
3779 
3780 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3781 				op_params->num_to_process) &&
3782 				(rte_atomic16_read(&tp->processing_status) !=
3783 				TEST_FAILED))
3784 			rte_pause();
3785 
3786 		tp->ops_per_sec /= TEST_REPETITIONS;
3787 		tp->mbps /= TEST_REPETITIONS;
3788 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3789 	}
3790 
3791 	/* Print throughput if test passed */
3792 	if (!ret) {
3793 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3794 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3795 			print_dec_throughput(t_params, num_lcores);
3796 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3797 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3798 			print_enc_throughput(t_params, num_lcores);
3799 	}
3800 
3801 	rte_free(t_params);
3802 	return ret;
3803 }
3804 
3805 static int
3806 latency_test_dec(struct rte_mempool *mempool,
3807 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3808 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3809 		const uint16_t num_to_process, uint16_t burst_sz,
3810 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3811 {
3812 	int ret = TEST_SUCCESS;
3813 	uint16_t i, j, dequeued;
3814 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3815 	uint64_t start_time = 0, last_time = 0;
3816 
3817 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3818 		uint16_t enq = 0, deq = 0;
3819 		bool first_time = true;
3820 		last_time = 0;
3821 
3822 		if (unlikely(num_to_process - dequeued < burst_sz))
3823 			burst_sz = num_to_process - dequeued;
3824 
3825 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3826 		TEST_ASSERT_SUCCESS(ret,
3827 				"rte_bbdev_dec_op_alloc_bulk() failed");
3828 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3829 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3830 					bufs->inputs,
3831 					bufs->hard_outputs,
3832 					bufs->soft_outputs,
3833 					ref_op);
3834 
3835 		/* Set counter to validate the ordering */
3836 		for (j = 0; j < burst_sz; ++j)
3837 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3838 
3839 		start_time = rte_rdtsc_precise();
3840 
3841 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3842 				burst_sz);
3843 		TEST_ASSERT(enq == burst_sz,
3844 				"Error enqueueing burst, expected %u, got %u",
3845 				burst_sz, enq);
3846 
3847 		/* Dequeue */
3848 		do {
3849 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3850 					&ops_deq[deq], burst_sz - deq);
3851 			if (likely(first_time && (deq > 0))) {
3852 				last_time = rte_rdtsc_precise() - start_time;
3853 				first_time = false;
3854 			}
3855 		} while (unlikely(burst_sz != deq));
3856 
3857 		*max_time = RTE_MAX(*max_time, last_time);
3858 		*min_time = RTE_MIN(*min_time, last_time);
3859 		*total_time += last_time;
3860 
3861 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3862 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3863 					vector_mask);
3864 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3865 		}
3866 
3867 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3868 		dequeued += deq;
3869 	}
3870 
3871 	return i;
3872 }
3873 
3874 static int
3875 latency_test_ldpc_dec(struct rte_mempool *mempool,
3876 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3877 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3878 		const uint16_t num_to_process, uint16_t burst_sz,
3879 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3880 {
3881 	int ret = TEST_SUCCESS;
3882 	uint16_t i, j, dequeued;
3883 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3884 	uint64_t start_time = 0, last_time = 0;
3885 	bool extDdr = ldpc_cap_flags &
3886 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3887 
3888 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3889 		uint16_t enq = 0, deq = 0;
3890 		bool first_time = true;
3891 		last_time = 0;
3892 
3893 		if (unlikely(num_to_process - dequeued < burst_sz))
3894 			burst_sz = num_to_process - dequeued;
3895 
3896 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3897 		TEST_ASSERT_SUCCESS(ret,
3898 				"rte_bbdev_dec_op_alloc_bulk() failed");
3899 
3900 		/* For latency tests we need to disable early termination */
3901 		if (check_bit(ref_op->ldpc_dec.op_flags,
3902 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3903 			ref_op->ldpc_dec.op_flags -=
3904 					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3905 		ref_op->ldpc_dec.iter_max = get_iter_max();
3906 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3907 
3908 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3909 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3910 					bufs->inputs,
3911 					bufs->hard_outputs,
3912 					bufs->soft_outputs,
3913 					bufs->harq_inputs,
3914 					bufs->harq_outputs,
3915 					ref_op);
3916 
3917 		if (extDdr)
3918 			preload_harq_ddr(dev_id, queue_id, ops_enq,
3919 					burst_sz, true);
3920 
3921 		/* Set counter to validate the ordering */
3922 		for (j = 0; j < burst_sz; ++j)
3923 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3924 
3925 		start_time = rte_rdtsc_precise();
3926 
3927 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3928 				&ops_enq[enq], burst_sz);
3929 		TEST_ASSERT(enq == burst_sz,
3930 				"Error enqueueing burst, expected %u, got %u",
3931 				burst_sz, enq);
3932 
3933 		/* Dequeue */
3934 		do {
3935 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3936 					&ops_deq[deq], burst_sz - deq);
3937 			if (likely(first_time && (deq > 0))) {
3938 				last_time = rte_rdtsc_precise() - start_time;
3939 				first_time = false;
3940 			}
3941 		} while (unlikely(burst_sz != deq));
3942 
3943 		*max_time = RTE_MAX(*max_time, last_time);
3944 		*min_time = RTE_MIN(*min_time, last_time);
3945 		*total_time += last_time;
3946 
3947 		if (extDdr)
3948 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3949 
3950 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3951 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
3952 					vector_mask);
3953 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3954 		}
3955 
3956 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3957 		dequeued += deq;
3958 	}
3959 	return i;
3960 }
3961 
3962 static int
3963 latency_test_enc(struct rte_mempool *mempool,
3964 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3965 		uint16_t dev_id, uint16_t queue_id,
3966 		const uint16_t num_to_process, uint16_t burst_sz,
3967 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3968 {
3969 	int ret = TEST_SUCCESS;
3970 	uint16_t i, j, dequeued;
3971 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3972 	uint64_t start_time = 0, last_time = 0;
3973 
3974 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3975 		uint16_t enq = 0, deq = 0;
3976 		bool first_time = true;
3977 		last_time = 0;
3978 
3979 		if (unlikely(num_to_process - dequeued < burst_sz))
3980 			burst_sz = num_to_process - dequeued;
3981 
3982 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3983 		TEST_ASSERT_SUCCESS(ret,
3984 				"rte_bbdev_enc_op_alloc_bulk() failed");
3985 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3986 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3987 					bufs->inputs,
3988 					bufs->hard_outputs,
3989 					ref_op);
3990 
3991 		/* Set counter to validate the ordering */
3992 		for (j = 0; j < burst_sz; ++j)
3993 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3994 
3995 		start_time = rte_rdtsc_precise();
3996 
3997 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
3998 				burst_sz);
3999 		TEST_ASSERT(enq == burst_sz,
4000 				"Error enqueueing burst, expected %u, got %u",
4001 				burst_sz, enq);
4002 
4003 		/* Dequeue */
4004 		do {
4005 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4006 					&ops_deq[deq], burst_sz - deq);
4007 			if (likely(first_time && (deq > 0))) {
4008 				last_time += rte_rdtsc_precise() - start_time;
4009 				first_time = false;
4010 			}
4011 		} while (unlikely(burst_sz != deq));
4012 
4013 		*max_time = RTE_MAX(*max_time, last_time);
4014 		*min_time = RTE_MIN(*min_time, last_time);
4015 		*total_time += last_time;
4016 
4017 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4018 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4019 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4020 		}
4021 
4022 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4023 		dequeued += deq;
4024 	}
4025 
4026 	return i;
4027 }
4028 
4029 static int
4030 latency_test_ldpc_enc(struct rte_mempool *mempool,
4031 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4032 		uint16_t dev_id, uint16_t queue_id,
4033 		const uint16_t num_to_process, uint16_t burst_sz,
4034 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4035 {
4036 	int ret = TEST_SUCCESS;
4037 	uint16_t i, j, dequeued;
4038 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4039 	uint64_t start_time = 0, last_time = 0;
4040 
4041 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4042 		uint16_t enq = 0, deq = 0;
4043 		bool first_time = true;
4044 		last_time = 0;
4045 
4046 		if (unlikely(num_to_process - dequeued < burst_sz))
4047 			burst_sz = num_to_process - dequeued;
4048 
4049 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4050 		TEST_ASSERT_SUCCESS(ret,
4051 				"rte_bbdev_enc_op_alloc_bulk() failed");
4052 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4053 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4054 					bufs->inputs,
4055 					bufs->hard_outputs,
4056 					ref_op);
4057 
4058 		/* Set counter to validate the ordering */
4059 		for (j = 0; j < burst_sz; ++j)
4060 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4061 
4062 		start_time = rte_rdtsc_precise();
4063 
4064 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4065 				&ops_enq[enq], burst_sz);
4066 		TEST_ASSERT(enq == burst_sz,
4067 				"Error enqueueing burst, expected %u, got %u",
4068 				burst_sz, enq);
4069 
4070 		/* Dequeue */
4071 		do {
4072 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4073 					&ops_deq[deq], burst_sz - deq);
4074 			if (likely(first_time && (deq > 0))) {
4075 				last_time += rte_rdtsc_precise() - start_time;
4076 				first_time = false;
4077 			}
4078 		} while (unlikely(burst_sz != deq));
4079 
4080 		*max_time = RTE_MAX(*max_time, last_time);
4081 		*min_time = RTE_MIN(*min_time, last_time);
4082 		*total_time += last_time;
4083 
4084 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4085 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4086 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4087 		}
4088 
4089 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4090 		dequeued += deq;
4091 	}
4092 
4093 	return i;
4094 }
4095 
4096 static int
4097 latency_test(struct active_device *ad,
4098 		struct test_op_params *op_params)
4099 {
4100 	int iter;
4101 	uint16_t burst_sz = op_params->burst_sz;
4102 	const uint16_t num_to_process = op_params->num_to_process;
4103 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4104 	const uint16_t queue_id = ad->queue_ids[0];
4105 	struct test_buffers *bufs = NULL;
4106 	struct rte_bbdev_info info;
4107 	uint64_t total_time, min_time, max_time;
4108 	const char *op_type_str;
4109 
4110 	total_time = max_time = 0;
4111 	min_time = UINT64_MAX;
4112 
4113 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4114 			"BURST_SIZE should be <= %u", MAX_BURST);
4115 
4116 	rte_bbdev_info_get(ad->dev_id, &info);
4117 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4118 
4119 	op_type_str = rte_bbdev_op_type_str(op_type);
4120 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4121 
4122 	printf("+ ------------------------------------------------------- +\n");
4123 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4124 			info.dev_name, burst_sz, num_to_process, op_type_str);
4125 
4126 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4127 		iter = latency_test_dec(op_params->mp, bufs,
4128 				op_params->ref_dec_op, op_params->vector_mask,
4129 				ad->dev_id, queue_id, num_to_process,
4130 				burst_sz, &total_time, &min_time, &max_time);
4131 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4132 		iter = latency_test_enc(op_params->mp, bufs,
4133 				op_params->ref_enc_op, ad->dev_id, queue_id,
4134 				num_to_process, burst_sz, &total_time,
4135 				&min_time, &max_time);
4136 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4137 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4138 				op_params->ref_enc_op, ad->dev_id, queue_id,
4139 				num_to_process, burst_sz, &total_time,
4140 				&min_time, &max_time);
4141 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4142 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4143 				op_params->ref_dec_op, op_params->vector_mask,
4144 				ad->dev_id, queue_id, num_to_process,
4145 				burst_sz, &total_time, &min_time, &max_time);
4146 	else
4147 		iter = latency_test_enc(op_params->mp, bufs,
4148 					op_params->ref_enc_op,
4149 					ad->dev_id, queue_id,
4150 					num_to_process, burst_sz, &total_time,
4151 					&min_time, &max_time);
4152 
4153 	if (iter <= 0)
4154 		return TEST_FAILED;
4155 
4156 	printf("Operation latency:\n"
4157 			"\tavg: %lg cycles, %lg us\n"
4158 			"\tmin: %lg cycles, %lg us\n"
4159 			"\tmax: %lg cycles, %lg us\n",
4160 			(double)total_time / (double)iter,
4161 			(double)(total_time * 1000000) / (double)iter /
4162 			(double)rte_get_tsc_hz(), (double)min_time,
4163 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4164 			(double)max_time, (double)(max_time * 1000000) /
4165 			(double)rte_get_tsc_hz());
4166 
4167 	return TEST_SUCCESS;
4168 }
4169 
4170 #ifdef RTE_BBDEV_OFFLOAD_COST
4171 static int
4172 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4173 		struct rte_bbdev_stats *stats)
4174 {
4175 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4176 	struct rte_bbdev_stats *q_stats;
4177 
4178 	if (queue_id >= dev->data->num_queues)
4179 		return -1;
4180 
4181 	q_stats = &dev->data->queues[queue_id].queue_stats;
4182 
4183 	stats->enqueued_count = q_stats->enqueued_count;
4184 	stats->dequeued_count = q_stats->dequeued_count;
4185 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4186 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4187 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4188 
4189 	return 0;
4190 }
4191 
4192 static int
4193 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4194 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4195 		uint16_t queue_id, const uint16_t num_to_process,
4196 		uint16_t burst_sz, struct test_time_stats *time_st)
4197 {
4198 	int i, dequeued, ret;
4199 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4200 	uint64_t enq_start_time, deq_start_time;
4201 	uint64_t enq_sw_last_time, deq_last_time;
4202 	struct rte_bbdev_stats stats;
4203 
4204 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4205 		uint16_t enq = 0, deq = 0;
4206 
4207 		if (unlikely(num_to_process - dequeued < burst_sz))
4208 			burst_sz = num_to_process - dequeued;
4209 
4210 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4211 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4212 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4213 					bufs->inputs,
4214 					bufs->hard_outputs,
4215 					bufs->soft_outputs,
4216 					ref_op);
4217 
4218 		/* Start time meas for enqueue function offload latency */
4219 		enq_start_time = rte_rdtsc_precise();
4220 		do {
4221 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4222 					&ops_enq[enq], burst_sz - enq);
4223 		} while (unlikely(burst_sz != enq));
4224 
4225 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4226 		TEST_ASSERT_SUCCESS(ret,
4227 				"Failed to get stats for queue (%u) of device (%u)",
4228 				queue_id, dev_id);
4229 
4230 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4231 				stats.acc_offload_cycles;
4232 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4233 				enq_sw_last_time);
4234 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4235 				enq_sw_last_time);
4236 		time_st->enq_sw_total_time += enq_sw_last_time;
4237 
4238 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4239 				stats.acc_offload_cycles);
4240 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4241 				stats.acc_offload_cycles);
4242 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4243 
4244 		/* give time for device to process ops */
4245 		rte_delay_us(200);
4246 
4247 		/* Start time meas for dequeue function offload latency */
4248 		deq_start_time = rte_rdtsc_precise();
4249 		/* Dequeue one operation */
4250 		do {
4251 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4252 					&ops_deq[deq], 1);
4253 		} while (unlikely(deq != 1));
4254 
4255 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4256 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4257 				deq_last_time);
4258 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4259 				deq_last_time);
4260 		time_st->deq_total_time += deq_last_time;
4261 
4262 		/* Dequeue remaining operations if needed*/
4263 		while (burst_sz != deq)
4264 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4265 					&ops_deq[deq], burst_sz - deq);
4266 
4267 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4268 		dequeued += deq;
4269 	}
4270 
4271 	return i;
4272 }
4273 
4274 static int
4275 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4276 		struct test_buffers *bufs,
4277 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4278 		uint16_t queue_id, const uint16_t num_to_process,
4279 		uint16_t burst_sz, struct test_time_stats *time_st)
4280 {
4281 	int i, dequeued, ret;
4282 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4283 	uint64_t enq_start_time, deq_start_time;
4284 	uint64_t enq_sw_last_time, deq_last_time;
4285 	struct rte_bbdev_stats stats;
4286 	bool extDdr = ldpc_cap_flags &
4287 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4288 
4289 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4290 		uint16_t enq = 0, deq = 0;
4291 
4292 		if (unlikely(num_to_process - dequeued < burst_sz))
4293 			burst_sz = num_to_process - dequeued;
4294 
4295 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4296 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4297 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4298 					bufs->inputs,
4299 					bufs->hard_outputs,
4300 					bufs->soft_outputs,
4301 					bufs->harq_inputs,
4302 					bufs->harq_outputs,
4303 					ref_op);
4304 
4305 		if (extDdr)
4306 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4307 					burst_sz, true);
4308 
4309 		/* Start time meas for enqueue function offload latency */
4310 		enq_start_time = rte_rdtsc_precise();
4311 		do {
4312 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4313 					&ops_enq[enq], burst_sz - enq);
4314 		} while (unlikely(burst_sz != enq));
4315 
4316 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4317 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4318 		TEST_ASSERT_SUCCESS(ret,
4319 				"Failed to get stats for queue (%u) of device (%u)",
4320 				queue_id, dev_id);
4321 
4322 		enq_sw_last_time -= stats.acc_offload_cycles;
4323 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4324 				enq_sw_last_time);
4325 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4326 				enq_sw_last_time);
4327 		time_st->enq_sw_total_time += enq_sw_last_time;
4328 
4329 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4330 				stats.acc_offload_cycles);
4331 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4332 				stats.acc_offload_cycles);
4333 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4334 
4335 		/* give time for device to process ops */
4336 		rte_delay_us(200);
4337 
4338 		/* Start time meas for dequeue function offload latency */
4339 		deq_start_time = rte_rdtsc_precise();
4340 		/* Dequeue one operation */
4341 		do {
4342 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4343 					&ops_deq[deq], 1);
4344 		} while (unlikely(deq != 1));
4345 
4346 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4347 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4348 				deq_last_time);
4349 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4350 				deq_last_time);
4351 		time_st->deq_total_time += deq_last_time;
4352 
4353 		/* Dequeue remaining operations if needed*/
4354 		while (burst_sz != deq)
4355 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4356 					&ops_deq[deq], burst_sz - deq);
4357 
4358 		if (extDdr) {
4359 			/* Read loopback is not thread safe */
4360 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4361 		}
4362 
4363 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4364 		dequeued += deq;
4365 	}
4366 
4367 	return i;
4368 }
4369 
4370 static int
4371 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4372 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4373 		uint16_t queue_id, const uint16_t num_to_process,
4374 		uint16_t burst_sz, struct test_time_stats *time_st)
4375 {
4376 	int i, dequeued, ret;
4377 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4378 	uint64_t enq_start_time, deq_start_time;
4379 	uint64_t enq_sw_last_time, deq_last_time;
4380 	struct rte_bbdev_stats stats;
4381 
4382 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4383 		uint16_t enq = 0, deq = 0;
4384 
4385 		if (unlikely(num_to_process - dequeued < burst_sz))
4386 			burst_sz = num_to_process - dequeued;
4387 
4388 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4389 		TEST_ASSERT_SUCCESS(ret,
4390 				"rte_bbdev_enc_op_alloc_bulk() failed");
4391 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4392 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4393 					bufs->inputs,
4394 					bufs->hard_outputs,
4395 					ref_op);
4396 
4397 		/* Start time meas for enqueue function offload latency */
4398 		enq_start_time = rte_rdtsc_precise();
4399 		do {
4400 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4401 					&ops_enq[enq], burst_sz - enq);
4402 		} while (unlikely(burst_sz != enq));
4403 
4404 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4405 
4406 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4407 		TEST_ASSERT_SUCCESS(ret,
4408 				"Failed to get stats for queue (%u) of device (%u)",
4409 				queue_id, dev_id);
4410 		enq_sw_last_time -= stats.acc_offload_cycles;
4411 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4412 				enq_sw_last_time);
4413 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4414 				enq_sw_last_time);
4415 		time_st->enq_sw_total_time += enq_sw_last_time;
4416 
4417 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4418 				stats.acc_offload_cycles);
4419 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4420 				stats.acc_offload_cycles);
4421 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4422 
4423 		/* give time for device to process ops */
4424 		rte_delay_us(200);
4425 
4426 		/* Start time meas for dequeue function offload latency */
4427 		deq_start_time = rte_rdtsc_precise();
4428 		/* Dequeue one operation */
4429 		do {
4430 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4431 					&ops_deq[deq], 1);
4432 		} while (unlikely(deq != 1));
4433 
4434 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4435 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4436 				deq_last_time);
4437 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4438 				deq_last_time);
4439 		time_st->deq_total_time += deq_last_time;
4440 
4441 		while (burst_sz != deq)
4442 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4443 					&ops_deq[deq], burst_sz - deq);
4444 
4445 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4446 		dequeued += deq;
4447 	}
4448 
4449 	return i;
4450 }
4451 
4452 static int
4453 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4454 		struct test_buffers *bufs,
4455 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4456 		uint16_t queue_id, const uint16_t num_to_process,
4457 		uint16_t burst_sz, struct test_time_stats *time_st)
4458 {
4459 	int i, dequeued, ret;
4460 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4461 	uint64_t enq_start_time, deq_start_time;
4462 	uint64_t enq_sw_last_time, deq_last_time;
4463 	struct rte_bbdev_stats stats;
4464 
4465 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4466 		uint16_t enq = 0, deq = 0;
4467 
4468 		if (unlikely(num_to_process - dequeued < burst_sz))
4469 			burst_sz = num_to_process - dequeued;
4470 
4471 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4472 		TEST_ASSERT_SUCCESS(ret,
4473 				"rte_bbdev_enc_op_alloc_bulk() failed");
4474 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4475 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4476 					bufs->inputs,
4477 					bufs->hard_outputs,
4478 					ref_op);
4479 
4480 		/* Start time meas for enqueue function offload latency */
4481 		enq_start_time = rte_rdtsc_precise();
4482 		do {
4483 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4484 					&ops_enq[enq], burst_sz - enq);
4485 		} while (unlikely(burst_sz != enq));
4486 
4487 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4488 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4489 		TEST_ASSERT_SUCCESS(ret,
4490 				"Failed to get stats for queue (%u) of device (%u)",
4491 				queue_id, dev_id);
4492 
4493 		enq_sw_last_time -= stats.acc_offload_cycles;
4494 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4495 				enq_sw_last_time);
4496 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4497 				enq_sw_last_time);
4498 		time_st->enq_sw_total_time += enq_sw_last_time;
4499 
4500 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4501 				stats.acc_offload_cycles);
4502 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4503 				stats.acc_offload_cycles);
4504 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4505 
4506 		/* give time for device to process ops */
4507 		rte_delay_us(200);
4508 
4509 		/* Start time meas for dequeue function offload latency */
4510 		deq_start_time = rte_rdtsc_precise();
4511 		/* Dequeue one operation */
4512 		do {
4513 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4514 					&ops_deq[deq], 1);
4515 		} while (unlikely(deq != 1));
4516 
4517 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4518 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4519 				deq_last_time);
4520 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4521 				deq_last_time);
4522 		time_st->deq_total_time += deq_last_time;
4523 
4524 		while (burst_sz != deq)
4525 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4526 					&ops_deq[deq], burst_sz - deq);
4527 
4528 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4529 		dequeued += deq;
4530 	}
4531 
4532 	return i;
4533 }
4534 #endif
4535 
4536 static int
4537 offload_cost_test(struct active_device *ad,
4538 		struct test_op_params *op_params)
4539 {
4540 #ifndef RTE_BBDEV_OFFLOAD_COST
4541 	RTE_SET_USED(ad);
4542 	RTE_SET_USED(op_params);
4543 	printf("Offload latency test is disabled.\n");
4544 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4545 	return TEST_SKIPPED;
4546 #else
4547 	int iter;
4548 	uint16_t burst_sz = op_params->burst_sz;
4549 	const uint16_t num_to_process = op_params->num_to_process;
4550 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4551 	const uint16_t queue_id = ad->queue_ids[0];
4552 	struct test_buffers *bufs = NULL;
4553 	struct rte_bbdev_info info;
4554 	const char *op_type_str;
4555 	struct test_time_stats time_st;
4556 
4557 	memset(&time_st, 0, sizeof(struct test_time_stats));
4558 	time_st.enq_sw_min_time = UINT64_MAX;
4559 	time_st.enq_acc_min_time = UINT64_MAX;
4560 	time_st.deq_min_time = UINT64_MAX;
4561 
4562 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4563 			"BURST_SIZE should be <= %u", MAX_BURST);
4564 
4565 	rte_bbdev_info_get(ad->dev_id, &info);
4566 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4567 
4568 	op_type_str = rte_bbdev_op_type_str(op_type);
4569 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4570 
4571 	printf("+ ------------------------------------------------------- +\n");
4572 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4573 			info.dev_name, burst_sz, num_to_process, op_type_str);
4574 
4575 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4576 		iter = offload_latency_test_dec(op_params->mp, bufs,
4577 				op_params->ref_dec_op, ad->dev_id, queue_id,
4578 				num_to_process, burst_sz, &time_st);
4579 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4580 		iter = offload_latency_test_enc(op_params->mp, bufs,
4581 				op_params->ref_enc_op, ad->dev_id, queue_id,
4582 				num_to_process, burst_sz, &time_st);
4583 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4584 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4585 				op_params->ref_enc_op, ad->dev_id, queue_id,
4586 				num_to_process, burst_sz, &time_st);
4587 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4588 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4589 			op_params->ref_dec_op, ad->dev_id, queue_id,
4590 			num_to_process, burst_sz, &time_st);
4591 	else
4592 		iter = offload_latency_test_enc(op_params->mp, bufs,
4593 				op_params->ref_enc_op, ad->dev_id, queue_id,
4594 				num_to_process, burst_sz, &time_st);
4595 
4596 	if (iter <= 0)
4597 		return TEST_FAILED;
4598 
4599 	printf("Enqueue driver offload cost latency:\n"
4600 			"\tavg: %lg cycles, %lg us\n"
4601 			"\tmin: %lg cycles, %lg us\n"
4602 			"\tmax: %lg cycles, %lg us\n"
4603 			"Enqueue accelerator offload cost latency:\n"
4604 			"\tavg: %lg cycles, %lg us\n"
4605 			"\tmin: %lg cycles, %lg us\n"
4606 			"\tmax: %lg cycles, %lg us\n",
4607 			(double)time_st.enq_sw_total_time / (double)iter,
4608 			(double)(time_st.enq_sw_total_time * 1000000) /
4609 			(double)iter / (double)rte_get_tsc_hz(),
4610 			(double)time_st.enq_sw_min_time,
4611 			(double)(time_st.enq_sw_min_time * 1000000) /
4612 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4613 			(double)(time_st.enq_sw_max_time * 1000000) /
4614 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4615 			(double)iter,
4616 			(double)(time_st.enq_acc_total_time * 1000000) /
4617 			(double)iter / (double)rte_get_tsc_hz(),
4618 			(double)time_st.enq_acc_min_time,
4619 			(double)(time_st.enq_acc_min_time * 1000000) /
4620 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4621 			(double)(time_st.enq_acc_max_time * 1000000) /
4622 			rte_get_tsc_hz());
4623 
4624 	printf("Dequeue offload cost latency - one op:\n"
4625 			"\tavg: %lg cycles, %lg us\n"
4626 			"\tmin: %lg cycles, %lg us\n"
4627 			"\tmax: %lg cycles, %lg us\n",
4628 			(double)time_st.deq_total_time / (double)iter,
4629 			(double)(time_st.deq_total_time * 1000000) /
4630 			(double)iter / (double)rte_get_tsc_hz(),
4631 			(double)time_st.deq_min_time,
4632 			(double)(time_st.deq_min_time * 1000000) /
4633 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4634 			(double)(time_st.deq_max_time * 1000000) /
4635 			rte_get_tsc_hz());
4636 
4637 	return TEST_SUCCESS;
4638 #endif
4639 }
4640 
4641 #ifdef RTE_BBDEV_OFFLOAD_COST
4642 static int
4643 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4644 		const uint16_t num_to_process, uint16_t burst_sz,
4645 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4646 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4647 {
4648 	int i, deq_total;
4649 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4650 	uint64_t deq_start_time, deq_last_time;
4651 
4652 	/* Test deq offload latency from an empty queue */
4653 
4654 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4655 			++i, deq_total += burst_sz) {
4656 		deq_start_time = rte_rdtsc_precise();
4657 
4658 		if (unlikely(num_to_process - deq_total < burst_sz))
4659 			burst_sz = num_to_process - deq_total;
4660 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4661 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4662 					burst_sz);
4663 		else
4664 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4665 					burst_sz);
4666 
4667 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4668 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4669 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4670 		*deq_total_time += deq_last_time;
4671 	}
4672 
4673 	return i;
4674 }
4675 
4676 static int
4677 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4678 		const uint16_t num_to_process, uint16_t burst_sz,
4679 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4680 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4681 {
4682 	int i, deq_total;
4683 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4684 	uint64_t deq_start_time, deq_last_time;
4685 
4686 	/* Test deq offload latency from an empty queue */
4687 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4688 			++i, deq_total += burst_sz) {
4689 		deq_start_time = rte_rdtsc_precise();
4690 
4691 		if (unlikely(num_to_process - deq_total < burst_sz))
4692 			burst_sz = num_to_process - deq_total;
4693 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4694 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4695 					burst_sz);
4696 		else
4697 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4698 					burst_sz);
4699 
4700 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4701 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4702 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4703 		*deq_total_time += deq_last_time;
4704 	}
4705 
4706 	return i;
4707 }
4708 
4709 #endif
4710 
4711 static int
4712 offload_latency_empty_q_test(struct active_device *ad,
4713 		struct test_op_params *op_params)
4714 {
4715 #ifndef RTE_BBDEV_OFFLOAD_COST
4716 	RTE_SET_USED(ad);
4717 	RTE_SET_USED(op_params);
4718 	printf("Offload latency empty dequeue test is disabled.\n");
4719 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4720 	return TEST_SKIPPED;
4721 #else
4722 	int iter;
4723 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4724 	uint16_t burst_sz = op_params->burst_sz;
4725 	const uint16_t num_to_process = op_params->num_to_process;
4726 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4727 	const uint16_t queue_id = ad->queue_ids[0];
4728 	struct rte_bbdev_info info;
4729 	const char *op_type_str;
4730 
4731 	deq_total_time = deq_max_time = 0;
4732 	deq_min_time = UINT64_MAX;
4733 
4734 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4735 			"BURST_SIZE should be <= %u", MAX_BURST);
4736 
4737 	rte_bbdev_info_get(ad->dev_id, &info);
4738 
4739 	op_type_str = rte_bbdev_op_type_str(op_type);
4740 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4741 
4742 	printf("+ ------------------------------------------------------- +\n");
4743 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4744 			info.dev_name, burst_sz, num_to_process, op_type_str);
4745 
4746 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4747 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4748 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4749 				num_to_process, burst_sz, &deq_total_time,
4750 				&deq_min_time, &deq_max_time, op_type);
4751 	else
4752 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4753 				num_to_process, burst_sz, &deq_total_time,
4754 				&deq_min_time, &deq_max_time, op_type);
4755 
4756 	if (iter <= 0)
4757 		return TEST_FAILED;
4758 
4759 	printf("Empty dequeue offload:\n"
4760 			"\tavg: %lg cycles, %lg us\n"
4761 			"\tmin: %lg cycles, %lg us\n"
4762 			"\tmax: %lg cycles, %lg us\n",
4763 			(double)deq_total_time / (double)iter,
4764 			(double)(deq_total_time * 1000000) / (double)iter /
4765 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4766 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4767 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4768 			rte_get_tsc_hz());
4769 
4770 	return TEST_SUCCESS;
4771 #endif
4772 }
4773 
4774 static int
4775 bler_tc(void)
4776 {
4777 	return run_test_case(bler_test);
4778 }
4779 
4780 static int
4781 throughput_tc(void)
4782 {
4783 	return run_test_case(throughput_test);
4784 }
4785 
4786 static int
4787 offload_cost_tc(void)
4788 {
4789 	return run_test_case(offload_cost_test);
4790 }
4791 
4792 static int
4793 offload_latency_empty_q_tc(void)
4794 {
4795 	return run_test_case(offload_latency_empty_q_test);
4796 }
4797 
4798 static int
4799 latency_tc(void)
4800 {
4801 	return run_test_case(latency_test);
4802 }
4803 
4804 static int
4805 interrupt_tc(void)
4806 {
4807 	return run_test_case(throughput_test);
4808 }
4809 
4810 static struct unit_test_suite bbdev_bler_testsuite = {
4811 	.suite_name = "BBdev BLER Tests",
4812 	.setup = testsuite_setup,
4813 	.teardown = testsuite_teardown,
4814 	.unit_test_cases = {
4815 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4816 		TEST_CASES_END() /**< NULL terminate unit test array */
4817 	}
4818 };
4819 
4820 static struct unit_test_suite bbdev_throughput_testsuite = {
4821 	.suite_name = "BBdev Throughput Tests",
4822 	.setup = testsuite_setup,
4823 	.teardown = testsuite_teardown,
4824 	.unit_test_cases = {
4825 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4826 		TEST_CASES_END() /**< NULL terminate unit test array */
4827 	}
4828 };
4829 
4830 static struct unit_test_suite bbdev_validation_testsuite = {
4831 	.suite_name = "BBdev Validation Tests",
4832 	.setup = testsuite_setup,
4833 	.teardown = testsuite_teardown,
4834 	.unit_test_cases = {
4835 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4836 		TEST_CASES_END() /**< NULL terminate unit test array */
4837 	}
4838 };
4839 
4840 static struct unit_test_suite bbdev_latency_testsuite = {
4841 	.suite_name = "BBdev Latency Tests",
4842 	.setup = testsuite_setup,
4843 	.teardown = testsuite_teardown,
4844 	.unit_test_cases = {
4845 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4846 		TEST_CASES_END() /**< NULL terminate unit test array */
4847 	}
4848 };
4849 
4850 static struct unit_test_suite bbdev_offload_cost_testsuite = {
4851 	.suite_name = "BBdev Offload Cost Tests",
4852 	.setup = testsuite_setup,
4853 	.teardown = testsuite_teardown,
4854 	.unit_test_cases = {
4855 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
4856 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
4857 		TEST_CASES_END() /**< NULL terminate unit test array */
4858 	}
4859 };
4860 
4861 static struct unit_test_suite bbdev_interrupt_testsuite = {
4862 	.suite_name = "BBdev Interrupt Tests",
4863 	.setup = interrupt_testsuite_setup,
4864 	.teardown = testsuite_teardown,
4865 	.unit_test_cases = {
4866 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
4867 		TEST_CASES_END() /**< NULL terminate unit test array */
4868 	}
4869 };
4870 
4871 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
4872 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
4873 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
4874 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
4875 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
4876 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
4877