1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
22 #include <fpga_lte_fec.h>
23 #endif
24 
25 #include "main.h"
26 #include "test_bbdev_vector.h"
27 
28 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
29 
30 #define MAX_QUEUES RTE_MAX_LCORE
31 #define TEST_REPETITIONS 1000
32 
33 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
34 #define FPGA_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
35 #define FPGA_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
36 #define VF_UL_QUEUE_VALUE 4
37 #define VF_DL_QUEUE_VALUE 4
38 #define UL_BANDWIDTH 3
39 #define DL_BANDWIDTH 3
40 #define UL_LOAD_BALANCE 128
41 #define DL_LOAD_BALANCE 128
42 #define FLR_TIMEOUT 610
43 #endif
44 
45 #define OPS_CACHE_SIZE 256U
46 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
47 
48 #define SYNC_WAIT 0
49 #define SYNC_START 1
50 
51 #define INVALID_QUEUE_ID -1
52 
53 static struct test_bbdev_vector test_vector;
54 
55 /* Switch between PMD and Interrupt for throughput TC */
56 static bool intr_enabled;
57 
58 /* Represents tested active devices */
59 static struct active_device {
60 	const char *driver_name;
61 	uint8_t dev_id;
62 	uint16_t supported_ops;
63 	uint16_t queue_ids[MAX_QUEUES];
64 	uint16_t nb_queues;
65 	struct rte_mempool *ops_mempool;
66 	struct rte_mempool *in_mbuf_pool;
67 	struct rte_mempool *hard_out_mbuf_pool;
68 	struct rte_mempool *soft_out_mbuf_pool;
69 	struct rte_mempool *harq_in_mbuf_pool;
70 	struct rte_mempool *harq_out_mbuf_pool;
71 } active_devs[RTE_BBDEV_MAX_DEVS];
72 
73 static uint8_t nb_active_devs;
74 
75 /* Data buffers used by BBDEV ops */
76 struct test_buffers {
77 	struct rte_bbdev_op_data *inputs;
78 	struct rte_bbdev_op_data *hard_outputs;
79 	struct rte_bbdev_op_data *soft_outputs;
80 	struct rte_bbdev_op_data *harq_inputs;
81 	struct rte_bbdev_op_data *harq_outputs;
82 };
83 
84 /* Operation parameters specific to a given test case */
85 struct test_op_params {
86 	struct rte_mempool *mp;
87 	struct rte_bbdev_dec_op *ref_dec_op;
88 	struct rte_bbdev_enc_op *ref_enc_op;
89 	uint16_t burst_sz;
90 	uint16_t num_to_process;
91 	uint16_t num_lcores;
92 	int vector_mask;
93 	rte_atomic16_t sync;
94 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
95 };
96 
97 /* Contains per lcore params */
98 struct thread_params {
99 	uint8_t dev_id;
100 	uint16_t queue_id;
101 	uint32_t lcore_id;
102 	uint64_t start_time;
103 	double ops_per_sec;
104 	double mbps;
105 	uint8_t iter_count;
106 	rte_atomic16_t nb_dequeued;
107 	rte_atomic16_t processing_status;
108 	rte_atomic16_t burst_sz;
109 	struct test_op_params *op_params;
110 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
111 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
112 };
113 
114 #ifdef RTE_BBDEV_OFFLOAD_COST
115 /* Stores time statistics */
116 struct test_time_stats {
117 	/* Stores software enqueue total working time */
118 	uint64_t enq_sw_total_time;
119 	/* Stores minimum value of software enqueue working time */
120 	uint64_t enq_sw_min_time;
121 	/* Stores maximum value of software enqueue working time */
122 	uint64_t enq_sw_max_time;
123 	/* Stores accelerator enqueue total working time */
124 	uint64_t enq_acc_total_time;
125 	/* Stores minimum value of accelerator enqueue working time */
126 	uint64_t enq_acc_min_time;
127 	/* Stores maximum value of accelerator enqueue working time */
128 	uint64_t enq_acc_max_time;
129 	/* Stores dequeue total working time */
130 	uint64_t deq_total_time;
131 	/* Stores minimum value of dequeue working time */
132 	uint64_t deq_min_time;
133 	/* Stores maximum value of dequeue working time */
134 	uint64_t deq_max_time;
135 };
136 #endif
137 
138 typedef int (test_case_function)(struct active_device *ad,
139 		struct test_op_params *op_params);
140 
141 static inline void
142 mbuf_reset(struct rte_mbuf *m)
143 {
144 	m->pkt_len = 0;
145 
146 	do {
147 		m->data_len = 0;
148 		m = m->next;
149 	} while (m != NULL);
150 }
151 
152 /* Read flag value 0/1 from bitmap */
153 static inline bool
154 check_bit(uint32_t bitmap, uint32_t bitmask)
155 {
156 	return bitmap & bitmask;
157 }
158 
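/* Mark the given operation type as supported by this active device */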
159 static inline void
160 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
161 {
162 	ad->supported_ops |= (1 << op_type);
163 }
164 
165 static inline bool
166 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
167 {
168 	return ad->supported_ops & (1 << op_type);
169 }
170 
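/* Return true when every requested flag is present in the capability flags */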
171 static inline bool
172 flags_match(uint32_t flags_req, uint32_t flags_present)
173 {
174 	return (flags_req & flags_present) == flags_req;
175 }
176 
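/* Strip the soft-output related flags from a turbo decoder op_flags bitmap */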
177 static void
178 clear_soft_out_cap(uint32_t *op_flags)
179 {
180 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
181 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
182 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
183 }
184 
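/* Verify that the device capabilities cover the flags and buffer counts
 * required by the loaded test vector.
 */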
185 static int
186 check_dev_cap(const struct rte_bbdev_info *dev_info)
187 {
188 	unsigned int i;
189 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
190 		nb_harq_inputs, nb_harq_outputs;
191 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
192 
193 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
194 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
195 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
196 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
197 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
198 
199 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
200 		if (op_cap->type != test_vector.op_type)
201 			continue;
202 
203 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
204 			const struct rte_bbdev_op_cap_turbo_dec *cap =
205 					&op_cap->cap.turbo_dec;
206 			/* Ignore lack of soft output capability, just skip
207 			 * checking if soft output is valid.
208 			 */
209 			if ((test_vector.turbo_dec.op_flags &
210 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
211 					!(cap->capability_flags &
212 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
213 				printf(
214 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
215 					dev_info->dev_name);
216 				clear_soft_out_cap(
217 					&test_vector.turbo_dec.op_flags);
218 			}
219 
220 			if (!flags_match(test_vector.turbo_dec.op_flags,
221 					cap->capability_flags))
222 				return TEST_FAILED;
223 			if (nb_inputs > cap->num_buffers_src) {
224 				printf("Too many inputs defined: %u, max: %u\n",
225 					nb_inputs, cap->num_buffers_src);
226 				return TEST_FAILED;
227 			}
228 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
229 					(test_vector.turbo_dec.op_flags &
230 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
231 				printf(
232 					"Too many soft outputs defined: %u, max: %u\n",
233 						nb_soft_outputs,
234 						cap->num_buffers_soft_out);
235 				return TEST_FAILED;
236 			}
237 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
238 				printf(
239 					"Too many hard outputs defined: %u, max: %u\n",
240 						nb_hard_outputs,
241 						cap->num_buffers_hard_out);
242 				return TEST_FAILED;
243 			}
244 			if (intr_enabled && !(cap->capability_flags &
245 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
246 				printf(
247 					"Dequeue interrupts are not supported!\n");
248 				return TEST_FAILED;
249 			}
250 
251 			return TEST_SUCCESS;
252 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
253 			const struct rte_bbdev_op_cap_turbo_enc *cap =
254 					&op_cap->cap.turbo_enc;
255 
256 			if (!flags_match(test_vector.turbo_enc.op_flags,
257 					cap->capability_flags))
258 				return TEST_FAILED;
259 			if (nb_inputs > cap->num_buffers_src) {
260 				printf("Too many inputs defined: %u, max: %u\n",
261 					nb_inputs, cap->num_buffers_src);
262 				return TEST_FAILED;
263 			}
264 			if (nb_hard_outputs > cap->num_buffers_dst) {
265 				printf(
266 					"Too many hard outputs defined: %u, max: %u\n",
267 					nb_hard_outputs, cap->num_buffers_dst);
268 				return TEST_FAILED;
269 			}
270 			if (intr_enabled && !(cap->capability_flags &
271 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
272 				printf(
273 					"Dequeue interrupts are not supported!\n");
274 				return TEST_FAILED;
275 			}
276 
277 			return TEST_SUCCESS;
278 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
279 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
280 					&op_cap->cap.ldpc_enc;
281 
282 			if (!flags_match(test_vector.ldpc_enc.op_flags,
283 					cap->capability_flags)) {
284 				printf("Flag Mismatch\n");
285 				return TEST_FAILED;
286 			}
287 			if (nb_inputs > cap->num_buffers_src) {
288 				printf("Too many inputs defined: %u, max: %u\n",
289 					nb_inputs, cap->num_buffers_src);
290 				return TEST_FAILED;
291 			}
292 			if (nb_hard_outputs > cap->num_buffers_dst) {
293 				printf(
294 					"Too many hard outputs defined: %u, max: %u\n",
295 					nb_hard_outputs, cap->num_buffers_dst);
296 				return TEST_FAILED;
297 			}
298 			if (intr_enabled && !(cap->capability_flags &
299 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
300 				printf(
301 					"Dequeue interrupts are not supported!\n");
302 				return TEST_FAILED;
303 			}
304 
305 			return TEST_SUCCESS;
306 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
307 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
308 					&op_cap->cap.ldpc_dec;
309 
310 			if (!flags_match(test_vector.ldpc_dec.op_flags,
311 					cap->capability_flags)) {
312 				printf("Flag Mismatch\n");
313 				return TEST_FAILED;
314 			}
315 			if (nb_inputs > cap->num_buffers_src) {
316 				printf("Too many inputs defined: %u, max: %u\n",
317 					nb_inputs, cap->num_buffers_src);
318 				return TEST_FAILED;
319 			}
320 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
321 				printf(
322 					"Too many hard outputs defined: %u, max: %u\n",
323 					nb_hard_outputs,
324 					cap->num_buffers_hard_out);
325 				return TEST_FAILED;
326 			}
327 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
328 				printf(
329 					"Too many HARQ inputs defined: %u, max: %u\n",
330 					nb_harq_inputs,
331 					cap->num_buffers_hard_out);
332 				return TEST_FAILED;
333 			}
334 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
335 				printf(
336 					"Too many HARQ outputs defined: %u, max: %u\n",
337 					nb_harq_outputs,
338 					cap->num_buffers_hard_out);
339 				return TEST_FAILED;
340 			}
341 			if (intr_enabled && !(cap->capability_flags &
342 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
343 				printf(
344 					"Dequeue interrupts are not supported!\n");
345 				return TEST_FAILED;
346 			}
347 
348 			return TEST_SUCCESS;
349 		}
350 	}
351 
352 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
353 		return TEST_SUCCESS; /* Special case for NULL device */
354 
355 	return TEST_FAILED;
356 }
357 
358 /* Calculate the smallest (2^n - 1) mempool size that is not smaller than val */
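/* e.g. optimal_mempool_size(600) == 1023, as rte_align32pow2(601) rounds up to 1024 */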
359 static unsigned int
360 optimal_mempool_size(unsigned int val)
361 {
362 	return rte_align32pow2(val + 1) - 1;
363 }
364 
365 /* allocates mbuf mempool for inputs and outputs */
366 static struct rte_mempool *
367 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
368 		int socket_id, unsigned int mbuf_pool_size,
369 		const char *op_type_str)
370 {
371 	unsigned int i;
372 	uint32_t max_seg_sz = 0;
373 	char pool_name[RTE_MEMPOOL_NAMESIZE];
374 
375 	/* find max input segment size */
376 	for (i = 0; i < entries->nb_segments; ++i)
377 		if (entries->segments[i].length > max_seg_sz)
378 			max_seg_sz = entries->segments[i].length;
379 
380 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
381 			dev_id);
382 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
383 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
384 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
385 }
386 
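/* Create the ops mempool and, unless running the null device, the mbuf pools
 * required by the test vector.
 */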
387 static int
388 create_mempools(struct active_device *ad, int socket_id,
389 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
390 {
391 	struct rte_mempool *mp;
392 	unsigned int ops_pool_size, mbuf_pool_size = 0;
393 	char pool_name[RTE_MEMPOOL_NAMESIZE];
394 	const char *op_type_str;
395 	enum rte_bbdev_op_type op_type = org_op_type;
396 
397 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
398 	struct op_data_entries *hard_out =
399 			&test_vector.entries[DATA_HARD_OUTPUT];
400 	struct op_data_entries *soft_out =
401 			&test_vector.entries[DATA_SOFT_OUTPUT];
402 	struct op_data_entries *harq_in =
403 			&test_vector.entries[DATA_HARQ_INPUT];
404 	struct op_data_entries *harq_out =
405 			&test_vector.entries[DATA_HARQ_OUTPUT];
406 
407 	/* allocate ops mempool */
408 	ops_pool_size = optimal_mempool_size(RTE_MAX(
409 			/* Ops used plus 1 reference op */
410 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
411 			/* Minimal cache size plus 1 reference op */
412 			(unsigned int)(1.5 * rte_lcore_count() *
413 					OPS_CACHE_SIZE + 1)),
414 			OPS_POOL_SIZE_MIN));
415 
416 	if (org_op_type == RTE_BBDEV_OP_NONE)
417 		op_type = RTE_BBDEV_OP_TURBO_ENC;
418 
419 	op_type_str = rte_bbdev_op_type_str(op_type);
420 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
421 
422 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
423 			ad->dev_id);
424 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
425 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
426 	TEST_ASSERT_NOT_NULL(mp,
427 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
428 			ops_pool_size,
429 			ad->dev_id,
430 			socket_id);
431 	ad->ops_mempool = mp;
432 
433 	/* Do not create input and output mbuf pools for the BaseBand Null device */
434 	if (org_op_type == RTE_BBDEV_OP_NONE)
435 		return TEST_SUCCESS;
436 
437 	/* Inputs */
438 	mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
439 	mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
440 	TEST_ASSERT_NOT_NULL(mp,
441 			"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
442 			mbuf_pool_size,
443 			ad->dev_id,
444 			socket_id);
445 	ad->in_mbuf_pool = mp;
446 
447 	/* Hard outputs */
448 	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
449 			hard_out->nb_segments);
450 	mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
451 			"hard_out");
452 	TEST_ASSERT_NOT_NULL(mp,
453 			"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
454 			mbuf_pool_size,
455 			ad->dev_id,
456 			socket_id);
457 	ad->hard_out_mbuf_pool = mp;
458 
459 
460 	/* Soft outputs */
461 	if (soft_out->nb_segments > 0) {
462 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
463 				soft_out->nb_segments);
464 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
465 				mbuf_pool_size,
466 				"soft_out");
467 		TEST_ASSERT_NOT_NULL(mp,
468 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
469 				mbuf_pool_size,
470 				ad->dev_id,
471 				socket_id);
472 		ad->soft_out_mbuf_pool = mp;
473 	}
474 
475 	/* HARQ inputs */
476 	if (harq_in->nb_segments > 0) {
477 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
478 				harq_in->nb_segments);
479 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
480 				mbuf_pool_size,
481 				"harq_in");
482 		TEST_ASSERT_NOT_NULL(mp,
483 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
484 				mbuf_pool_size,
485 				ad->dev_id,
486 				socket_id);
487 		ad->harq_in_mbuf_pool = mp;
488 	}
489 
490 	/* HARQ outputs */
491 	if (harq_out->nb_segments > 0) {
492 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
493 				harq_out->nb_segments);
494 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
495 				mbuf_pool_size,
496 				"harq_out");
497 		TEST_ASSERT_NOT_NULL(mp,
498 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
499 				mbuf_pool_size,
500 				ad->dev_id,
501 				socket_id);
502 		ad->harq_out_mbuf_pool = mp;
503 	}
504 
505 	return TEST_SUCCESS;
506 }
507 
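/* Prepare one bbdev: optional FPGA LTE FEC PF configuration, queue setup
 * and optional interrupt enabling.
 */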
508 static int
509 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
510 		struct test_bbdev_vector *vector)
511 {
512 	int ret;
513 	unsigned int queue_id;
514 	struct rte_bbdev_queue_conf qconf;
515 	struct active_device *ad = &active_devs[nb_active_devs];
516 	unsigned int nb_queues;
517 	enum rte_bbdev_op_type op_type = vector->op_type;
518 
519 /* Configure the FPGA LTE FEC device with PF & VF values
520  * if the '-i' flag is set and an FPGA device is used
521  */
522 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
523 	if ((get_init_device() == true) &&
524 		(!strcmp(info->drv.driver_name, FPGA_PF_DRIVER_NAME))) {
525 		struct fpga_lte_fec_conf conf;
526 		unsigned int i;
527 
528 		printf("Configure FPGA FEC Driver %s with default values\n",
529 				info->drv.driver_name);
530 
531 		/* clear default configuration before initialization */
532 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
533 
534 		/* Set PF mode:
535 		 * true if the PF is used for the data plane,
536 		 * false when VFs are used
537 		 */
538 		conf.pf_mode_en = true;
539 
540 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
541 			/* Number of UL queues per VF (fpga supports 8 VFs) */
542 			conf.vf_ul_queues_number[i] = VF_UL_QUEUE_VALUE;
543 			/* Number of DL queues per VF (fpga supports 8 VFs) */
544 			conf.vf_dl_queues_number[i] = VF_DL_QUEUE_VALUE;
545 		}
546 
547 		/* UL bandwidth, needed by the scheduling algorithm */
548 		conf.ul_bandwidth = UL_BANDWIDTH;
549 		/* DL bandwidth */
550 		conf.dl_bandwidth = DL_BANDWIDTH;
551 
552 		/* UL & DL load balance factor (128) */
553 		conf.ul_load_balance = UL_LOAD_BALANCE;
554 		conf.dl_load_balance = DL_LOAD_BALANCE;
555 
556 		/* FLR timeout value */
557 		conf.flr_time_out = FLR_TIMEOUT;
558 
559 		/* setup FPGA PF with configuration information */
560 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
561 		TEST_ASSERT_SUCCESS(ret,
562 				"Failed to configure 4G FPGA PF for bbdev %s",
563 				info->dev_name);
564 	}
565 #endif
566 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
567 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
568 
569 	/* setup device */
570 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
571 	if (ret < 0) {
572 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
573 				dev_id, nb_queues, info->socket_id, ret);
574 		return TEST_FAILED;
575 	}
576 
577 	/* configure interrupts if needed */
578 	if (intr_enabled) {
579 		ret = rte_bbdev_intr_enable(dev_id);
580 		if (ret < 0) {
581 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
582 					ret);
583 			return TEST_FAILED;
584 		}
585 	}
586 
587 	/* setup device queues */
588 	qconf.socket = info->socket_id;
589 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
590 	qconf.priority = 0;
591 	qconf.deferred_start = 0;
592 	qconf.op_type = op_type;
593 
594 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
595 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
596 		if (ret != 0) {
597 			printf(
598 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
599 					queue_id, qconf.priority, dev_id);
600 			qconf.priority++;
601 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
602 					&qconf);
603 		}
604 		if (ret != 0) {
605 			printf("All queues on dev %u allocated: %u\n",
606 					dev_id, queue_id);
607 			break;
608 		}
609 		ad->queue_ids[queue_id] = queue_id;
610 	}
611 	TEST_ASSERT(queue_id != 0,
612 			"ERROR Failed to configure any queues on dev %u",
613 			dev_id);
614 	ad->nb_queues = queue_id;
615 
616 	set_avail_op(ad, op_type);
617 
618 	return TEST_SUCCESS;
619 }
620 
621 static int
622 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
623 		struct test_bbdev_vector *vector)
624 {
625 	int ret;
626 
627 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
628 	active_devs[nb_active_devs].dev_id = dev_id;
629 
630 	ret = add_bbdev_dev(dev_id, info, vector);
631 	if (ret == TEST_SUCCESS)
632 		++nb_active_devs;
633 	return ret;
634 }
635 
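/* Register every detected bbdev whose capabilities match the test vector */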
636 static uint8_t
637 populate_active_devices(void)
638 {
639 	int ret;
640 	uint8_t dev_id;
641 	uint8_t nb_devs_added = 0;
642 	struct rte_bbdev_info info;
643 
644 	RTE_BBDEV_FOREACH(dev_id) {
645 		rte_bbdev_info_get(dev_id, &info);
646 
647 		if (check_dev_cap(&info)) {
648 			printf(
649 				"Device %d (%s) does not support specified capabilities\n",
650 					dev_id, info.dev_name);
651 			continue;
652 		}
653 
654 		ret = add_active_device(dev_id, &info, &test_vector);
655 		if (ret != 0) {
656 			printf("Adding active bbdev %s skipped\n",
657 					info.dev_name);
658 			continue;
659 		}
660 		nb_devs_added++;
661 	}
662 
663 	return nb_devs_added;
664 }
665 
666 static int
667 read_test_vector(void)
668 {
669 	int ret;
670 
671 	memset(&test_vector, 0, sizeof(test_vector));
672 	printf("Test vector file = %s\n", get_vector_filename());
673 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
674 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
675 			get_vector_filename());
676 
677 	return TEST_SUCCESS;
678 }
679 
680 static int
681 testsuite_setup(void)
682 {
683 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
684 
685 	if (populate_active_devices() == 0) {
686 		printf("No suitable devices found!\n");
687 		return TEST_SKIPPED;
688 	}
689 
690 	return TEST_SUCCESS;
691 }
692 
693 static int
694 interrupt_testsuite_setup(void)
695 {
696 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
697 
698 	/* Enable interrupts */
699 	intr_enabled = true;
700 
701 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
702 	if (populate_active_devices() == 0 ||
703 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
704 		intr_enabled = false;
705 		printf("No suitable devices found!\n");
706 		return TEST_SKIPPED;
707 	}
708 
709 	return TEST_SUCCESS;
710 }
711 
712 static void
713 testsuite_teardown(void)
714 {
715 	uint8_t dev_id;
716 
717 	/* Unconfigure devices */
718 	RTE_BBDEV_FOREACH(dev_id)
719 		rte_bbdev_close(dev_id);
720 
721 	/* Clear active devices structs. */
722 	memset(active_devs, 0, sizeof(active_devs));
723 	nb_active_devs = 0;
724 }
725 
726 static int
727 ut_setup(void)
728 {
729 	uint8_t i, dev_id;
730 
731 	for (i = 0; i < nb_active_devs; i++) {
732 		dev_id = active_devs[i].dev_id;
733 		/* reset bbdev stats */
734 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
735 				"Failed to reset stats of bbdev %u", dev_id);
736 		/* start the device */
737 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
738 				"Failed to start bbdev %u", dev_id);
739 	}
740 
741 	return TEST_SUCCESS;
742 }
743 
744 static void
745 ut_teardown(void)
746 {
747 	uint8_t i, dev_id;
748 	struct rte_bbdev_stats stats;
749 
750 	for (i = 0; i < nb_active_devs; i++) {
751 		dev_id = active_devs[i].dev_id;
752 		/* read device stats */
753 		rte_bbdev_stats_get(dev_id, &stats);
754 		/* Stop the device */
755 		rte_bbdev_stop(dev_id);
756 	}
757 }
758 
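/* Populate each rte_bbdev_op_data with mbufs (chained for multi-segment
 * entries) holding the reference data from the test vector.
 */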
759 static int
760 init_op_data_objs(struct rte_bbdev_op_data *bufs,
761 		struct op_data_entries *ref_entries,
762 		struct rte_mempool *mbuf_pool, const uint16_t n,
763 		enum op_data_type op_type, uint16_t min_alignment)
764 {
765 	int ret;
766 	unsigned int i, j;
767 
768 	for (i = 0; i < n; ++i) {
769 		char *data;
770 		struct op_data_buf *seg = &ref_entries->segments[0];
771 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
772 		TEST_ASSERT_NOT_NULL(m_head,
773 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
774 				op_type, n * ref_entries->nb_segments,
775 				mbuf_pool->size);
776 
777 		TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
778 				(uint32_t)UINT16_MAX),
779 				"Given data is bigger than allowed mbuf segment size");
780 
781 		bufs[i].data = m_head;
782 		bufs[i].offset = 0;
783 		bufs[i].length = 0;
784 
785 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
786 			data = rte_pktmbuf_append(m_head, seg->length);
787 			TEST_ASSERT_NOT_NULL(data,
788 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
789 					seg->length, op_type);
790 
791 			TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
792 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
793 					data, min_alignment);
794 			rte_memcpy(data, seg->addr, seg->length);
795 			bufs[i].length += seg->length;
796 
797 			for (j = 1; j < ref_entries->nb_segments; ++j) {
798 				struct rte_mbuf *m_tail =
799 						rte_pktmbuf_alloc(mbuf_pool);
800 				TEST_ASSERT_NOT_NULL(m_tail,
801 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
802 						op_type,
803 						n * ref_entries->nb_segments,
804 						mbuf_pool->size);
805 				seg += 1;
806 
807 				data = rte_pktmbuf_append(m_tail, seg->length);
808 				TEST_ASSERT_NOT_NULL(data,
809 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
810 						seg->length, op_type);
811 
812 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
813 						min_alignment),
814 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
815 						data, min_alignment);
816 				rte_memcpy(data, seg->addr, seg->length);
817 				bufs[i].length += seg->length;
818 
819 				ret = rte_pktmbuf_chain(m_head, m_tail);
820 				TEST_ASSERT_SUCCESS(ret,
821 						"Couldn't chain mbufs from %d data type mbuf pool",
822 						op_type);
823 			}
824 		} else {
825 
826 			/* allocate chained-mbuf for output buffer */
827 			for (j = 1; j < ref_entries->nb_segments; ++j) {
828 				struct rte_mbuf *m_tail =
829 						rte_pktmbuf_alloc(mbuf_pool);
830 				TEST_ASSERT_NOT_NULL(m_tail,
831 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
832 						op_type,
833 						n * ref_entries->nb_segments,
834 						mbuf_pool->size);
835 
836 				ret = rte_pktmbuf_chain(m_head, m_tail);
837 				TEST_ASSERT_SUCCESS(ret,
838 						"Couldn't chain mbufs from %d data type mbuf pool",
839 						op_type);
840 			}
841 		}
842 	}
843 
844 	return 0;
845 }
846 
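/* Allocate a zeroed op_data array on the requested socket, with fallback */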
847 static int
848 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
849 		const int socket)
850 {
851 	int i;
852 
853 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
854 	if (*buffers == NULL) {
855 		printf("WARNING: Failed to allocate op_data on socket %d\n",
856 				socket);
857 		/* try to allocate memory on other detected sockets */
858 		/* fall back to the lower-numbered sockets */
859 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
860 			if (*buffers != NULL)
861 				break;
862 		}
863 	}
864 
865 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
866 }
867 
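/* Rescale each input LLR from the int8_t range down to the device's
 * max LLR modulus (turbo decoder).
 */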
868 static void
869 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
870 		const uint16_t n, const int8_t max_llr_modulus)
871 {
872 	uint16_t i, byte_idx;
873 
874 	for (i = 0; i < n; ++i) {
875 		struct rte_mbuf *m = input_ops[i].data;
876 		while (m != NULL) {
877 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
878 					input_ops[i].offset);
879 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
880 					++byte_idx)
881 				llr[byte_idx] = round((double)max_llr_modulus *
882 						llr[byte_idx] / INT8_MAX);
883 
884 			m = m->next;
885 		}
886 	}
887 }
888 
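/* Shift and saturate input LLRs to the LDPC decoder's fixed-point LLR
 * representation (llr_size bits, llr_decimals fractional bits).
 */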
889 static void
890 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
891 		const uint16_t n, const int8_t llr_size,
892 		const int8_t llr_decimals)
893 {
894 	if (input_ops == NULL)
895 		return;
896 
897 	uint16_t i, byte_idx;
898 
899 	int16_t llr_max, llr_min, llr_tmp;
900 	llr_max = (1 << (llr_size - 1)) - 1;
901 	llr_min = -llr_max;
902 	for (i = 0; i < n; ++i) {
903 		struct rte_mbuf *m = input_ops[i].data;
904 		while (m != NULL) {
905 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
906 					input_ops[i].offset);
907 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
908 					++byte_idx) {
909 
910 				llr_tmp = llr[byte_idx];
911 				if (llr_decimals == 2)
912 					llr_tmp *= 2;
913 				else if (llr_decimals == 0)
914 					llr_tmp /= 2;
915 				llr_tmp = RTE_MIN(llr_max,
916 						RTE_MAX(llr_min, llr_tmp));
917 				llr[byte_idx] = (int8_t) llr_tmp;
918 			}
919 
920 			m = m->next;
921 		}
922 	}
923 }
924 
925 
926 
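/* Allocate and initialise the per-queue op_data buffers for every data type
 * present in the test vector.
 */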
927 static int
928 fill_queue_buffers(struct test_op_params *op_params,
929 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
930 		struct rte_mempool *soft_out_mp,
931 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
932 		uint16_t queue_id,
933 		const struct rte_bbdev_op_cap *capabilities,
934 		uint16_t min_alignment, const int socket_id)
935 {
936 	int ret;
937 	enum op_data_type type;
938 	const uint16_t n = op_params->num_to_process;
939 
940 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
941 		in_mp,
942 		soft_out_mp,
943 		hard_out_mp,
944 		harq_in_mp,
945 		harq_out_mp,
946 	};
947 
948 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
949 		&op_params->q_bufs[socket_id][queue_id].inputs,
950 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
951 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
952 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
953 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
954 	};
955 
956 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
957 		struct op_data_entries *ref_entries =
958 				&test_vector.entries[type];
959 		if (ref_entries->nb_segments == 0)
960 			continue;
961 
962 		ret = allocate_buffers_on_socket(queue_ops[type],
963 				n * sizeof(struct rte_bbdev_op_data),
964 				socket_id);
965 		TEST_ASSERT_SUCCESS(ret,
966 				"Couldn't allocate memory for rte_bbdev_op_data structs");
967 
968 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
969 				mbuf_pools[type], n, type, min_alignment);
970 		TEST_ASSERT_SUCCESS(ret,
971 				"Couldn't init rte_bbdev_op_data structs");
972 	}
973 
974 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
975 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
976 			capabilities->cap.turbo_dec.max_llr_modulus);
977 
978 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
979 		ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
980 			capabilities->cap.ldpc_dec.llr_size,
981 			capabilities->cap.ldpc_dec.llr_decimals);
982 		ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
983 				capabilities->cap.ldpc_dec.llr_size,
984 				capabilities->cap.ldpc_dec.llr_decimals);
985 	}
986 
987 	return 0;
988 }
989 
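/* Free the per-device mempools and the per-queue op_data arrays */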
990 static void
991 free_buffers(struct active_device *ad, struct test_op_params *op_params)
992 {
993 	unsigned int i, j;
994 
995 	rte_mempool_free(ad->ops_mempool);
996 	rte_mempool_free(ad->in_mbuf_pool);
997 	rte_mempool_free(ad->hard_out_mbuf_pool);
998 	rte_mempool_free(ad->soft_out_mbuf_pool);
999 	rte_mempool_free(ad->harq_in_mbuf_pool);
1000 	rte_mempool_free(ad->harq_out_mbuf_pool);
1001 
1002 	for (i = 0; i < rte_lcore_count(); ++i) {
1003 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1004 			rte_free(op_params->q_bufs[j][i].inputs);
1005 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1006 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1007 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1008 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1009 		}
1010 	}
1011 }
1012 
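/* Copy the reference turbo decode parameters into each op and attach the
 * per-op input/output buffers.
 */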
1013 static void
1014 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1015 		unsigned int start_idx,
1016 		struct rte_bbdev_op_data *inputs,
1017 		struct rte_bbdev_op_data *hard_outputs,
1018 		struct rte_bbdev_op_data *soft_outputs,
1019 		struct rte_bbdev_dec_op *ref_op)
1020 {
1021 	unsigned int i;
1022 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1023 
1024 	for (i = 0; i < n; ++i) {
1025 		if (turbo_dec->code_block_mode == 0) {
1026 			ops[i]->turbo_dec.tb_params.ea =
1027 					turbo_dec->tb_params.ea;
1028 			ops[i]->turbo_dec.tb_params.eb =
1029 					turbo_dec->tb_params.eb;
1030 			ops[i]->turbo_dec.tb_params.k_pos =
1031 					turbo_dec->tb_params.k_pos;
1032 			ops[i]->turbo_dec.tb_params.k_neg =
1033 					turbo_dec->tb_params.k_neg;
1034 			ops[i]->turbo_dec.tb_params.c =
1035 					turbo_dec->tb_params.c;
1036 			ops[i]->turbo_dec.tb_params.c_neg =
1037 					turbo_dec->tb_params.c_neg;
1038 			ops[i]->turbo_dec.tb_params.cab =
1039 					turbo_dec->tb_params.cab;
1040 			ops[i]->turbo_dec.tb_params.r =
1041 					turbo_dec->tb_params.r;
1042 		} else {
1043 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1044 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1045 		}
1046 
1047 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1048 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1049 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1050 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1051 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1052 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1053 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1054 
1055 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1056 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1057 		if (soft_outputs != NULL)
1058 			ops[i]->turbo_dec.soft_output =
1059 				soft_outputs[start_idx + i];
1060 	}
1061 }
1062 
1063 static void
1064 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1065 		unsigned int start_idx,
1066 		struct rte_bbdev_op_data *inputs,
1067 		struct rte_bbdev_op_data *outputs,
1068 		struct rte_bbdev_enc_op *ref_op)
1069 {
1070 	unsigned int i;
1071 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1072 	for (i = 0; i < n; ++i) {
1073 		if (turbo_enc->code_block_mode == 0) {
1074 			ops[i]->turbo_enc.tb_params.ea =
1075 					turbo_enc->tb_params.ea;
1076 			ops[i]->turbo_enc.tb_params.eb =
1077 					turbo_enc->tb_params.eb;
1078 			ops[i]->turbo_enc.tb_params.k_pos =
1079 					turbo_enc->tb_params.k_pos;
1080 			ops[i]->turbo_enc.tb_params.k_neg =
1081 					turbo_enc->tb_params.k_neg;
1082 			ops[i]->turbo_enc.tb_params.c =
1083 					turbo_enc->tb_params.c;
1084 			ops[i]->turbo_enc.tb_params.c_neg =
1085 					turbo_enc->tb_params.c_neg;
1086 			ops[i]->turbo_enc.tb_params.cab =
1087 					turbo_enc->tb_params.cab;
1088 			ops[i]->turbo_enc.tb_params.ncb_pos =
1089 					turbo_enc->tb_params.ncb_pos;
1090 			ops[i]->turbo_enc.tb_params.ncb_neg =
1091 					turbo_enc->tb_params.ncb_neg;
1092 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1093 		} else {
1094 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1095 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1096 			ops[i]->turbo_enc.cb_params.ncb =
1097 					turbo_enc->cb_params.ncb;
1098 		}
1099 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1100 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1101 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1102 
1103 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1104 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1105 	}
1106 }
1107 
1108 static void
1109 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1110 		unsigned int start_idx,
1111 		struct rte_bbdev_op_data *inputs,
1112 		struct rte_bbdev_op_data *hard_outputs,
1113 		struct rte_bbdev_op_data *soft_outputs,
1114 		struct rte_bbdev_op_data *harq_inputs,
1115 		struct rte_bbdev_op_data *harq_outputs,
1116 		struct rte_bbdev_dec_op *ref_op)
1117 {
1118 	unsigned int i;
1119 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1120 
1121 	for (i = 0; i < n; ++i) {
1122 		if (ldpc_dec->code_block_mode == 0) {
1123 			ops[i]->ldpc_dec.tb_params.ea =
1124 					ldpc_dec->tb_params.ea;
1125 			ops[i]->ldpc_dec.tb_params.eb =
1126 					ldpc_dec->tb_params.eb;
1127 			ops[i]->ldpc_dec.tb_params.c =
1128 					ldpc_dec->tb_params.c;
1129 			ops[i]->ldpc_dec.tb_params.cab =
1130 					ldpc_dec->tb_params.cab;
1131 			ops[i]->ldpc_dec.tb_params.r =
1132 					ldpc_dec->tb_params.r;
1133 		} else {
1134 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1135 		}
1136 
1137 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1138 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1139 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1140 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1141 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1142 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1143 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1144 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1145 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1146 
1147 		ops[i]->ldpc_dec.hard_output = hard_outputs[start_idx + i];
1148 		ops[i]->ldpc_dec.input = inputs[start_idx + i];
1149 		if (soft_outputs != NULL)
1150 			ops[i]->ldpc_dec.soft_output =
1151 				soft_outputs[start_idx + i];
1152 		if (harq_inputs != NULL)
1153 			ops[i]->ldpc_dec.harq_combined_input =
1154 					harq_inputs[start_idx + i];
1155 		if (harq_outputs != NULL)
1156 			ops[i]->ldpc_dec.harq_combined_output =
1157 				harq_outputs[start_idx + i];
1158 	}
1159 }
1160 
1161 
1162 static void
1163 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1164 		unsigned int start_idx,
1165 		struct rte_bbdev_op_data *inputs,
1166 		struct rte_bbdev_op_data *outputs,
1167 		struct rte_bbdev_enc_op *ref_op)
1168 {
1169 	unsigned int i;
1170 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1171 	for (i = 0; i < n; ++i) {
1172 		if (ldpc_enc->code_block_mode == 0) {
1173 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1174 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1175 			ops[i]->ldpc_enc.tb_params.cab =
1176 					ldpc_enc->tb_params.cab;
1177 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1178 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1179 		} else {
1180 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1181 		}
1182 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1183 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1184 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1185 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1186 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1187 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1188 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1189 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1190 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1191 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1192 	}
1193 }
1194 
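/* Check op status and that opaque_data still holds the original enqueue index */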
1195 static int
1196 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1197 		unsigned int order_idx, const int expected_status)
1198 {
1199 	TEST_ASSERT(op->status == expected_status,
1200 			"op_status (%d) != expected_status (%d)",
1201 			op->status, expected_status);
1202 
1203 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1204 			"Ordering error, expected %p, got %p",
1205 			(void *)(uintptr_t)order_idx, op->opaque_data);
1206 
1207 	return TEST_SUCCESS;
1208 }
1209 
1210 static int
1211 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1212 		unsigned int order_idx, const int expected_status)
1213 {
1214 	TEST_ASSERT(op->status == expected_status,
1215 			"op_status (%d) != expected_status (%d)",
1216 			op->status, expected_status);
1217 
1218 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1219 			"Ordering error, expected %p, got %p",
1220 			(void *)(uintptr_t)order_idx, op->opaque_data);
1221 
1222 	return TEST_SUCCESS;
1223 }
1224 
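/* Compare an output mbuf chain segment by segment against the reference data */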
1225 static inline int
1226 validate_op_chain(struct rte_bbdev_op_data *op,
1227 		struct op_data_entries *orig_op)
1228 {
1229 	uint8_t i;
1230 	struct rte_mbuf *m = op->data;
1231 	uint8_t nb_dst_segments = orig_op->nb_segments;
1232 	uint32_t total_data_size = 0;
1233 
1234 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1235 			"Number of segments differ in original (%u) and filled (%u) op",
1236 			nb_dst_segments, m->nb_segs);
1237 
1238 	/* Validate each mbuf segment length */
1239 	for (i = 0; i < nb_dst_segments; ++i) {
1240 		/* Apply offset to the first mbuf segment */
1241 		uint16_t offset = (i == 0) ? op->offset : 0;
1242 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1243 		total_data_size += orig_op->segments[i].length;
1244 
1245 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1246 				"Length of segment differ in original (%u) and filled (%u) op",
1247 				orig_op->segments[i].length, data_len);
1248 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1249 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1250 				data_len,
1251 				"Output buffers (CB=%u) are not equal", i);
1252 		m = m->next;
1253 	}
1254 
1255 	/* Validate total mbuf pkt length */
1256 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1257 	TEST_ASSERT(total_data_size == pkt_len,
1258 			"Length of data differ in original (%u) and filled (%u) op",
1259 			total_data_size, pkt_len);
1260 
1261 	return TEST_SUCCESS;
1262 }
1263 
1264 static int
1265 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1266 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1267 {
1268 	unsigned int i;
1269 	int ret;
1270 	struct op_data_entries *hard_data_orig =
1271 			&test_vector.entries[DATA_HARD_OUTPUT];
1272 	struct op_data_entries *soft_data_orig =
1273 			&test_vector.entries[DATA_SOFT_OUTPUT];
1274 	struct rte_bbdev_op_turbo_dec *ops_td;
1275 	struct rte_bbdev_op_data *hard_output;
1276 	struct rte_bbdev_op_data *soft_output;
1277 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1278 
1279 	for (i = 0; i < n; ++i) {
1280 		ops_td = &ops[i]->turbo_dec;
1281 		hard_output = &ops_td->hard_output;
1282 		soft_output = &ops_td->soft_output;
1283 
1284 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1285 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1286 					"Returned iter_count (%d) > expected iter_count (%d)",
1287 					ops_td->iter_count, ref_td->iter_count);
1288 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1289 		TEST_ASSERT_SUCCESS(ret,
1290 				"Checking status and ordering for decoder failed");
1291 
1292 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1293 				hard_data_orig),
1294 				"Hard output buffers (CB=%u) are not equal",
1295 				i);
1296 
1297 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1298 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1299 					soft_data_orig),
1300 					"Soft output buffers (CB=%u) are not equal",
1301 					i);
1302 	}
1303 
1304 	return TEST_SUCCESS;
1305 }
1306 
1307 
1308 static int
1309 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1310 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1311 {
1312 	unsigned int i;
1313 	int ret;
1314 	struct op_data_entries *hard_data_orig =
1315 			&test_vector.entries[DATA_HARD_OUTPUT];
1316 	struct op_data_entries *soft_data_orig =
1317 			&test_vector.entries[DATA_SOFT_OUTPUT];
1318 	struct op_data_entries *harq_data_orig =
1319 				&test_vector.entries[DATA_HARQ_OUTPUT];
1320 	struct rte_bbdev_op_ldpc_dec *ops_td;
1321 	struct rte_bbdev_op_data *hard_output;
1322 	struct rte_bbdev_op_data *harq_output;
1323 	struct rte_bbdev_op_data *soft_output;
1324 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1325 
1326 	for (i = 0; i < n; ++i) {
1327 		ops_td = &ops[i]->ldpc_dec;
1328 		hard_output = &ops_td->hard_output;
1329 		harq_output = &ops_td->harq_combined_output;
1330 		soft_output = &ops_td->soft_output;
1331 
1332 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1333 		TEST_ASSERT_SUCCESS(ret,
1334 				"Checking status and ordering for decoder failed");
1335 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1336 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1337 					"Returned iter_count (%d) > expected iter_count (%d)",
1338 					ops_td->iter_count, ref_td->iter_count);
1339 		/* We can ignore data when the decoding failed to converge */
1340 		if ((ops[i]->status &  (1 << RTE_BBDEV_SYNDROME_ERROR)) == 0)
1341 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1342 					hard_data_orig),
1343 					"Hard output buffers (CB=%u) are not equal",
1344 					i);
1345 
1346 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1347 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1348 					soft_data_orig),
1349 					"Soft output buffers (CB=%u) are not equal",
1350 					i);
1351 		if (ref_op->ldpc_dec.op_flags &
1352 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1353 			ldpc_input_llr_scaling(harq_output, 1, 8, 0);
1354 			TEST_ASSERT_SUCCESS(validate_op_chain(harq_output,
1355 					harq_data_orig),
1356 					"HARQ output buffers (CB=%u) are not equal",
1357 					i);
1358 		}
1359 	}
1360 
1361 	return TEST_SUCCESS;
1362 }
1363 
1364 
1365 static int
1366 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1367 		struct rte_bbdev_enc_op *ref_op)
1368 {
1369 	unsigned int i;
1370 	int ret;
1371 	struct op_data_entries *hard_data_orig =
1372 			&test_vector.entries[DATA_HARD_OUTPUT];
1373 
1374 	for (i = 0; i < n; ++i) {
1375 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1376 		TEST_ASSERT_SUCCESS(ret,
1377 				"Checking status and ordering for encoder failed");
1378 		TEST_ASSERT_SUCCESS(validate_op_chain(
1379 				&ops[i]->turbo_enc.output,
1380 				hard_data_orig),
1381 				"Output buffers (CB=%u) are not equal",
1382 				i);
1383 	}
1384 
1385 	return TEST_SUCCESS;
1386 }
1387 
1388 static int
1389 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1390 		struct rte_bbdev_enc_op *ref_op)
1391 {
1392 	unsigned int i;
1393 	int ret;
1394 	struct op_data_entries *hard_data_orig =
1395 			&test_vector.entries[DATA_HARD_OUTPUT];
1396 
1397 	for (i = 0; i < n; ++i) {
1398 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1399 		TEST_ASSERT_SUCCESS(ret,
1400 				"Checking status and ordering for encoder failed");
1401 		TEST_ASSERT_SUCCESS(validate_op_chain(
1402 				&ops[i]->ldpc_enc.output,
1403 				hard_data_orig),
1404 				"Output buffers (CB=%u) are not equal",
1405 				i);
1406 	}
1407 
1408 	return TEST_SUCCESS;
1409 }
1410 
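/* Build the reference turbo decode op from the test vector and accumulate
 * its input length.
 */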
1411 static void
1412 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1413 {
1414 	unsigned int i;
1415 	struct op_data_entries *entry;
1416 
1417 	op->turbo_dec = test_vector.turbo_dec;
1418 	entry = &test_vector.entries[DATA_INPUT];
1419 	for (i = 0; i < entry->nb_segments; ++i)
1420 		op->turbo_dec.input.length +=
1421 				entry->segments[i].length;
1422 }
1423 
1424 static void
1425 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
1426 {
1427 	unsigned int i;
1428 	struct op_data_entries *entry;
1429 
1430 	op->ldpc_dec = test_vector.ldpc_dec;
1431 	entry = &test_vector.entries[DATA_INPUT];
1432 	for (i = 0; i < entry->nb_segments; ++i)
1433 		op->ldpc_dec.input.length +=
1434 				entry->segments[i].length;
1435 	if (test_vector.ldpc_dec.op_flags &
1436 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
1437 		entry = &test_vector.entries[DATA_HARQ_INPUT];
1438 		for (i = 0; i < entry->nb_segments; ++i)
1439 			op->ldpc_dec.harq_combined_input.length +=
1440 				entry->segments[i].length;
1441 	}
1442 }
1443 
1444 
1445 static void
1446 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1447 {
1448 	unsigned int i;
1449 	struct op_data_entries *entry;
1450 
1451 	op->turbo_enc = test_vector.turbo_enc;
1452 	entry = &test_vector.entries[DATA_INPUT];
1453 	for (i = 0; i < entry->nb_segments; ++i)
1454 		op->turbo_enc.input.length +=
1455 				entry->segments[i].length;
1456 }
1457 
1458 static void
1459 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
1460 {
1461 	unsigned int i;
1462 	struct op_data_entries *entry;
1463 
1464 	op->ldpc_enc = test_vector.ldpc_enc;
1465 	entry = &test_vector.entries[DATA_INPUT];
1466 	for (i = 0; i < entry->nb_segments; ++i)
1467 		op->ldpc_enc.input.length +=
1468 				entry->segments[i].length;
1469 }
1470 
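/* TB size helpers: payload size in bits of the transport block described by
 * the reference op, used later for the throughput (Mbps) calculation.
 */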
1471 static uint32_t
1472 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1473 {
1474 	uint8_t i;
1475 	uint32_t c, r, tb_size = 0;
1476 
1477 	if (op->turbo_dec.code_block_mode) {
1478 		tb_size = op->turbo_dec.tb_params.k_neg;
1479 	} else {
1480 		c = op->turbo_dec.tb_params.c;
1481 		r = op->turbo_dec.tb_params.r;
1482 		for (i = 0; i < c-r; i++)
1483 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1484 				op->turbo_dec.tb_params.k_neg :
1485 				op->turbo_dec.tb_params.k_pos;
1486 	}
1487 	return tb_size;
1488 }
1489 
1490 static uint32_t
1491 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
1492 {
1493 	uint8_t i;
1494 	uint32_t c, r, tb_size = 0;
1495 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
1496 
1497 	if (op->ldpc_dec.code_block_mode) {
1498 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1499 	} else {
1500 		c = op->ldpc_dec.tb_params.c;
1501 		r = op->ldpc_dec.tb_params.r;
1502 		for (i = 0; i < c-r; i++)
1503 			tb_size += sys_cols * op->ldpc_dec.z_c
1504 					- op->ldpc_dec.n_filler;
1505 	}
1506 	return tb_size;
1507 }
1508 
1509 static uint32_t
1510 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1511 {
1512 	uint8_t i;
1513 	uint32_t c, r, tb_size = 0;
1514 
1515 	if (op->turbo_enc.code_block_mode) {
1516 		tb_size = op->turbo_enc.tb_params.k_neg;
1517 	} else {
1518 		c = op->turbo_enc.tb_params.c;
1519 		r = op->turbo_enc.tb_params.r;
1520 		for (i = 0; i < c-r; i++)
1521 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1522 				op->turbo_enc.tb_params.k_neg :
1523 				op->turbo_enc.tb_params.k_pos;
1524 	}
1525 	return tb_size;
1526 }
1527 
1528 static uint32_t
1529 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
1530 {
1531 	uint8_t i;
1532 	uint32_t c, r, tb_size = 0;
1533 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
1534 
1535 	if (op->ldpc_enc.code_block_mode) {
1536 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
1537 	} else {
1538 		c = op->ldpc_enc.tb_params.c;
1539 		r = op->ldpc_enc.tb_params.r;
1540 		for (i = 0; i < c-r; i++)
1541 			tb_size += sys_cols * op->ldpc_enc.z_c
1542 					- op->ldpc_enc.n_filler;
1543 	}
1544 	return tb_size;
1545 }
1546 
1547 
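/* Allocate the reference op from the ops mempool and fill the common
 * test op parameters.
 */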
1548 static int
1549 init_test_op_params(struct test_op_params *op_params,
1550 		enum rte_bbdev_op_type op_type, const int expected_status,
1551 		const int vector_mask, struct rte_mempool *ops_mp,
1552 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1553 {
1554 	int ret = 0;
1555 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1556 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1557 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1558 				&op_params->ref_dec_op, 1);
1559 	else
1560 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1561 				&op_params->ref_enc_op, 1);
1562 
1563 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1564 
1565 	op_params->mp = ops_mp;
1566 	op_params->burst_sz = burst_sz;
1567 	op_params->num_to_process = num_to_process;
1568 	op_params->num_lcores = num_lcores;
1569 	op_params->vector_mask = vector_mask;
1570 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1571 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1572 		op_params->ref_dec_op->status = expected_status;
1573 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
1574 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
1575 		op_params->ref_enc_op->status = expected_status;
1576 	return 0;
1577 }
1578 
1579 static int
1580 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1581 		struct test_op_params *op_params)
1582 {
1583 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1584 	unsigned int i;
1585 	struct active_device *ad;
1586 	unsigned int burst_sz = get_burst_sz();
1587 	enum rte_bbdev_op_type op_type = test_vector.op_type;
1588 	const struct rte_bbdev_op_cap *capabilities = NULL;
1589 
1590 	ad = &active_devs[dev_id];
1591 
1592 	/* Check if device supports op_type */
1593 	if (!is_avail_op(ad, test_vector.op_type))
1594 		return TEST_SUCCESS;
1595 
1596 	struct rte_bbdev_info info;
1597 	rte_bbdev_info_get(ad->dev_id, &info);
1598 	socket_id = GET_SOCKET(info.socket_id);
1599 
1600 	f_ret = create_mempools(ad, socket_id, op_type,
1601 			get_num_ops());
1602 	if (f_ret != TEST_SUCCESS) {
1603 		printf("Couldn't create mempools");
1604 		goto fail;
1605 	}
1606 	if (op_type == RTE_BBDEV_OP_NONE)
1607 		op_type = RTE_BBDEV_OP_TURBO_ENC;
1608 
1609 	f_ret = init_test_op_params(op_params, test_vector.op_type,
1610 			test_vector.expected_status,
1611 			test_vector.mask,
1612 			ad->ops_mempool,
1613 			burst_sz,
1614 			get_num_ops(),
1615 			get_num_lcores());
1616 	if (f_ret != TEST_SUCCESS) {
1617 		printf("Couldn't init test op params");
1618 		goto fail;
1619 	}
1620 
1621 
1622 	/* Find capabilities */
1623 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1624 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
1625 		if (cap->type == test_vector.op_type) {
1626 			capabilities = cap;
1627 			break;
1628 		}
1629 		cap++;
1630 	}
1631 	TEST_ASSERT_NOT_NULL(capabilities,
1632 			"Couldn't find capabilities");
1633 
1634 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1635 		create_reference_dec_op(op_params->ref_dec_op);
1636 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1637 		create_reference_enc_op(op_params->ref_enc_op);
1638 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
1639 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
1640 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1641 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
1642 
1643 	for (i = 0; i < ad->nb_queues; ++i) {
1644 		f_ret = fill_queue_buffers(op_params,
1645 				ad->in_mbuf_pool,
1646 				ad->hard_out_mbuf_pool,
1647 				ad->soft_out_mbuf_pool,
1648 				ad->harq_in_mbuf_pool,
1649 				ad->harq_out_mbuf_pool,
1650 				ad->queue_ids[i],
1651 				capabilities,
1652 				info.drv.min_alignment,
1653 				socket_id);
1654 		if (f_ret != TEST_SUCCESS) {
1655 			printf("Couldn't init queue buffers");
1656 			goto fail;
1657 		}
1658 	}
1659 
1660 	/* Run test case function */
1661 	t_ret = test_case_func(ad, op_params);
1662 
1663 	/* Free active device resources and return */
1664 	free_buffers(ad, op_params);
1665 	return t_ret;
1666 
1667 fail:
1668 	free_buffers(ad, op_params);
1669 	return TEST_FAILED;
1670 }
1671 
1672 /* Run given test function per active device per supported op type
1673  * per burst size.
1674  */
1675 static int
1676 run_test_case(test_case_function *test_case_func)
1677 {
1678 	int ret = 0;
1679 	uint8_t dev;
1680 
1681 	/* Alloc op_params */
1682 	struct test_op_params *op_params = rte_zmalloc(NULL,
1683 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1684 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1685 			RTE_ALIGN(sizeof(struct test_op_params),
1686 				RTE_CACHE_LINE_SIZE));
1687 
1688 	/* For each device run test case function */
1689 	for (dev = 0; dev < nb_active_devs; ++dev)
1690 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
1691 
1692 	rte_free(op_params);
1693 
1694 	return ret;
1695 }
1696 
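/* Interrupt callback: dequeue completed ops for the matching queue, validate
 * them and, once all ops are done, record throughput figures.
 */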
1697 static void
1698 dequeue_event_callback(uint16_t dev_id,
1699 		enum rte_bbdev_event_type event, void *cb_arg,
1700 		void *ret_param)
1701 {
1702 	int ret;
1703 	uint16_t i;
1704 	uint64_t total_time;
1705 	uint16_t deq, burst_sz, num_ops;
1706 	uint16_t queue_id = *(uint16_t *) ret_param;
1707 	struct rte_bbdev_info info;
1708 	double tb_len_bits;
1709 	struct thread_params *tp = cb_arg;
1710 
1711 	/* Find matching thread params using queue_id */
1712 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1713 		if (tp->queue_id == queue_id)
1714 			break;
1715 
1716 	if (i == MAX_QUEUES) {
1717 		printf("%s: Queue_id from interrupt details was not found!\n",
1718 				__func__);
1719 		return;
1720 	}
1721 
1722 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1723 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1724 		printf(
1725 			"Dequeue interrupt handler called for incorrect event!\n");
1726 		return;
1727 	}
1728 
1729 	burst_sz = rte_atomic16_read(&tp->burst_sz);
1730 	num_ops = tp->op_params->num_to_process;
1731 
1732 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
1733 			test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1734 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1735 				&tp->dec_ops[
1736 					rte_atomic16_read(&tp->nb_dequeued)],
1737 				burst_sz);
1738 	else
1739 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1740 				&tp->enc_ops[
1741 					rte_atomic16_read(&tp->nb_dequeued)],
1742 				burst_sz);
1743 
1744 	if (deq < burst_sz) {
1745 		printf(
1746 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1747 			burst_sz, deq);
1748 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1749 		return;
1750 	}
1751 
1752 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1753 		rte_atomic16_add(&tp->nb_dequeued, deq);
1754 		return;
1755 	}
1756 
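	/* All operations of the current repetition have been dequeued -
	 * measure the TSC cycles elapsed since the lcore started enqueueing.
	 */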
1757 	total_time = rte_rdtsc_precise() - tp->start_time;
1758 
1759 	rte_bbdev_info_get(dev_id, &info);
1760 
1761 	ret = TEST_SUCCESS;
1762 
1763 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1764 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1765 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
1766 				tp->op_params->vector_mask);
1767 		/* get the max of iter_count for all dequeued ops */
1768 		for (i = 0; i < num_ops; ++i)
1769 			tp->iter_count = RTE_MAX(
1770 					tp->dec_ops[i]->turbo_dec.iter_count,
1771 					tp->iter_count);
1772 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1773 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1774 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1775 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
1776 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1777 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
1778 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1779 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
1780 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1781 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1782 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1783 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
1784 				tp->op_params->vector_mask);
1785 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1786 	}
1787 
1788 	if (ret) {
1789 		printf("Buffers validation failed\n");
1790 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1791 	}
1792 
1793 	switch (test_vector.op_type) {
1794 	case RTE_BBDEV_OP_TURBO_DEC:
1795 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1796 		break;
1797 	case RTE_BBDEV_OP_TURBO_ENC:
1798 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1799 		break;
1800 	case RTE_BBDEV_OP_LDPC_DEC:
1801 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
1802 		break;
1803 	case RTE_BBDEV_OP_LDPC_ENC:
1804 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
1805 		break;
1806 	case RTE_BBDEV_OP_NONE:
1807 		tb_len_bits = 0.0;
1808 		break;
1809 	default:
1810 		printf("Unknown op type: %d\n", test_vector.op_type);
1811 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1812 		return;
1813 	}
1814 
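	/* Accumulate per-repetition rates; throughput_test() later averages
	 * them by dividing by TEST_REPETITIONS.
	 */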
1815 	tp->ops_per_sec += ((double)num_ops) /
1816 			((double)total_time / (double)rte_get_tsc_hz());
1817 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1818 			((double)total_time / (double)rte_get_tsc_hz());
1819 
1820 	rte_atomic16_add(&tp->nb_dequeued, deq);
1821 }
1822 
1823 static int
1824 throughput_intr_lcore_dec(void *arg)
1825 {
1826 	struct thread_params *tp = arg;
1827 	unsigned int enqueued;
1828 	const uint16_t queue_id = tp->queue_id;
1829 	const uint16_t burst_sz = tp->op_params->burst_sz;
1830 	const uint16_t num_to_process = tp->op_params->num_to_process;
1831 	struct rte_bbdev_dec_op *ops[num_to_process];
1832 	struct test_buffers *bufs = NULL;
1833 	struct rte_bbdev_info info;
1834 	int ret, i, j;
1835 	uint16_t num_to_enq, enq;
1836 
1837 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1838 			"BURST_SIZE should be <= %u", MAX_BURST);
1839 
1840 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1841 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1842 			tp->dev_id, queue_id);
1843 
1844 	rte_bbdev_info_get(tp->dev_id, &info);
1845 
1846 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1847 			"NUM_OPS cannot exceed %u for this device",
1848 			info.drv.queue_size_lim);
1849 
1850 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1851 
1852 	rte_atomic16_clear(&tp->processing_status);
1853 	rte_atomic16_clear(&tp->nb_dequeued);
1854 
1855 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1856 		rte_pause();
1857 
1858 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1859 				num_to_process);
1860 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1861 			num_to_process);
1862 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1863 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1864 				bufs->hard_outputs, bufs->soft_outputs,
1865 				tp->op_params->ref_dec_op);
1866 
1867 	/* Set counter to validate the ordering */
1868 	for (j = 0; j < num_to_process; ++j)
1869 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1870 
1871 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1872 		for (i = 0; i < num_to_process; ++i)
1873 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
1874 
1875 		tp->start_time = rte_rdtsc_precise();
1876 		for (enqueued = 0; enqueued < num_to_process;) {
1877 			num_to_enq = burst_sz;
1878 
1879 			if (unlikely(num_to_process - enqueued < num_to_enq))
1880 				num_to_enq = num_to_process - enqueued;
1881 
1882 			enq = 0;
1883 			do {
1884 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1885 						queue_id, &ops[enqueued],
1886 						num_to_enq);
1887 			} while (unlikely(num_to_enq != enq));
1888 			enqueued += enq;
1889 
1890 			/* Write the number of descriptors just enqueued to the
1891 			 * thread's burst_sz so that the callback dequeues the
1892 			 * proper amount. This is needed for the last batch,
1893 			 * where the number of operations may not be a multiple
1894 			 * of the burst size.
1895 			 */
1897 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1898 
1899 			/* Wait until processing of previous batch is
1900 			 * completed
1901 			 */
1902 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1903 					(int16_t) enqueued)
1904 				rte_pause();
1905 		}
1906 		if (j != TEST_REPETITIONS - 1)
1907 			rte_atomic16_clear(&tp->nb_dequeued);
1908 	}
1909 
1910 	return TEST_SUCCESS;
1911 }
1912 
1913 static int
1914 throughput_intr_lcore_enc(void *arg)
1915 {
1916 	struct thread_params *tp = arg;
1917 	unsigned int enqueued;
1918 	const uint16_t queue_id = tp->queue_id;
1919 	const uint16_t burst_sz = tp->op_params->burst_sz;
1920 	const uint16_t num_to_process = tp->op_params->num_to_process;
1921 	struct rte_bbdev_enc_op *ops[num_to_process];
1922 	struct test_buffers *bufs = NULL;
1923 	struct rte_bbdev_info info;
1924 	int ret, i, j;
1925 	uint16_t num_to_enq, enq;
1926 
1927 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1928 			"BURST_SIZE should be <= %u", MAX_BURST);
1929 
1930 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1931 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1932 			tp->dev_id, queue_id);
1933 
1934 	rte_bbdev_info_get(tp->dev_id, &info);
1935 
1936 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1937 			"NUM_OPS cannot exceed %u for this device",
1938 			info.drv.queue_size_lim);
1939 
1940 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1941 
1942 	rte_atomic16_clear(&tp->processing_status);
1943 	rte_atomic16_clear(&tp->nb_dequeued);
1944 
1945 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1946 		rte_pause();
1947 
1948 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1949 			num_to_process);
1950 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1951 			num_to_process);
1952 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1953 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1954 				bufs->hard_outputs, tp->op_params->ref_enc_op);
1955 
1956 	/* Set counter to validate the ordering */
1957 	for (j = 0; j < num_to_process; ++j)
1958 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1959 
1960 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1961 		for (i = 0; i < num_to_process; ++i)
1962 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
1963 
1964 		tp->start_time = rte_rdtsc_precise();
1965 		for (enqueued = 0; enqueued < num_to_process;) {
1966 			num_to_enq = burst_sz;
1967 
1968 			if (unlikely(num_to_process - enqueued < num_to_enq))
1969 				num_to_enq = num_to_process - enqueued;
1970 
1971 			enq = 0;
1972 			do {
1973 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1974 						queue_id, &ops[enqueued],
1975 						num_to_enq);
1976 			} while (unlikely(enq != num_to_enq));
1977 			enqueued += enq;
1978 
1979 			/* Write the number of descriptors just enqueued to the
1980 			 * thread's burst_sz so that the callback dequeues the
1981 			 * proper amount. This is needed for the last batch,
1982 			 * where the number of operations may not be a multiple
1983 			 * of the burst size.
1984 			 */
1986 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1987 
1988 			/* Wait until processing of previous batch is
1989 			 * completed
1990 			 */
1991 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1992 					(int16_t) enqueued)
1993 				rte_pause();
1994 		}
1995 		if (j != TEST_REPETITIONS - 1)
1996 			rte_atomic16_clear(&tp->nb_dequeued);
1997 	}
1998 
1999 	return TEST_SUCCESS;
2000 }
2001 
2002 static int
2003 throughput_pmd_lcore_dec(void *arg)
2004 {
2005 	struct thread_params *tp = arg;
2006 	uint16_t enq, deq;
2007 	uint64_t total_time = 0, start_time;
2008 	const uint16_t queue_id = tp->queue_id;
2009 	const uint16_t burst_sz = tp->op_params->burst_sz;
2010 	const uint16_t num_ops = tp->op_params->num_to_process;
2011 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2012 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2013 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2014 	struct test_buffers *bufs = NULL;
2015 	int i, j, ret;
2016 	struct rte_bbdev_info info;
2017 	uint16_t num_to_enq;
2018 
2019 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2020 			"BURST_SIZE should be <= %u", MAX_BURST);
2021 
2022 	rte_bbdev_info_get(tp->dev_id, &info);
2023 
2024 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2025 			"NUM_OPS cannot exceed %u for this device",
2026 			info.drv.queue_size_lim);
2027 
2028 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2029 
2030 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2031 		rte_pause();
2032 
2033 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2034 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2035 
2036 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2037 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2038 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
2039 
2040 	/* Set counter to validate the ordering */
2041 	for (j = 0; j < num_ops; ++j)
2042 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2043 
2044 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2045 
2046 		for (j = 0; j < num_ops; ++j)
2047 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2048 
2049 		start_time = rte_rdtsc_precise();
2050 
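		/* Interleave enqueue and dequeue so the queue never overflows:
		 * keep pushing bursts while draining completed operations.
		 */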
2051 		for (enq = 0, deq = 0; enq < num_ops;) {
2052 			num_to_enq = burst_sz;
2053 
2054 			if (unlikely(num_ops - enq < num_to_enq))
2055 				num_to_enq = num_ops - enq;
2056 
2057 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2058 					queue_id, &ops_enq[enq], num_to_enq);
2059 
2060 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2061 					queue_id, &ops_deq[deq], enq - deq);
2062 		}
2063 
2064 		/* dequeue the remaining */
2065 		while (deq < enq) {
2066 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2067 					queue_id, &ops_deq[deq], enq - deq);
2068 		}
2069 
2070 		total_time += rte_rdtsc_precise() - start_time;
2071 	}
2072 
2073 	tp->iter_count = 0;
2074 	/* get the max of iter_count for all dequeued ops */
2075 	for (i = 0; i < num_ops; ++i) {
2076 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2077 				tp->iter_count);
2078 	}
2079 
2080 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2081 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
2082 				tp->op_params->vector_mask);
2083 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2084 	}
2085 
2086 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2087 
2088 	double tb_len_bits = calc_dec_TB_size(ref_op);
2089 
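	/* Rates over the whole run: ops/s = total ops / elapsed seconds,
	 * Mbps = total transport block bits / elapsed seconds / 1e6.
	 */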
2090 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2091 			((double)total_time / (double)rte_get_tsc_hz());
2092 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2093 			1000000.0) / ((double)total_time /
2094 			(double)rte_get_tsc_hz());
2095 
2096 	return TEST_SUCCESS;
2097 }
2098 
2099 static int
2100 throughput_pmd_lcore_ldpc_dec(void *arg)
2101 {
2102 	struct thread_params *tp = arg;
2103 	uint16_t enq, deq;
2104 	uint64_t total_time = 0, start_time;
2105 	const uint16_t queue_id = tp->queue_id;
2106 	const uint16_t burst_sz = tp->op_params->burst_sz;
2107 	const uint16_t num_ops = tp->op_params->num_to_process;
2108 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2109 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2110 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2111 	struct test_buffers *bufs = NULL;
2112 	int i, j, ret;
2113 	struct rte_bbdev_info info;
2114 	uint16_t num_to_enq;
2115 
2116 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2117 			"BURST_SIZE should be <= %u", MAX_BURST);
2118 
2119 	rte_bbdev_info_get(tp->dev_id, &info);
2120 
2121 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2122 			"NUM_OPS cannot exceed %u for this device",
2123 			info.drv.queue_size_lim);
2124 
2125 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2126 
2127 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2128 		rte_pause();
2129 
2130 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2131 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2132 
2133 	/* For throughput tests we need to disable early termination */
2134 	if (check_bit(ref_op->ldpc_dec.op_flags,
2135 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2136 		ref_op->ldpc_dec.op_flags &=
2137 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2138 	ref_op->ldpc_dec.iter_max = 6;
2139 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
2140 
2141 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2142 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2143 				bufs->hard_outputs, bufs->soft_outputs,
2144 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2145 
2146 	/* Set counter to validate the ordering */
2147 	for (j = 0; j < num_ops; ++j)
2148 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2149 
2150 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2151 		for (j = 0; j < num_ops; ++j) {
2152 			mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
2153 			if (check_bit(ref_op->ldpc_dec.op_flags,
2154 					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE))
2155 				mbuf_reset(
2156 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
2157 		}
2158 
2159 		start_time = rte_rdtsc_precise();
2160 
2161 		for (enq = 0, deq = 0; enq < num_ops;) {
2162 			num_to_enq = burst_sz;
2163 
2164 			if (unlikely(num_ops - enq < num_to_enq))
2165 				num_to_enq = num_ops - enq;
2166 
2167 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2168 					queue_id, &ops_enq[enq], num_to_enq);
2169 
2170 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2171 					queue_id, &ops_deq[deq], enq - deq);
2172 		}
2173 
2174 		/* dequeue the remaining */
2175 		while (deq < enq) {
2176 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2177 					queue_id, &ops_deq[deq], enq - deq);
2178 		}
2179 
2180 		total_time += rte_rdtsc_precise() - start_time;
2181 	}
2182 
2183 	tp->iter_count = 0;
2184 	/* get the max of iter_count for all dequeued ops */
2185 	for (i = 0; i < num_ops; ++i) {
2186 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2187 				tp->iter_count);
2188 	}
2189 
2190 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2191 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2192 				tp->op_params->vector_mask);
2193 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2194 	}
2195 
2196 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2197 
2198 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2199 
2200 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2201 			((double)total_time / (double)rte_get_tsc_hz());
2202 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2203 			1000000.0) / ((double)total_time /
2204 			(double)rte_get_tsc_hz());
2205 
2206 	return TEST_SUCCESS;
2207 }
2208 
2209 static int
2210 throughput_pmd_lcore_enc(void *arg)
2211 {
2212 	struct thread_params *tp = arg;
2213 	uint16_t enq, deq;
2214 	uint64_t total_time = 0, start_time;
2215 	const uint16_t queue_id = tp->queue_id;
2216 	const uint16_t burst_sz = tp->op_params->burst_sz;
2217 	const uint16_t num_ops = tp->op_params->num_to_process;
2218 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2219 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2220 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2221 	struct test_buffers *bufs = NULL;
2222 	int i, j, ret;
2223 	struct rte_bbdev_info info;
2224 	uint16_t num_to_enq;
2225 
2226 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2227 			"BURST_SIZE should be <= %u", MAX_BURST);
2228 
2229 	rte_bbdev_info_get(tp->dev_id, &info);
2230 
2231 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2232 			"NUM_OPS cannot exceed %u for this device",
2233 			info.drv.queue_size_lim);
2234 
2235 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2236 
2237 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2238 		rte_pause();
2239 
2240 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2241 			num_ops);
2242 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2243 			num_ops);
2244 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2245 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2246 				bufs->hard_outputs, ref_op);
2247 
2248 	/* Set counter to validate the ordering */
2249 	for (j = 0; j < num_ops; ++j)
2250 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2251 
2252 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2253 
2254 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2255 			for (j = 0; j < num_ops; ++j)
2256 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2257 
2258 		start_time = rte_rdtsc_precise();
2259 
2260 		for (enq = 0, deq = 0; enq < num_ops;) {
2261 			num_to_enq = burst_sz;
2262 
2263 			if (unlikely(num_ops - enq < num_to_enq))
2264 				num_to_enq = num_ops - enq;
2265 
2266 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2267 					queue_id, &ops_enq[enq], num_to_enq);
2268 
2269 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2270 					queue_id, &ops_deq[deq], enq - deq);
2271 		}
2272 
2273 		/* dequeue the remaining */
2274 		while (deq < enq) {
2275 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2276 					queue_id, &ops_deq[deq], enq - deq);
2277 		}
2278 
2279 		total_time += rte_rdtsc_precise() - start_time;
2280 	}
2281 
2282 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2283 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
2284 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2285 	}
2286 
2287 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2288 
2289 	double tb_len_bits = calc_enc_TB_size(ref_op);
2290 
2291 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2292 			((double)total_time / (double)rte_get_tsc_hz());
2293 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2294 			/ 1000000.0) / ((double)total_time /
2295 			(double)rte_get_tsc_hz());
2296 
2297 	return TEST_SUCCESS;
2298 }
2299 
2300 static int
2301 throughput_pmd_lcore_ldpc_enc(void *arg)
2302 {
2303 	struct thread_params *tp = arg;
2304 	uint16_t enq, deq;
2305 	uint64_t total_time = 0, start_time;
2306 	const uint16_t queue_id = tp->queue_id;
2307 	const uint16_t burst_sz = tp->op_params->burst_sz;
2308 	const uint16_t num_ops = tp->op_params->num_to_process;
2309 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2310 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2311 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2312 	struct test_buffers *bufs = NULL;
2313 	int i, j, ret;
2314 	struct rte_bbdev_info info;
2315 	uint16_t num_to_enq;
2316 
2317 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2318 			"BURST_SIZE should be <= %u", MAX_BURST);
2319 
2320 	rte_bbdev_info_get(tp->dev_id, &info);
2321 
2322 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2323 			"NUM_OPS cannot exceed %u for this device",
2324 			info.drv.queue_size_lim);
2325 
2326 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2327 
2328 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2329 		rte_pause();
2330 
2331 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2332 			num_ops);
2333 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2334 			num_ops);
2335 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2336 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2337 				bufs->hard_outputs, ref_op);
2338 
2339 	/* Set counter to validate the ordering */
2340 	for (j = 0; j < num_ops; ++j)
2341 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2342 
2343 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2344 
2345 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2346 			for (j = 0; j < num_ops; ++j)
2347 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
2348 
2349 		start_time = rte_rdtsc_precise();
2350 
2351 		for (enq = 0, deq = 0; enq < num_ops;) {
2352 			num_to_enq = burst_sz;
2353 
2354 			if (unlikely(num_ops - enq < num_to_enq))
2355 				num_to_enq = num_ops - enq;
2356 
2357 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
2358 					queue_id, &ops_enq[enq], num_to_enq);
2359 
2360 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2361 					queue_id, &ops_deq[deq], enq - deq);
2362 		}
2363 
2364 		/* dequeue the remaining */
2365 		while (deq < enq) {
2366 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2367 					queue_id, &ops_deq[deq], enq - deq);
2368 		}
2369 
2370 		total_time += rte_rdtsc_precise() - start_time;
2371 	}
2372 
2373 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2374 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
2375 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2376 	}
2377 
2378 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2379 
2380 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
2381 
2382 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2383 			((double)total_time / (double)rte_get_tsc_hz());
2384 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2385 			/ 1000000.0) / ((double)total_time /
2386 			(double)rte_get_tsc_hz());
2387 
2388 	return TEST_SUCCESS;
2389 }
2390 
2391 static void
2392 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
2393 {
2394 	unsigned int iter = 0;
2395 	double total_mops = 0, total_mbps = 0;
2396 
2397 	for (iter = 0; iter < used_cores; iter++) {
2398 		printf(
2399 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
2400 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2401 			t_params[iter].mbps);
2402 		total_mops += t_params[iter].ops_per_sec;
2403 		total_mbps += t_params[iter].mbps;
2404 	}
2405 	printf(
2406 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
2407 		used_cores, total_mops, total_mbps);
2408 }
2409 
2410 static void
2411 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
2412 {
2413 	unsigned int iter = 0;
2414 	double total_mops = 0, total_mbps = 0;
2415 	uint8_t iter_count = 0;
2416 
2417 	for (iter = 0; iter < used_cores; iter++) {
2418 		printf(
2419 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2420 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2421 			t_params[iter].mbps, t_params[iter].iter_count);
2422 		total_mops += t_params[iter].ops_per_sec;
2423 		total_mbps += t_params[iter].mbps;
2424 		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
2425 	}
2426 	printf(
2427 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
2428 		used_cores, total_mops, total_mbps, iter_count);
2429 }
2430 
2431 /*
2432  * Test function that determines how long an enqueue + dequeue of a burst
2433  * takes on available lcores.
2434  */
2435 static int
2436 throughput_test(struct active_device *ad,
2437 		struct test_op_params *op_params)
2438 {
2439 	int ret;
2440 	unsigned int lcore_id, used_cores = 0;
2441 	struct thread_params *t_params, *tp;
2442 	struct rte_bbdev_info info;
2443 	lcore_function_t *throughput_function;
2444 	uint16_t num_lcores;
2445 	const char *op_type_str;
2446 
2447 	rte_bbdev_info_get(ad->dev_id, &info);
2448 
2449 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
2450 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
2451 			test_vector.op_type);
2452 
2453 	printf("+ ------------------------------------------------------- +\n");
2454 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
2455 			info.dev_name, ad->nb_queues, op_params->burst_sz,
2456 			op_params->num_to_process, op_params->num_lcores,
2457 			op_type_str,
2458 			intr_enabled ? "Interrupt mode" : "PMD mode",
2459 			(double)rte_get_tsc_hz() / 1000000000.0);
2460 
2461 	/* Set number of lcores */
2462 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
2463 			? ad->nb_queues
2464 			: op_params->num_lcores;
2465 
2466 	/* Allocate memory for thread parameters structure */
2467 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
2468 			RTE_CACHE_LINE_SIZE);
2469 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
2470 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
2471 				RTE_CACHE_LINE_SIZE));
2472 
2473 	if (intr_enabled) {
2474 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2475 			throughput_function = throughput_intr_lcore_dec;
2476 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2477 			throughput_function = throughput_intr_lcore_dec;
2478 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2479 			throughput_function = throughput_intr_lcore_enc;
2480 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2481 			throughput_function = throughput_intr_lcore_enc;
2482 		else
2483 			throughput_function = throughput_intr_lcore_enc;
2484 
2485 		/* Dequeue interrupt callback registration */
2486 		ret = rte_bbdev_callback_register(ad->dev_id,
2487 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
2488 				t_params);
2489 		if (ret < 0) {
2490 			rte_free(t_params);
2491 			return ret;
2492 		}
2493 	} else {
2494 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2495 			throughput_function = throughput_pmd_lcore_dec;
2496 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2497 			throughput_function = throughput_pmd_lcore_ldpc_dec;
2498 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2499 			throughput_function = throughput_pmd_lcore_enc;
2500 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2501 			throughput_function = throughput_pmd_lcore_ldpc_enc;
2502 		else
2503 			throughput_function = throughput_pmd_lcore_enc;
2504 	}
2505 
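	/* Hold worker lcores in SYNC_WAIT until they are all launched, then
	 * release them together by setting SYNC_START below.
	 */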
2506 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
2507 
2508 	/* Master core is set at first entry */
2509 	t_params[0].dev_id = ad->dev_id;
2510 	t_params[0].lcore_id = rte_lcore_id();
2511 	t_params[0].op_params = op_params;
2512 	t_params[0].queue_id = ad->queue_ids[used_cores++];
2513 	t_params[0].iter_count = 0;
2514 
2515 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
2516 		if (used_cores >= num_lcores)
2517 			break;
2518 
2519 		t_params[used_cores].dev_id = ad->dev_id;
2520 		t_params[used_cores].lcore_id = lcore_id;
2521 		t_params[used_cores].op_params = op_params;
2522 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
2523 		t_params[used_cores].iter_count = 0;
2524 
2525 		rte_eal_remote_launch(throughput_function,
2526 				&t_params[used_cores++], lcore_id);
2527 	}
2528 
2529 	rte_atomic16_set(&op_params->sync, SYNC_START);
2530 	ret = throughput_function(&t_params[0]);
2531 
2532 	/* Master core is always used */
2533 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
2534 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
2535 
2536 	/* Return if test failed */
2537 	if (ret) {
2538 		rte_free(t_params);
2539 		return ret;
2540 	}
2541 
2542 	/* Print throughput if interrupts are disabled and test passed */
2543 	if (!intr_enabled) {
2544 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2545 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2546 			print_dec_throughput(t_params, num_lcores);
2547 		else
2548 			print_enc_throughput(t_params, num_lcores);
2549 		rte_free(t_params);
2550 		return ret;
2551 	}
2552 
2553 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
2554 	 * all pending operations. Skip waiting for queues which reported an
2555 	 * error using processing_status variable.
2556 	 * Wait for master lcore operations.
2557 	 */
2558 	tp = &t_params[0];
2559 	while ((rte_atomic16_read(&tp->nb_dequeued) <
2560 			op_params->num_to_process) &&
2561 			(rte_atomic16_read(&tp->processing_status) !=
2562 			TEST_FAILED))
2563 		rte_pause();
2564 
2565 	tp->ops_per_sec /= TEST_REPETITIONS;
2566 	tp->mbps /= TEST_REPETITIONS;
2567 	ret |= (int)rte_atomic16_read(&tp->processing_status);
2568 
2569 	/* Wait for slave lcores operations */
2570 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
2571 		tp = &t_params[used_cores];
2572 
2573 		while ((rte_atomic16_read(&tp->nb_dequeued) <
2574 				op_params->num_to_process) &&
2575 				(rte_atomic16_read(&tp->processing_status) !=
2576 				TEST_FAILED))
2577 			rte_pause();
2578 
2579 		tp->ops_per_sec /= TEST_REPETITIONS;
2580 		tp->mbps /= TEST_REPETITIONS;
2581 		ret |= (int)rte_atomic16_read(&tp->processing_status);
2582 	}
2583 
2584 	/* Print throughput if test passed */
2585 	if (!ret) {
2586 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2587 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2588 			print_dec_throughput(t_params, num_lcores);
2589 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
2590 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2591 			print_enc_throughput(t_params, num_lcores);
2592 	}
2593 
2594 	rte_free(t_params);
2595 	return ret;
2596 }
2597 
2598 static int
2599 latency_test_dec(struct rte_mempool *mempool,
2600 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2601 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2602 		const uint16_t num_to_process, uint16_t burst_sz,
2603 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2604 {
2605 	int ret = TEST_SUCCESS;
2606 	uint16_t i, j, dequeued;
2607 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2608 	uint64_t start_time = 0, last_time = 0;
2609 
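	/* Per-burst latency: cycles from enqueueing the burst until the
	 * first dequeue call that returns operations.
	 */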
2610 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2611 		uint16_t enq = 0, deq = 0;
2612 		bool first_time = true;
2613 		last_time = 0;
2614 
2615 		if (unlikely(num_to_process - dequeued < burst_sz))
2616 			burst_sz = num_to_process - dequeued;
2617 
2618 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2619 		TEST_ASSERT_SUCCESS(ret,
2620 				"rte_bbdev_dec_op_alloc_bulk() failed");
2621 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2622 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2623 					bufs->inputs,
2624 					bufs->hard_outputs,
2625 					bufs->soft_outputs,
2626 					ref_op);
2627 
2628 		/* Set counter to validate the ordering */
2629 		for (j = 0; j < burst_sz; ++j)
2630 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2631 
2632 		start_time = rte_rdtsc_precise();
2633 
2634 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
2635 				burst_sz);
2636 		TEST_ASSERT(enq == burst_sz,
2637 				"Error enqueueing burst, expected %u, got %u",
2638 				burst_sz, enq);
2639 
2640 		/* Dequeue */
2641 		do {
2642 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2643 					&ops_deq[deq], burst_sz - deq);
2644 			if (likely(first_time && (deq > 0))) {
2645 				last_time = rte_rdtsc_precise() - start_time;
2646 				first_time = false;
2647 			}
2648 		} while (unlikely(burst_sz != deq));
2649 
2650 		*max_time = RTE_MAX(*max_time, last_time);
2651 		*min_time = RTE_MIN(*min_time, last_time);
2652 		*total_time += last_time;
2653 
2654 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2655 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
2656 					vector_mask);
2657 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2658 		}
2659 
2660 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2661 		dequeued += deq;
2662 	}
2663 
2664 	return i;
2665 }
2666 
2667 static int
2668 latency_test_ldpc_dec(struct rte_mempool *mempool,
2669 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2670 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2671 		const uint16_t num_to_process, uint16_t burst_sz,
2672 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2673 {
2674 	int ret = TEST_SUCCESS;
2675 	uint16_t i, j, dequeued;
2676 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2677 	uint64_t start_time = 0, last_time = 0;
2678 
2679 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2680 		uint16_t enq = 0, deq = 0;
2681 		bool first_time = true;
2682 		last_time = 0;
2683 
2684 		if (unlikely(num_to_process - dequeued < burst_sz))
2685 			burst_sz = num_to_process - dequeued;
2686 
2687 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2688 		TEST_ASSERT_SUCCESS(ret,
2689 				"rte_bbdev_dec_op_alloc_bulk() failed");
2690 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2691 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
2692 					bufs->inputs,
2693 					bufs->hard_outputs,
2694 					bufs->soft_outputs,
2695 					bufs->harq_inputs,
2696 					bufs->harq_outputs,
2697 					ref_op);
2698 
2699 		/* Set counter to validate the ordering */
2700 		for (j = 0; j < burst_sz; ++j)
2701 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2702 
2703 		start_time = rte_rdtsc_precise();
2704 
2705 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2706 				&ops_enq[enq], burst_sz);
2707 		TEST_ASSERT(enq == burst_sz,
2708 				"Error enqueueing burst, expected %u, got %u",
2709 				burst_sz, enq);
2710 
2711 		/* Dequeue */
2712 		do {
2713 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2714 					&ops_deq[deq], burst_sz - deq);
2715 			if (likely(first_time && (deq > 0))) {
2716 				last_time = rte_rdtsc_precise() - start_time;
2717 				first_time = false;
2718 			}
2719 		} while (unlikely(burst_sz != deq));
2720 
2721 		*max_time = RTE_MAX(*max_time, last_time);
2722 		*min_time = RTE_MIN(*min_time, last_time);
2723 		*total_time += last_time;
2724 
2725 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2726 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
2727 					vector_mask);
2728 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2729 		}
2730 
2731 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2732 		dequeued += deq;
2733 	}
2734 
2735 	return i;
2736 }
2737 
2738 static int
2739 latency_test_enc(struct rte_mempool *mempool,
2740 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2741 		uint16_t dev_id, uint16_t queue_id,
2742 		const uint16_t num_to_process, uint16_t burst_sz,
2743 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2744 {
2745 	int ret = TEST_SUCCESS;
2746 	uint16_t i, j, dequeued;
2747 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2748 	uint64_t start_time = 0, last_time = 0;
2749 
2750 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2751 		uint16_t enq = 0, deq = 0;
2752 		bool first_time = true;
2753 		last_time = 0;
2754 
2755 		if (unlikely(num_to_process - dequeued < burst_sz))
2756 			burst_sz = num_to_process - dequeued;
2757 
2758 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2759 		TEST_ASSERT_SUCCESS(ret,
2760 				"rte_bbdev_enc_op_alloc_bulk() failed");
2761 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2762 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2763 					bufs->inputs,
2764 					bufs->hard_outputs,
2765 					ref_op);
2766 
2767 		/* Set counter to validate the ordering */
2768 		for (j = 0; j < burst_sz; ++j)
2769 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2770 
2771 		start_time = rte_rdtsc_precise();
2772 
2773 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
2774 				burst_sz);
2775 		TEST_ASSERT(enq == burst_sz,
2776 				"Error enqueueing burst, expected %u, got %u",
2777 				burst_sz, enq);
2778 
2779 		/* Dequeue */
2780 		do {
2781 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2782 					&ops_deq[deq], burst_sz - deq);
2783 			if (likely(first_time && (deq > 0))) {
2784 				last_time = rte_rdtsc_precise() - start_time;
2785 				first_time = false;
2786 			}
2787 		} while (unlikely(burst_sz != deq));
2788 
2789 		*max_time = RTE_MAX(*max_time, last_time);
2790 		*min_time = RTE_MIN(*min_time, last_time);
2791 		*total_time += last_time;
2792 
2793 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2794 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2795 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2796 		}
2797 
2798 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2799 		dequeued += deq;
2800 	}
2801 
2802 	return i;
2803 }
2804 
2805 static int
2806 latency_test_ldpc_enc(struct rte_mempool *mempool,
2807 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2808 		uint16_t dev_id, uint16_t queue_id,
2809 		const uint16_t num_to_process, uint16_t burst_sz,
2810 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2811 {
2812 	int ret = TEST_SUCCESS;
2813 	uint16_t i, j, dequeued;
2814 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2815 	uint64_t start_time = 0, last_time = 0;
2816 
2817 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2818 		uint16_t enq = 0, deq = 0;
2819 		bool first_time = true;
2820 		last_time = 0;
2821 
2822 		if (unlikely(num_to_process - dequeued < burst_sz))
2823 			burst_sz = num_to_process - dequeued;
2824 
2825 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2826 
2827 		TEST_ASSERT_SUCCESS(ret,
2828 				"rte_bbdev_enc_op_alloc_bulk() failed");
2829 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2830 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
2831 					bufs->inputs,
2832 					bufs->hard_outputs,
2833 					ref_op);
2834 
2835 		/* Set counter to validate the ordering */
2836 		for (j = 0; j < burst_sz; ++j)
2837 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2838 
2839 		start_time = rte_rdtsc_precise();
2840 
2846 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
2847 				&ops_enq[enq], burst_sz);
2848 		TEST_ASSERT(enq == burst_sz,
2849 				"Error enqueueing burst, expected %u, got %u",
2850 				burst_sz, enq);
2851 
2852 		/* Dequeue */
2853 		do {
2854 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2855 					&ops_deq[deq], burst_sz - deq);
2856 			if (likely(first_time && (deq > 0))) {
2857 				last_time = rte_rdtsc_precise() - start_time;
2858 				first_time = false;
2859 			}
2860 		} while (unlikely(burst_sz != deq));
2861 
2862 		*max_time = RTE_MAX(*max_time, last_time);
2863 		*min_time = RTE_MIN(*min_time, last_time);
2864 		*total_time += last_time;
2865 
2866 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2867 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2868 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2869 		}
2870 
2876 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2877 		dequeued += deq;
2878 	}
2879 
2880 	return i;
2881 }
2882 
2883 static int
2884 latency_test(struct active_device *ad,
2885 		struct test_op_params *op_params)
2886 {
2887 	int iter;
2888 	uint16_t burst_sz = op_params->burst_sz;
2889 	const uint16_t num_to_process = op_params->num_to_process;
2890 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2891 	const uint16_t queue_id = ad->queue_ids[0];
2892 	struct test_buffers *bufs = NULL;
2893 	struct rte_bbdev_info info;
2894 	uint64_t total_time, min_time, max_time;
2895 	const char *op_type_str;
2896 
2897 	total_time = max_time = 0;
2898 	min_time = UINT64_MAX;
2899 
2900 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2901 			"BURST_SIZE should be <= %u", MAX_BURST);
2902 
2903 	rte_bbdev_info_get(ad->dev_id, &info);
2904 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2905 
2906 	op_type_str = rte_bbdev_op_type_str(op_type);
2907 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2908 
2909 	printf("+ ------------------------------------------------------- +\n");
2910 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
2911 			info.dev_name, burst_sz, num_to_process, op_type_str);
2912 
2913 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2914 		iter = latency_test_dec(op_params->mp, bufs,
2915 				op_params->ref_dec_op, op_params->vector_mask,
2916 				ad->dev_id, queue_id, num_to_process,
2917 				burst_sz, &total_time, &min_time, &max_time);
2918 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
2919 		iter = latency_test_enc(op_params->mp, bufs,
2920 				op_params->ref_enc_op, ad->dev_id, queue_id,
2921 				num_to_process, burst_sz, &total_time,
2922 				&min_time, &max_time);
2923 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
2924 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
2925 				op_params->ref_enc_op, ad->dev_id, queue_id,
2926 				num_to_process, burst_sz, &total_time,
2927 				&min_time, &max_time);
2928 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
2929 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
2930 				op_params->ref_dec_op, op_params->vector_mask,
2931 				ad->dev_id, queue_id, num_to_process,
2932 				burst_sz, &total_time, &min_time, &max_time);
2933 	else
2934 		iter = latency_test_enc(op_params->mp, bufs,
2935 					op_params->ref_enc_op,
2936 					ad->dev_id, queue_id,
2937 					num_to_process, burst_sz, &total_time,
2938 					&min_time, &max_time);
2939 
2940 	if (iter <= 0)
2941 		return TEST_FAILED;
2942 
2943 	printf("Operation latency:\n"
2944 			"\tavg: %lg cycles, %lg us\n"
2945 			"\tmin: %lg cycles, %lg us\n"
2946 			"\tmax: %lg cycles, %lg us\n",
2947 			(double)total_time / (double)iter,
2948 			(double)(total_time * 1000000) / (double)iter /
2949 			(double)rte_get_tsc_hz(), (double)min_time,
2950 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
2951 			(double)max_time, (double)(max_time * 1000000) /
2952 			(double)rte_get_tsc_hz());
2953 
2954 	return TEST_SUCCESS;
2955 }
2956 
2957 #ifdef RTE_BBDEV_OFFLOAD_COST
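/* Read per-queue stats straight from the device's queue data so that the
 * driver-reported acc_offload_cycles are available for offload-cost math.
 */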
2958 static int
2959 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
2960 		struct rte_bbdev_stats *stats)
2961 {
2962 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
2963 	struct rte_bbdev_stats *q_stats;
2964 
2965 	if (queue_id >= dev->data->num_queues)
2966 		return -1;
2967 
2968 	q_stats = &dev->data->queues[queue_id].queue_stats;
2969 
2970 	stats->enqueued_count = q_stats->enqueued_count;
2971 	stats->dequeued_count = q_stats->dequeued_count;
2972 	stats->enqueue_err_count = q_stats->enqueue_err_count;
2973 	stats->dequeue_err_count = q_stats->dequeue_err_count;
2974 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2975 
2976 	return 0;
2977 }
2978 
2979 static int
2980 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2981 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
2982 		uint16_t queue_id, const uint16_t num_to_process,
2983 		uint16_t burst_sz, struct test_time_stats *time_st)
2984 {
2985 	int i, dequeued, ret;
2986 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2987 	uint64_t enq_start_time, deq_start_time;
2988 	uint64_t enq_sw_last_time, deq_last_time;
2989 	struct rte_bbdev_stats stats;
2990 
2991 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2992 		uint16_t enq = 0, deq = 0;
2993 
2994 		if (unlikely(num_to_process - dequeued < burst_sz))
2995 			burst_sz = num_to_process - dequeued;
2996 
2997 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
2998 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2999 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3000 					bufs->inputs,
3001 					bufs->hard_outputs,
3002 					bufs->soft_outputs,
3003 					ref_op);
3004 
3005 		/* Start time meas for enqueue function offload latency */
3006 		enq_start_time = rte_rdtsc_precise();
3007 		do {
3008 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
3009 					&ops_enq[enq], burst_sz - enq);
3010 		} while (unlikely(burst_sz != enq));
3011 
3012 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3013 		TEST_ASSERT_SUCCESS(ret,
3014 				"Failed to get stats for queue (%u) of device (%u)",
3015 				queue_id, dev_id);
3016 
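		/* Software-only part of the enqueue cost: total enqueue wall
		 * time minus the accelerator cycles reported in
		 * stats.acc_offload_cycles.
		 */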
3017 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3018 				stats.acc_offload_cycles;
3019 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3020 				enq_sw_last_time);
3021 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3022 				enq_sw_last_time);
3023 		time_st->enq_sw_total_time += enq_sw_last_time;
3024 
3025 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3026 				stats.acc_offload_cycles);
3027 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3028 				stats.acc_offload_cycles);
3029 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3030 
3031 		/* give time for device to process ops */
3032 		rte_delay_us(200);
3033 
3034 		/* Start time meas for dequeue function offload latency */
3035 		deq_start_time = rte_rdtsc_precise();
3036 		/* Dequeue one operation */
3037 		do {
3038 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3039 					&ops_deq[deq], 1);
3040 		} while (unlikely(deq != 1));
3041 
3042 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3043 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3044 				deq_last_time);
3045 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3046 				deq_last_time);
3047 		time_st->deq_total_time += deq_last_time;
3048 
3049 		/* Dequeue remaining operations if needed */
3050 		while (burst_sz != deq)
3051 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3052 					&ops_deq[deq], burst_sz - deq);
3053 
3054 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3055 		dequeued += deq;
3056 	}
3057 
3058 	return i;
3059 }
3060 
3061 static int
3062 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
3063 		struct test_buffers *bufs,
3064 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3065 		uint16_t queue_id, const uint16_t num_to_process,
3066 		uint16_t burst_sz, struct test_time_stats *time_st)
3067 {
3068 	int i, dequeued, ret;
3069 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3070 	uint64_t enq_start_time, deq_start_time;
3071 	uint64_t enq_sw_last_time, deq_last_time;
3072 	struct rte_bbdev_stats stats;
3073 
3074 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3075 		uint16_t enq = 0, deq = 0;
3076 
3077 		if (unlikely(num_to_process - dequeued < burst_sz))
3078 			burst_sz = num_to_process - dequeued;
3079 
3080 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
3081 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3082 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3083 					bufs->inputs,
3084 					bufs->hard_outputs,
3085 					bufs->soft_outputs,
3086 					bufs->harq_inputs,
3087 					bufs->harq_outputs,
3088 					ref_op);
3089 
3090 		/* Start time meas for enqueue function offload latency */
3091 		enq_start_time = rte_rdtsc_precise();
3092 		do {
3093 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3094 					&ops_enq[enq], burst_sz - enq);
3095 		} while (unlikely(burst_sz != enq));
3096 
3097 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3098 		TEST_ASSERT_SUCCESS(ret,
3099 				"Failed to get stats for queue (%u) of device (%u)",
3100 				queue_id, dev_id);
3101 
3102 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3103 				stats.acc_offload_cycles;
3104 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3105 				enq_sw_last_time);
3106 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3107 				enq_sw_last_time);
3108 		time_st->enq_sw_total_time += enq_sw_last_time;
3109 
3110 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3111 				stats.acc_offload_cycles);
3112 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3113 				stats.acc_offload_cycles);
3114 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3115 
3116 		/* give time for device to process ops */
3117 		rte_delay_us(200);
3118 
3119 		/* Start time meas for dequeue function offload latency */
3120 		deq_start_time = rte_rdtsc_precise();
3121 		/* Dequeue one operation */
3122 		do {
3123 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3124 					&ops_deq[deq], 1);
3125 		} while (unlikely(deq != 1));
3126 
3127 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3128 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3129 				deq_last_time);
3130 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3131 				deq_last_time);
3132 		time_st->deq_total_time += deq_last_time;
3133 
3134 		/* Dequeue remaining operations if needed */
3135 		while (burst_sz != deq)
3136 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3137 					&ops_deq[deq], burst_sz - deq);
3138 
3139 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3140 		dequeued += deq;
3141 	}
3142 
3143 	return i;
3144 }
3145 
3146 static int
3147 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
3148 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3149 		uint16_t queue_id, const uint16_t num_to_process,
3150 		uint16_t burst_sz, struct test_time_stats *time_st)
3151 {
3152 	int i, dequeued, ret;
3153 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3154 	uint64_t enq_start_time, deq_start_time;
3155 	uint64_t enq_sw_last_time, deq_last_time;
3156 	struct rte_bbdev_stats stats;
3157 
3158 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3159 		uint16_t enq = 0, deq = 0;
3160 
3161 		if (unlikely(num_to_process - dequeued < burst_sz))
3162 			burst_sz = num_to_process - dequeued;
3163 
3164 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3165 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
3166 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3167 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3168 					bufs->inputs,
3169 					bufs->hard_outputs,
3170 					ref_op);
3171 
3172 		/* Start time meas for enqueue function offload latency */
3173 		enq_start_time = rte_rdtsc_precise();
3174 		do {
3175 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
3176 					&ops_enq[enq], burst_sz - enq);
3177 		} while (unlikely(burst_sz != enq));
3178 
3179 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3180 		TEST_ASSERT_SUCCESS(ret,
3181 				"Failed to get stats for queue (%u) of device (%u)",
3182 				queue_id, dev_id);
3183 
3184 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3185 				stats.acc_offload_cycles;
3186 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3187 				enq_sw_last_time);
3188 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3189 				enq_sw_last_time);
3190 		time_st->enq_sw_total_time += enq_sw_last_time;
3191 
3192 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3193 				stats.acc_offload_cycles);
3194 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3195 				stats.acc_offload_cycles);
3196 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3197 
3198 		/* give time for device to process ops */
3199 		rte_delay_us(200);
3200 
3201 		/* Start time meas for dequeue function offload latency */
3202 		deq_start_time = rte_rdtsc_precise();
3203 		/* Dequeue one operation */
3204 		do {
3205 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3206 					&ops_deq[deq], 1);
3207 		} while (unlikely(deq != 1));
3208 
3209 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3210 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3211 				deq_last_time);
3212 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3213 				deq_last_time);
3214 		time_st->deq_total_time += deq_last_time;
3215 
3216 		while (burst_sz != deq)
3217 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3218 					&ops_deq[deq], burst_sz - deq);
3219 
3220 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3221 		dequeued += deq;
3222 	}
3223 
3224 	return i;
3225 }
3226 
3227 static int
3228 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
3229 		struct test_buffers *bufs,
3230 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3231 		uint16_t queue_id, const uint16_t num_to_process,
3232 		uint16_t burst_sz, struct test_time_stats *time_st)
3233 {
3234 	int i, dequeued, ret;
3235 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3236 	uint64_t enq_start_time, deq_start_time;
3237 	uint64_t enq_sw_last_time, deq_last_time;
3238 	struct rte_bbdev_stats stats;
3239 
3240 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3241 		uint16_t enq = 0, deq = 0;
3242 
3243 		if (unlikely(num_to_process - dequeued < burst_sz))
3244 			burst_sz = num_to_process - dequeued;
3245 
3246 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3247 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
3248 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3249 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3250 					bufs->inputs,
3251 					bufs->hard_outputs,
3252 					ref_op);
3253 
3254 		/* Start time meas for enqueue function offload latency */
3255 		enq_start_time = rte_rdtsc_precise();
3256 		do {
3257 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3258 					&ops_enq[enq], burst_sz - enq);
3259 		} while (unlikely(burst_sz != enq));
3260 
3261 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3262 		TEST_ASSERT_SUCCESS(ret,
3263 				"Failed to get stats for queue (%u) of device (%u)",
3264 				queue_id, dev_id);
3265 
3266 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3267 				stats.acc_offload_cycles;
3268 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3269 				enq_sw_last_time);
3270 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3271 				enq_sw_last_time);
3272 		time_st->enq_sw_total_time += enq_sw_last_time;
3273 
3274 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3275 				stats.acc_offload_cycles);
3276 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3277 				stats.acc_offload_cycles);
3278 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3279 
3280 		/* give time for device to process ops */
3281 		rte_delay_us(200);
3282 
3283 		/* Start time meas for dequeue function offload latency */
3284 		deq_start_time = rte_rdtsc_precise();
3285 		/* Dequeue one operation */
3286 		do {
3287 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3288 					&ops_deq[deq], 1);
3289 		} while (unlikely(deq != 1));
3290 
3291 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3292 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3293 				deq_last_time);
3294 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3295 				deq_last_time);
3296 		time_st->deq_total_time += deq_last_time;
3297 
3298 		while (burst_sz != deq)
3299 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3300 					&ops_deq[deq], burst_sz - deq);
3301 
3302 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3303 		dequeued += deq;
3304 	}
3305 
3306 	return i;
3307 }
3308 #endif
3309 
3310 static int
3311 offload_cost_test(struct active_device *ad,
3312 		struct test_op_params *op_params)
3313 {
3314 #ifndef RTE_BBDEV_OFFLOAD_COST
3315 	RTE_SET_USED(ad);
3316 	RTE_SET_USED(op_params);
3317 	printf("Offload latency test is disabled.\n");
3318 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3319 	return TEST_SKIPPED;
3320 #else
3321 	int iter;
3322 	uint16_t burst_sz = op_params->burst_sz;
3323 	const uint16_t num_to_process = op_params->num_to_process;
3324 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3325 	const uint16_t queue_id = ad->queue_ids[0];
3326 	struct test_buffers *bufs = NULL;
3327 	struct rte_bbdev_info info;
3328 	const char *op_type_str;
3329 	struct test_time_stats time_st;
3330 
3331 	memset(&time_st, 0, sizeof(struct test_time_stats));
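	/* Min times start at UINT64_MAX so the first RTE_MIN() sample wins. */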
3332 	time_st.enq_sw_min_time = UINT64_MAX;
3333 	time_st.enq_acc_min_time = UINT64_MAX;
3334 	time_st.deq_min_time = UINT64_MAX;
3335 
3336 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3337 			"BURST_SIZE should be <= %u", MAX_BURST);
3338 
3339 	rte_bbdev_info_get(ad->dev_id, &info);
3340 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3341 
3342 	op_type_str = rte_bbdev_op_type_str(op_type);
3343 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3344 
3345 	printf("+ ------------------------------------------------------- +\n");
3346 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3347 			info.dev_name, burst_sz, num_to_process, op_type_str);
3348 
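	/*
	 * Dispatch to the measurement routine matching the vector's op type.
	 * RTE_BBDEV_OP_NONE (and any other unlisted type) falls through to
	 * the Turbo encode path.
	 */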
3349 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3350 		iter = offload_latency_test_dec(op_params->mp, bufs,
3351 				op_params->ref_dec_op, ad->dev_id, queue_id,
3352 				num_to_process, burst_sz, &time_st);
3353 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3354 		iter = offload_latency_test_enc(op_params->mp, bufs,
3355 				op_params->ref_enc_op, ad->dev_id, queue_id,
3356 				num_to_process, burst_sz, &time_st);
3357 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3358 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
3359 				op_params->ref_enc_op, ad->dev_id, queue_id,
3360 				num_to_process, burst_sz, &time_st);
3361 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3362 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
3363 			op_params->ref_dec_op, ad->dev_id, queue_id,
3364 			num_to_process, burst_sz, &time_st);
3365 	else
3366 		iter = offload_latency_test_enc(op_params->mp, bufs,
3367 				op_params->ref_enc_op, ad->dev_id, queue_id,
3368 				num_to_process, burst_sz, &time_st);
3369 
3370 	if (iter <= 0)
3371 		return TEST_FAILED;
3372 
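	/*
	 * All latencies below are converted from TSC cycles to microseconds
	 * as  us = cycles * 1000000 / rte_get_tsc_hz().  A minimal helper
	 * sketch (hypothetical, not part of this file) would be:
	 *
	 *	static inline double
	 *	cycles_to_us(uint64_t cycles)
	 *	{
	 *		return (double)cycles * 1000000.0 /
	 *				(double)rte_get_tsc_hz();
	 *	}
	 */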
3373 	printf("Enqueue driver offload cost latency:\n"
3374 			"\tavg: %lg cycles, %lg us\n"
3375 			"\tmin: %lg cycles, %lg us\n"
3376 			"\tmax: %lg cycles, %lg us\n"
3377 			"Enqueue accelerator offload cost latency:\n"
3378 			"\tavg: %lg cycles, %lg us\n"
3379 			"\tmin: %lg cycles, %lg us\n"
3380 			"\tmax: %lg cycles, %lg us\n",
3381 			(double)time_st.enq_sw_total_time / (double)iter,
3382 			(double)(time_st.enq_sw_total_time * 1000000) /
3383 			(double)iter / (double)rte_get_tsc_hz(),
3384 			(double)time_st.enq_sw_min_time,
3385 			(double)(time_st.enq_sw_min_time * 1000000) /
3386 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
3387 			(double)(time_st.enq_sw_max_time * 1000000) /
3388 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
3389 			(double)iter,
3390 			(double)(time_st.enq_acc_total_time * 1000000) /
3391 			(double)iter / (double)rte_get_tsc_hz(),
3392 			(double)time_st.enq_acc_min_time,
3393 			(double)(time_st.enq_acc_min_time * 1000000) /
3394 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
3395 			(double)(time_st.enq_acc_max_time * 1000000) /
3396 			rte_get_tsc_hz());
3397 
3398 	printf("Dequeue offload cost latency - one op:\n"
3399 			"\tavg: %lg cycles, %lg us\n"
3400 			"\tmin: %lg cycles, %lg us\n"
3401 			"\tmax: %lg cycles, %lg us\n",
3402 			(double)time_st.deq_total_time / (double)iter,
3403 			(double)(time_st.deq_total_time * 1000000) /
3404 			(double)iter / (double)rte_get_tsc_hz(),
3405 			(double)time_st.deq_min_time,
3406 			(double)(time_st.deq_min_time * 1000000) /
3407 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
3408 			(double)(time_st.deq_max_time * 1000000) /
3409 			rte_get_tsc_hz());
3410 
3411 	return TEST_SUCCESS;
3412 #endif
3413 }
3414 
3415 #ifdef RTE_BBDEV_OFFLOAD_COST
3416 static int
3417 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
3418 		const uint16_t num_to_process, uint16_t burst_sz,
3419 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3420 		uint64_t *deq_max_time)
3421 {
3422 	int i, deq_total;
3423 	struct rte_bbdev_dec_op *ops[MAX_BURST];
3424 	uint64_t deq_start_time, deq_last_time;
3425 
3426 	/* Test deq offload latency from an empty queue */
3427 
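	/*
	 * Nothing is ever enqueued on this queue, so each timed call below
	 * measures the pure driver cost of polling an empty queue; ops[] is
	 * never filled and nothing needs to be freed.
	 */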
3428 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3429 			++i, deq_total += burst_sz) {
3430 		deq_start_time = rte_rdtsc_precise();
3431 
3432 		if (unlikely(num_to_process - deq_total < burst_sz))
3433 			burst_sz = num_to_process - deq_total;
3434 		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
3435 
3436 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3437 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3438 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3439 		*deq_total_time += deq_last_time;
3440 	}
3441 
3442 	return i;
3443 }
3444 
3445 static int
3446 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
3447 		const uint16_t num_to_process, uint16_t burst_sz,
3448 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3449 		uint64_t *deq_max_time)
3450 {
3451 	int i, deq_total;
3452 	struct rte_bbdev_enc_op *ops[MAX_BURST];
3453 	uint64_t deq_start_time, deq_last_time;
3454 
3455 	/* Test deq offload latency from an empty queue */
3456 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3457 			++i, deq_total += burst_sz) {
3458 		deq_start_time = rte_rdtsc_precise();
3459 
3460 		if (unlikely(num_to_process - deq_total < burst_sz))
3461 			burst_sz = num_to_process - deq_total;
3462 		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
3463 
3464 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3465 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3466 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3467 		*deq_total_time += deq_last_time;
3468 	}
3469 
3470 	return i;
3471 }
3472 #endif
3473 
3474 static int
3475 offload_latency_empty_q_test(struct active_device *ad,
3476 		struct test_op_params *op_params)
3477 {
3478 #ifndef RTE_BBDEV_OFFLOAD_COST
3479 	RTE_SET_USED(ad);
3480 	RTE_SET_USED(op_params);
3481 	printf("Offload latency empty dequeue test is disabled.\n");
3482 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3483 	return TEST_SKIPPED;
3484 #else
3485 	int iter;
3486 	uint64_t deq_total_time, deq_min_time, deq_max_time;
3487 	uint16_t burst_sz = op_params->burst_sz;
3488 	const uint16_t num_to_process = op_params->num_to_process;
3489 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3490 	const uint16_t queue_id = ad->queue_ids[0];
3491 	struct rte_bbdev_info info;
3492 	const char *op_type_str;
3493 
3494 	deq_total_time = deq_max_time = 0;
3495 	deq_min_time = UINT64_MAX;
3496 
3497 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3498 			"BURST_SIZE should be <= %u", MAX_BURST);
3499 
3500 	rte_bbdev_info_get(ad->dev_id, &info);
3501 
3502 	op_type_str = rte_bbdev_op_type_str(op_type);
3503 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3504 
3505 	printf("+ ------------------------------------------------------- +\n");
3506 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3507 			info.dev_name, burst_sz, num_to_process, op_type_str);
3508 
3509 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3510 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
3511 				num_to_process, burst_sz, &deq_total_time,
3512 				&deq_min_time, &deq_max_time);
3513 	else
3514 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
3515 				num_to_process, burst_sz, &deq_total_time,
3516 				&deq_min_time, &deq_max_time);
3517 
3518 	if (iter <= 0)
3519 		return TEST_FAILED;
3520 
3521 	printf("Empty dequeue offload:\n"
3522 			"\tavg: %lg cycles, %lg us\n"
3523 			"\tmin: %lg cycles, %lg us\n"
3524 			"\tmax: %lg cycles, %lg us\n",
3525 			(double)deq_total_time / (double)iter,
3526 			(double)(deq_total_time * 1000000) / (double)iter /
3527 			(double)rte_get_tsc_hz(), (double)deq_min_time,
3528 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
3529 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
3530 			rte_get_tsc_hz());
3531 
3532 	return TEST_SUCCESS;
3533 #endif
3534 }
3535 
3536 static int
3537 throughput_tc(void)
3538 {
3539 	return run_test_case(throughput_test);
3540 }
3541 
3542 static int
3543 offload_cost_tc(void)
3544 {
3545 	return run_test_case(offload_cost_test);
3546 }
3547 
3548 static int
3549 offload_latency_empty_q_tc(void)
3550 {
3551 	return run_test_case(offload_latency_empty_q_test);
3552 }
3553 
3554 static int
3555 latency_tc(void)
3556 {
3557 	return run_test_case(latency_test);
3558 }
3559 
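/*
 * The interrupt TC reuses throughput_test; interrupt_testsuite_setup is
 * expected to set intr_enabled so dequeues are driven by interrupts rather
 * than PMD polling.
 */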
3560 static int
3561 interrupt_tc(void)
3562 {
3563 	return run_test_case(throughput_test);
3564 }
3565 
3566 static struct unit_test_suite bbdev_throughput_testsuite = {
3567 	.suite_name = "BBdev Throughput Tests",
3568 	.setup = testsuite_setup,
3569 	.teardown = testsuite_teardown,
3570 	.unit_test_cases = {
3571 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
3572 		TEST_CASES_END() /**< NULL terminate unit test array */
3573 	}
3574 };
3575 
3576 static struct unit_test_suite bbdev_validation_testsuite = {
3577 	.suite_name = "BBdev Validation Tests",
3578 	.setup = testsuite_setup,
3579 	.teardown = testsuite_teardown,
3580 	.unit_test_cases = {
3581 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3582 		TEST_CASES_END() /**< NULL terminate unit test array */
3583 	}
3584 };
3585 
3586 static struct unit_test_suite bbdev_latency_testsuite = {
3587 	.suite_name = "BBdev Latency Tests",
3588 	.setup = testsuite_setup,
3589 	.teardown = testsuite_teardown,
3590 	.unit_test_cases = {
3591 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3592 		TEST_CASES_END() /**< NULL terminate unit test array */
3593 	}
3594 };
3595 
3596 static struct unit_test_suite bbdev_offload_cost_testsuite = {
3597 	.suite_name = "BBdev Offload Cost Tests",
3598 	.setup = testsuite_setup,
3599 	.teardown = testsuite_teardown,
3600 	.unit_test_cases = {
3601 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
3602 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
3603 		TEST_CASES_END() /**< NULL terminate unit test array */
3604 	}
3605 };
3606 
3607 static struct unit_test_suite bbdev_interrupt_testsuite = {
3608 	.suite_name = "BBdev Interrupt Tests",
3609 	.setup = interrupt_testsuite_setup,
3610 	.teardown = testsuite_teardown,
3611 	.unit_test_cases = {
3612 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
3613 		TEST_CASES_END() /**< NULL terminate unit test array */
3614 	}
3615 };
3616 
3617 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
3618 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
3619 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
3620 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
3621 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
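/*
 * The names registered above (throughput, validation, latency, offload,
 * interrupt) are the test-case identifiers used to select a suite at run
 * time. A typical invocation might look like the line below; the option
 * names are assumptions based on the test-bbdev documentation
 * (doc/guides/tools/testbbdev.rst), so verify them there:
 *
 *	./dpdk-test-bbdev -- -c offload -v <vector_file> -b 32 -n 64
 */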
3622