1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 
20 #include "main.h"
21 #include "test_bbdev_vector.h"
22 
23 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
24 
25 #define MAX_QUEUES RTE_MAX_LCORE
26 #define TEST_REPETITIONS 1000
27 
28 #define OPS_CACHE_SIZE 256U
29 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
30 
31 #define SYNC_WAIT 0
32 #define SYNC_START 1
33 
34 #define INVALID_QUEUE_ID -1
35 
36 static struct test_bbdev_vector test_vector;
37 
38 /* Switch between PMD and Interrupt for throughput TC */
39 static bool intr_enabled;
40 
41 /* Represents tested active devices */
42 static struct active_device {
43 	const char *driver_name;
44 	uint8_t dev_id;
45 	uint16_t supported_ops;
46 	uint16_t queue_ids[MAX_QUEUES];
47 	uint16_t nb_queues;
48 	struct rte_mempool *ops_mempool;
49 	struct rte_mempool *in_mbuf_pool;
50 	struct rte_mempool *hard_out_mbuf_pool;
51 	struct rte_mempool *soft_out_mbuf_pool;
52 } active_devs[RTE_BBDEV_MAX_DEVS];
53 
54 static uint8_t nb_active_devs;
55 
56 /* Data buffers used by BBDEV ops */
57 struct test_buffers {
58 	struct rte_bbdev_op_data *inputs;
59 	struct rte_bbdev_op_data *hard_outputs;
60 	struct rte_bbdev_op_data *soft_outputs;
61 };
62 
63 /* Operation parameters specific for given test case */
64 struct test_op_params {
65 	struct rte_mempool *mp;
66 	struct rte_bbdev_dec_op *ref_dec_op;
67 	struct rte_bbdev_enc_op *ref_enc_op;
68 	uint16_t burst_sz;
69 	uint16_t num_to_process;
70 	uint16_t num_lcores;
71 	int vector_mask;
72 	rte_atomic16_t sync;
73 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
74 };
75 
76 /* Contains per lcore params */
77 struct thread_params {
78 	uint8_t dev_id;
79 	uint16_t queue_id;
80 	uint32_t lcore_id;
81 	uint64_t start_time;
82 	double ops_per_sec;
83 	double mbps;
84 	uint8_t iter_count;
85 	rte_atomic16_t nb_dequeued;
86 	rte_atomic16_t processing_status;
87 	rte_atomic16_t burst_sz;
88 	struct test_op_params *op_params;
89 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
90 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
91 };
92 
93 #ifdef RTE_BBDEV_OFFLOAD_COST
94 /* Stores time statistics */
95 struct test_time_stats {
96 	/* Stores software enqueue total working time */
97 	uint64_t enq_sw_total_time;
98 	/* Stores minimum value of software enqueue working time */
99 	uint64_t enq_sw_min_time;
100 	/* Stores maximum value of software enqueue working time */
101 	uint64_t enq_sw_max_time;
102 	/* Stores accelerator enqueue total working time */
103 	uint64_t enq_acc_total_time;
104 	/* Stores minimum value of accelerator enqueue working time */
105 	uint64_t enq_acc_min_time;
106 	/* Stores maximum value of accelerator enqueue working time */
107 	uint64_t enq_acc_max_time;
108 	/* Stores dequeue total working time */
109 	uint64_t deq_total_time;
110 	/* Stores minimum value of dequeue working time */
111 	uint64_t deq_min_time;
112 	/* Stores maximum value of dequeue working time */
113 	uint64_t deq_max_time;
114 };
115 #endif
116 
117 typedef int (test_case_function)(struct active_device *ad,
118 		struct test_op_params *op_params);
119 
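/* Reset packet length and per-segment data length of an mbuf chain,
 * so output mbufs can be reused across test repetitions.
 */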
120 static inline void
121 mbuf_reset(struct rte_mbuf *m)
122 {
123 	m->pkt_len = 0;
124 
125 	do {
126 		m->data_len = 0;
127 		m = m->next;
128 	} while (m != NULL);
129 }
130 
131 static inline void
132 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
133 {
134 	ad->supported_ops |= (1 << op_type);
135 }
136 
137 static inline bool
138 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
139 {
140 	return ad->supported_ops & (1 << op_type);
141 }
142 
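/* Check that all requested capability flags are set in the flags
 * reported by the device.
 */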
143 static inline bool
144 flags_match(uint32_t flags_req, uint32_t flags_present)
145 {
146 	return (flags_req & flags_present) == flags_req;
147 }
148 
149 static void
150 clear_soft_out_cap(uint32_t *op_flags)
151 {
152 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
153 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
154 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
155 }
156 
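/* Check whether a device's capabilities cover the operation type, flags
 * and buffer counts required by the test vector.
 */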
157 static int
158 check_dev_cap(const struct rte_bbdev_info *dev_info)
159 {
160 	unsigned int i;
161 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs;
162 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
163 
164 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
165 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
166 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
167 
168 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
169 		if (op_cap->type != test_vector.op_type)
170 			continue;
171 
172 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
173 			const struct rte_bbdev_op_cap_turbo_dec *cap =
174 					&op_cap->cap.turbo_dec;
175 			/* Ignore lack of soft output capability; just skip
176 			 * checking whether the soft output is valid.
177 			 */
178 			if ((test_vector.turbo_dec.op_flags &
179 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
180 					!(cap->capability_flags &
181 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
182 				printf(
183 					"WARNING: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
184 					dev_info->dev_name);
185 				clear_soft_out_cap(
186 					&test_vector.turbo_dec.op_flags);
187 			}
188 
189 			if (!flags_match(test_vector.turbo_dec.op_flags,
190 					cap->capability_flags))
191 				return TEST_FAILED;
192 			if (nb_inputs > cap->num_buffers_src) {
193 				printf("Too many inputs defined: %u, max: %u\n",
194 					nb_inputs, cap->num_buffers_src);
195 				return TEST_FAILED;
196 			}
197 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
198 					(test_vector.turbo_dec.op_flags &
199 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
200 				printf(
201 					"Too many soft outputs defined: %u, max: %u\n",
202 						nb_soft_outputs,
203 						cap->num_buffers_soft_out);
204 				return TEST_FAILED;
205 			}
206 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
207 				printf(
208 					"Too many hard outputs defined: %u, max: %u\n",
209 						nb_hard_outputs,
210 						cap->num_buffers_hard_out);
211 				return TEST_FAILED;
212 			}
213 			if (intr_enabled && !(cap->capability_flags &
214 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
215 				printf(
216 					"Dequeue interrupts are not supported!\n");
217 				return TEST_FAILED;
218 			}
219 
220 			return TEST_SUCCESS;
221 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
222 			const struct rte_bbdev_op_cap_turbo_enc *cap =
223 					&op_cap->cap.turbo_enc;
224 
225 			if (!flags_match(test_vector.turbo_enc.op_flags,
226 					cap->capability_flags))
227 				return TEST_FAILED;
228 			if (nb_inputs > cap->num_buffers_src) {
229 				printf("Too many inputs defined: %u, max: %u\n",
230 					nb_inputs, cap->num_buffers_src);
231 				return TEST_FAILED;
232 			}
233 			if (nb_hard_outputs > cap->num_buffers_dst) {
234 				printf(
235 					"Too many hard outputs defined: %u, max: %u\n",
236 					nb_hard_outputs, cap->num_buffers_dst);
237 				return TEST_FAILED;
238 			}
239 			if (intr_enabled && !(cap->capability_flags &
240 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
241 				printf(
242 					"Dequeue interrupts are not supported!\n");
243 				return TEST_FAILED;
244 			}
245 
246 			return TEST_SUCCESS;
247 		}
248 	}
249 
250 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
251 		return TEST_SUCCESS; /* Special case for NULL device */
252 
253 	return TEST_FAILED;
254 }
255 
256 /* Calculates the optimal mempool size (a power of two minus one) not smaller than val */
257 static unsigned int
258 optimal_mempool_size(unsigned int val)
259 {
260 	return rte_align32pow2(val + 1) - 1;
261 }
262 
263 /* allocates mbuf mempool for inputs and outputs */
264 static struct rte_mempool *
265 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
266 		int socket_id, unsigned int mbuf_pool_size,
267 		const char *op_type_str)
268 {
269 	unsigned int i;
270 	uint32_t max_seg_sz = 0;
271 	char pool_name[RTE_MEMPOOL_NAMESIZE];
272 
273 	/* find max input segment size */
274 	for (i = 0; i < entries->nb_segments; ++i)
275 		if (entries->segments[i].length > max_seg_sz)
276 			max_seg_sz = entries->segments[i].length;
277 
278 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
279 			dev_id);
280 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
281 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
282 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
283 }
284 
285 static int
286 create_mempools(struct active_device *ad, int socket_id,
287 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
288 {
289 	struct rte_mempool *mp;
290 	unsigned int ops_pool_size, mbuf_pool_size = 0;
291 	char pool_name[RTE_MEMPOOL_NAMESIZE];
292 	const char *op_type_str;
293 	enum rte_bbdev_op_type op_type = org_op_type;
294 
295 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
296 	struct op_data_entries *hard_out =
297 			&test_vector.entries[DATA_HARD_OUTPUT];
298 	struct op_data_entries *soft_out =
299 			&test_vector.entries[DATA_SOFT_OUTPUT];
300 
301 	/* allocate ops mempool */
302 	ops_pool_size = optimal_mempool_size(RTE_MAX(
303 			/* Ops used plus 1 reference op */
304 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
305 			/* Minimal cache size plus 1 reference op */
306 			(unsigned int)(1.5 * rte_lcore_count() *
307 					OPS_CACHE_SIZE + 1)),
308 			OPS_POOL_SIZE_MIN));
309 
310 	if (org_op_type == RTE_BBDEV_OP_NONE)
311 		op_type = RTE_BBDEV_OP_TURBO_ENC;
312 
313 	op_type_str = rte_bbdev_op_type_str(op_type);
314 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
315 
316 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
317 			ad->dev_id);
318 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
319 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
320 	TEST_ASSERT_NOT_NULL(mp,
321 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
322 			ops_pool_size,
323 			ad->dev_id,
324 			socket_id);
325 	ad->ops_mempool = mp;
326 
327 	/* Do not create input and output mbufs for BaseBand Null Device */
328 	if (org_op_type == RTE_BBDEV_OP_NONE)
329 		return TEST_SUCCESS;
330 
331 	/* Inputs */
332 	mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
333 	mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
334 	TEST_ASSERT_NOT_NULL(mp,
335 			"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
336 			mbuf_pool_size,
337 			ad->dev_id,
338 			socket_id);
339 	ad->in_mbuf_pool = mp;
340 
341 	/* Hard outputs */
342 	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
343 			hard_out->nb_segments);
344 	mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
345 			"hard_out");
346 	TEST_ASSERT_NOT_NULL(mp,
347 			"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
348 			mbuf_pool_size,
349 			ad->dev_id,
350 			socket_id);
351 	ad->hard_out_mbuf_pool = mp;
352 
353 	if (soft_out->nb_segments == 0)
354 		return TEST_SUCCESS;
355 
356 	/* Soft outputs */
357 	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
358 			soft_out->nb_segments);
359 	mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, mbuf_pool_size,
360 			"soft_out");
361 	TEST_ASSERT_NOT_NULL(mp,
362 			"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
363 			mbuf_pool_size,
364 			ad->dev_id,
365 			socket_id);
366 	ad->soft_out_mbuf_pool = mp;
367 
368 	return 0;
369 }
370 
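/* Set up queues (and interrupts when enabled) on a bbdev device for the
 * operation type of the test vector.
 */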
371 static int
372 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
373 		struct test_bbdev_vector *vector)
374 {
375 	int ret;
376 	unsigned int queue_id;
377 	struct rte_bbdev_queue_conf qconf;
378 	struct active_device *ad = &active_devs[nb_active_devs];
379 	unsigned int nb_queues;
380 	enum rte_bbdev_op_type op_type = vector->op_type;
381 
382 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
383 	/* setup device */
384 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
385 	if (ret < 0) {
386 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
387 				dev_id, nb_queues, info->socket_id, ret);
388 		return TEST_FAILED;
389 	}
390 
391 	/* configure interrupts if needed */
392 	if (intr_enabled) {
393 		ret = rte_bbdev_intr_enable(dev_id);
394 		if (ret < 0) {
395 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
396 					ret);
397 			return TEST_FAILED;
398 		}
399 	}
400 
401 	/* setup device queues */
402 	qconf.socket = info->socket_id;
403 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
404 	qconf.priority = 0;
405 	qconf.deferred_start = 0;
406 	qconf.op_type = op_type;
407 
408 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
409 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
410 		if (ret != 0) {
411 			printf(
412 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
413 					queue_id, qconf.priority, dev_id);
414 			qconf.priority++;
415 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
416 					&qconf);
417 		}
418 		if (ret != 0) {
419 			printf("All queues on dev %u allocated: %u\n",
420 					dev_id, queue_id);
421 			break;
422 		}
423 		ad->queue_ids[queue_id] = queue_id;
424 	}
425 	TEST_ASSERT(queue_id != 0,
426 			"ERROR Failed to configure any queues on dev %u",
427 			dev_id);
428 	ad->nb_queues = queue_id;
429 
430 	set_avail_op(ad, op_type);
431 
432 	return TEST_SUCCESS;
433 }
434 
435 static int
436 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
437 		struct test_bbdev_vector *vector)
438 {
439 	int ret;
440 
441 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
442 	active_devs[nb_active_devs].dev_id = dev_id;
443 
444 	ret = add_bbdev_dev(dev_id, info, vector);
445 	if (ret == TEST_SUCCESS)
446 		++nb_active_devs;
447 	return ret;
448 }
449 
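/* Register every detected bbdev device whose capabilities match the test
 * vector; returns the number of devices added.
 */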
450 static uint8_t
451 populate_active_devices(void)
452 {
453 	int ret;
454 	uint8_t dev_id;
455 	uint8_t nb_devs_added = 0;
456 	struct rte_bbdev_info info;
457 
458 	RTE_BBDEV_FOREACH(dev_id) {
459 		rte_bbdev_info_get(dev_id, &info);
460 
461 		if (check_dev_cap(&info)) {
462 			printf(
463 				"Device %d (%s) does not support specified capabilities\n",
464 					dev_id, info.dev_name);
465 			continue;
466 		}
467 
468 		ret = add_active_device(dev_id, &info, &test_vector);
469 		if (ret != 0) {
470 			printf("Adding active bbdev %s skipped\n",
471 					info.dev_name);
472 			continue;
473 		}
474 		nb_devs_added++;
475 	}
476 
477 	return nb_devs_added;
478 }
479 
480 static int
481 read_test_vector(void)
482 {
483 	int ret;
484 
485 	memset(&test_vector, 0, sizeof(test_vector));
486 	printf("Test vector file = %s\n", get_vector_filename());
487 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
488 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
489 			get_vector_filename());
490 
491 	return TEST_SUCCESS;
492 }
493 
494 static int
495 testsuite_setup(void)
496 {
497 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
498 
499 	if (populate_active_devices() == 0) {
500 		printf("No suitable devices found!\n");
501 		return TEST_SKIPPED;
502 	}
503 
504 	return TEST_SUCCESS;
505 }
506 
507 static int
508 interrupt_testsuite_setup(void)
509 {
510 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
511 
512 	/* Enable interrupts */
513 	intr_enabled = true;
514 
515 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
516 	if (populate_active_devices() == 0 ||
517 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
518 		intr_enabled = false;
519 		printf("No suitable devices found!\n");
520 		return TEST_SKIPPED;
521 	}
522 
523 	return TEST_SUCCESS;
524 }
525 
526 static void
527 testsuite_teardown(void)
528 {
529 	uint8_t dev_id;
530 
531 	/* Unconfigure devices */
532 	RTE_BBDEV_FOREACH(dev_id)
533 		rte_bbdev_close(dev_id);
534 
535 	/* Clear active devices structs. */
536 	memset(active_devs, 0, sizeof(active_devs));
537 	nb_active_devs = 0;
538 }
539 
540 static int
541 ut_setup(void)
542 {
543 	uint8_t i, dev_id;
544 
545 	for (i = 0; i < nb_active_devs; i++) {
546 		dev_id = active_devs[i].dev_id;
547 		/* reset bbdev stats */
548 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
549 				"Failed to reset stats of bbdev %u", dev_id);
550 		/* start the device */
551 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
552 				"Failed to start bbdev %u", dev_id);
553 	}
554 
555 	return TEST_SUCCESS;
556 }
557 
558 static void
559 ut_teardown(void)
560 {
561 	uint8_t i, dev_id;
562 	struct rte_bbdev_stats stats;
563 
564 	for (i = 0; i < nb_active_devs; i++) {
565 		dev_id = active_devs[i].dev_id;
566 		/* read stats and print */
567 		rte_bbdev_stats_get(dev_id, &stats);
568 		/* Stop the device */
569 		rte_bbdev_stop(dev_id);
570 	}
571 }
572 
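/* Attach (optionally chained) mbufs to each rte_bbdev_op_data entry.
 * For input entries the reference segment data is copied into the mbufs;
 * output entries only get empty chained mbufs allocated.
 */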
573 static int
574 init_op_data_objs(struct rte_bbdev_op_data *bufs,
575 		struct op_data_entries *ref_entries,
576 		struct rte_mempool *mbuf_pool, const uint16_t n,
577 		enum op_data_type op_type, uint16_t min_alignment)
578 {
579 	int ret;
580 	unsigned int i, j;
581 
582 	for (i = 0; i < n; ++i) {
583 		char *data;
584 		struct op_data_buf *seg = &ref_entries->segments[0];
585 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
586 		TEST_ASSERT_NOT_NULL(m_head,
587 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
588 				op_type, n * ref_entries->nb_segments,
589 				mbuf_pool->size);
590 
591 		TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
592 				(uint32_t)UINT16_MAX),
593 				"Given data is bigger than allowed mbuf segment size");
594 
595 		bufs[i].data = m_head;
596 		bufs[i].offset = 0;
597 		bufs[i].length = 0;
598 
599 		if (op_type == DATA_INPUT) {
600 			data = rte_pktmbuf_append(m_head, seg->length);
601 			TEST_ASSERT_NOT_NULL(data,
602 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
603 					seg->length, op_type);
604 
605 			TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
606 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
607 					data, min_alignment);
608 			rte_memcpy(data, seg->addr, seg->length);
609 			bufs[i].length += seg->length;
610 
611 			for (j = 1; j < ref_entries->nb_segments; ++j) {
612 				struct rte_mbuf *m_tail =
613 						rte_pktmbuf_alloc(mbuf_pool);
614 				TEST_ASSERT_NOT_NULL(m_tail,
615 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
616 						op_type,
617 						n * ref_entries->nb_segments,
618 						mbuf_pool->size);
619 				seg += 1;
620 
621 				data = rte_pktmbuf_append(m_tail, seg->length);
622 				TEST_ASSERT_NOT_NULL(data,
623 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
624 						seg->length, op_type);
625 
626 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
627 						min_alignment),
628 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
629 						data, min_alignment);
630 				rte_memcpy(data, seg->addr, seg->length);
631 				bufs[i].length += seg->length;
632 
633 				ret = rte_pktmbuf_chain(m_head, m_tail);
634 				TEST_ASSERT_SUCCESS(ret,
635 						"Couldn't chain mbufs from %d data type mbuf pool",
636 						op_type);
637 			}
638 
639 		} else {
640 
641 			/* allocate chained-mbuf for output buffer */
642 			for (j = 1; j < ref_entries->nb_segments; ++j) {
643 				struct rte_mbuf *m_tail =
644 						rte_pktmbuf_alloc(mbuf_pool);
645 				TEST_ASSERT_NOT_NULL(m_tail,
646 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
647 						op_type,
648 						n * ref_entries->nb_segments,
649 						mbuf_pool->size);
650 
651 				ret = rte_pktmbuf_chain(m_head, m_tail);
652 				TEST_ASSERT_SUCCESS(ret,
653 						"Couldn't chain mbufs from %d data type mbuf pool",
654 						op_type);
655 			}
656 		}
657 	}
658 
659 	return 0;
660 }
661 
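/* Allocate a zeroed rte_bbdev_op_data array on the given socket, falling
 * back to the other detected sockets if that allocation fails.
 */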
662 static int
663 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
664 		const int socket)
665 {
666 	int i;
667 
668 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
669 	if (*buffers == NULL) {
670 		printf("WARNING: Failed to allocate op_data on socket %d\n",
671 				socket);
672 		/* try to allocate memory on other detected sockets */
673 		for (i = 0; i < socket; i++) {
674 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
675 			if (*buffers != NULL)
676 				break;
677 		}
678 	}
679 
680 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
681 }
682 
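/* Rescale the int8 input LLR values so that their magnitude does not
 * exceed the max LLR modulus supported by the device.
 */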
683 static void
684 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
685 		uint16_t n, int8_t max_llr_modulus)
686 {
687 	uint16_t i, byte_idx;
688 
689 	for (i = 0; i < n; ++i) {
690 		struct rte_mbuf *m = input_ops[i].data;
691 		while (m != NULL) {
692 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
693 					input_ops[i].offset);
694 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
695 					++byte_idx)
696 				llr[byte_idx] = round((double)max_llr_modulus *
697 						llr[byte_idx] / INT8_MAX);
698 
699 			m = m->next;
700 		}
701 	}
702 }
703 
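/* Allocate and initialize the input/output op_data buffers used by one
 * device queue, based on the test vector entries.
 */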
704 static int
705 fill_queue_buffers(struct test_op_params *op_params,
706 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
707 		struct rte_mempool *soft_out_mp, uint16_t queue_id,
708 		const struct rte_bbdev_op_cap *capabilities,
709 		uint16_t min_alignment, const int socket_id)
710 {
711 	int ret;
712 	enum op_data_type type;
713 	const uint16_t n = op_params->num_to_process;
714 
715 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
716 		in_mp,
717 		soft_out_mp,
718 		hard_out_mp,
719 	};
720 
721 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
722 		&op_params->q_bufs[socket_id][queue_id].inputs,
723 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
724 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
725 	};
726 
727 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
728 		struct op_data_entries *ref_entries =
729 				&test_vector.entries[type];
730 		if (ref_entries->nb_segments == 0)
731 			continue;
732 
733 		ret = allocate_buffers_on_socket(queue_ops[type],
734 				n * sizeof(struct rte_bbdev_op_data),
735 				socket_id);
736 		TEST_ASSERT_SUCCESS(ret,
737 				"Couldn't allocate memory for rte_bbdev_op_data structs");
738 
739 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
740 				mbuf_pools[type], n, type, min_alignment);
741 		TEST_ASSERT_SUCCESS(ret,
742 				"Couldn't init rte_bbdev_op_data structs");
743 	}
744 
745 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
746 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
747 			capabilities->cap.turbo_dec.max_llr_modulus);
748 
749 	return 0;
750 }
751 
752 static void
753 free_buffers(struct active_device *ad, struct test_op_params *op_params)
754 {
755 	unsigned int i, j;
756 
757 	rte_mempool_free(ad->ops_mempool);
758 	rte_mempool_free(ad->in_mbuf_pool);
759 	rte_mempool_free(ad->hard_out_mbuf_pool);
760 	rte_mempool_free(ad->soft_out_mbuf_pool);
761 
762 	for (i = 0; i < rte_lcore_count(); ++i) {
763 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
764 			rte_free(op_params->q_bufs[j][i].inputs);
765 			rte_free(op_params->q_bufs[j][i].hard_outputs);
766 			rte_free(op_params->q_bufs[j][i].soft_outputs);
767 		}
768 	}
769 }
770 
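/* Copy the reference decode op parameters into each op to be enqueued
 * and attach the per-op input/output buffers.
 */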
771 static void
772 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
773 		unsigned int start_idx,
774 		struct rte_bbdev_op_data *inputs,
775 		struct rte_bbdev_op_data *hard_outputs,
776 		struct rte_bbdev_op_data *soft_outputs,
777 		struct rte_bbdev_dec_op *ref_op)
778 {
779 	unsigned int i;
780 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
781 
782 	for (i = 0; i < n; ++i) {
783 		if (turbo_dec->code_block_mode == 0) {
784 			ops[i]->turbo_dec.tb_params.ea =
785 					turbo_dec->tb_params.ea;
786 			ops[i]->turbo_dec.tb_params.eb =
787 					turbo_dec->tb_params.eb;
788 			ops[i]->turbo_dec.tb_params.k_pos =
789 					turbo_dec->tb_params.k_pos;
790 			ops[i]->turbo_dec.tb_params.k_neg =
791 					turbo_dec->tb_params.k_neg;
792 			ops[i]->turbo_dec.tb_params.c =
793 					turbo_dec->tb_params.c;
794 			ops[i]->turbo_dec.tb_params.c_neg =
795 					turbo_dec->tb_params.c_neg;
796 			ops[i]->turbo_dec.tb_params.cab =
797 					turbo_dec->tb_params.cab;
798 			ops[i]->turbo_dec.tb_params.r =
799 					turbo_dec->tb_params.r;
800 		} else {
801 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
802 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
803 		}
804 
805 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
806 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
807 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
808 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
809 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
810 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
811 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
812 
813 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
814 		ops[i]->turbo_dec.input = inputs[start_idx + i];
815 		if (soft_outputs != NULL)
816 			ops[i]->turbo_dec.soft_output =
817 				soft_outputs[start_idx + i];
818 	}
819 }
820 
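/* Copy the reference encode op parameters into each op to be enqueued
 * and attach the per-op input/output buffers.
 */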
821 static void
822 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
823 		unsigned int start_idx,
824 		struct rte_bbdev_op_data *inputs,
825 		struct rte_bbdev_op_data *outputs,
826 		struct rte_bbdev_enc_op *ref_op)
827 {
828 	unsigned int i;
829 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
830 	for (i = 0; i < n; ++i) {
831 		if (turbo_enc->code_block_mode == 0) {
832 			ops[i]->turbo_enc.tb_params.ea =
833 					turbo_enc->tb_params.ea;
834 			ops[i]->turbo_enc.tb_params.eb =
835 					turbo_enc->tb_params.eb;
836 			ops[i]->turbo_enc.tb_params.k_pos =
837 					turbo_enc->tb_params.k_pos;
838 			ops[i]->turbo_enc.tb_params.k_neg =
839 					turbo_enc->tb_params.k_neg;
840 			ops[i]->turbo_enc.tb_params.c =
841 					turbo_enc->tb_params.c;
842 			ops[i]->turbo_enc.tb_params.c_neg =
843 					turbo_enc->tb_params.c_neg;
844 			ops[i]->turbo_enc.tb_params.cab =
845 					turbo_enc->tb_params.cab;
846 			ops[i]->turbo_enc.tb_params.ncb_pos =
847 					turbo_enc->tb_params.ncb_pos;
848 			ops[i]->turbo_enc.tb_params.ncb_neg =
849 					turbo_enc->tb_params.ncb_neg;
850 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
851 		} else {
852 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
853 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
854 			ops[i]->turbo_enc.cb_params.ncb =
855 					turbo_enc->cb_params.ncb;
856 		}
857 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
858 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
859 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
860 
861 		ops[i]->turbo_enc.output = outputs[start_idx + i];
862 		ops[i]->turbo_enc.input = inputs[start_idx + i];
863 	}
864 }
865 
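/* Check op status and, via opaque_data, that ops were returned in the
 * order they were enqueued.
 */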
866 static int
867 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
868 		unsigned int order_idx, const int expected_status)
869 {
870 	TEST_ASSERT(op->status == expected_status,
871 			"op_status (%d) != expected_status (%d)",
872 			op->status, expected_status);
873 
874 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
875 			"Ordering error, expected %p, got %p",
876 			(void *)(uintptr_t)order_idx, op->opaque_data);
877 
878 	return TEST_SUCCESS;
879 }
880 
881 static int
882 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
883 		unsigned int order_idx, const int expected_status)
884 {
885 	TEST_ASSERT(op->status == expected_status,
886 			"op_status (%d) != expected_status (%d)",
887 			op->status, expected_status);
888 
889 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
890 			"Ordering error, expected %p, got %p",
891 			(void *)(uintptr_t)order_idx, op->opaque_data);
892 
893 	return TEST_SUCCESS;
894 }
895 
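/* Compare an output mbuf chain segment by segment against the reference
 * output data from the test vector.
 */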
896 static inline int
897 validate_op_chain(struct rte_bbdev_op_data *op,
898 		struct op_data_entries *orig_op)
899 {
900 	uint8_t i;
901 	struct rte_mbuf *m = op->data;
902 	uint8_t nb_dst_segments = orig_op->nb_segments;
903 	uint32_t total_data_size = 0;
904 
905 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
906 			"Number of segments differ in original (%u) and filled (%u) op",
907 			nb_dst_segments, m->nb_segs);
908 
909 	/* Validate each mbuf segment length */
910 	for (i = 0; i < nb_dst_segments; ++i) {
911 		/* Apply offset to the first mbuf segment */
912 		uint16_t offset = (i == 0) ? op->offset : 0;
913 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
914 		total_data_size += orig_op->segments[i].length;
915 
916 		TEST_ASSERT(orig_op->segments[i].length == data_len,
917 				"Length of segment differ in original (%u) and filled (%u) op",
918 				orig_op->segments[i].length, data_len);
919 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
920 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
921 				data_len,
922 				"Output buffers (CB=%u) are not equal", i);
923 		m = m->next;
924 	}
925 
926 	/* Validate total mbuf pkt length */
927 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
928 	TEST_ASSERT(total_data_size == pkt_len,
929 			"Length of data differ in original (%u) and filled (%u) op",
930 			total_data_size, pkt_len);
931 
932 	return TEST_SUCCESS;
933 }
934 
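/* Validate dequeued decode ops: status, ordering, iteration count (when
 * requested by the vector mask) and hard/soft output contents.
 */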
935 static int
936 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
937 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
938 {
939 	unsigned int i;
940 	int ret;
941 	struct op_data_entries *hard_data_orig =
942 			&test_vector.entries[DATA_HARD_OUTPUT];
943 	struct op_data_entries *soft_data_orig =
944 			&test_vector.entries[DATA_SOFT_OUTPUT];
945 	struct rte_bbdev_op_turbo_dec *ops_td;
946 	struct rte_bbdev_op_data *hard_output;
947 	struct rte_bbdev_op_data *soft_output;
948 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
949 
950 	for (i = 0; i < n; ++i) {
951 		ops_td = &ops[i]->turbo_dec;
952 		hard_output = &ops_td->hard_output;
953 		soft_output = &ops_td->soft_output;
954 
955 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
956 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
957 					"Returned iter_count (%d) > expected iter_count (%d)",
958 					ops_td->iter_count, ref_td->iter_count);
959 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
960 		TEST_ASSERT_SUCCESS(ret,
961 				"Checking status and ordering for decoder failed");
962 
963 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
964 				hard_data_orig),
965 				"Hard output buffers (CB=%u) are not equal",
966 				i);
967 
968 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
969 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
970 					soft_data_orig),
971 					"Soft output buffers (CB=%u) are not equal",
972 					i);
973 	}
974 
975 	return TEST_SUCCESS;
976 }
977 
978 static int
979 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
980 		struct rte_bbdev_enc_op *ref_op)
981 {
982 	unsigned int i;
983 	int ret;
984 	struct op_data_entries *hard_data_orig =
985 			&test_vector.entries[DATA_HARD_OUTPUT];
986 
987 	for (i = 0; i < n; ++i) {
988 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
989 		TEST_ASSERT_SUCCESS(ret,
990 				"Checking status and ordering for encoder failed");
991 		TEST_ASSERT_SUCCESS(validate_op_chain(
992 				&ops[i]->turbo_enc.output,
993 				hard_data_orig),
994 				"Output buffers (CB=%u) are not equal",
995 				i);
996 	}
997 
998 	return TEST_SUCCESS;
999 }
1000 
1001 static void
1002 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1003 {
1004 	unsigned int i;
1005 	struct op_data_entries *entry;
1006 
1007 	op->turbo_dec = test_vector.turbo_dec;
1008 	entry = &test_vector.entries[DATA_INPUT];
1009 	for (i = 0; i < entry->nb_segments; ++i)
1010 		op->turbo_dec.input.length +=
1011 				entry->segments[i].length;
1012 }
1013 
1014 static void
1015 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1016 {
1017 	unsigned int i;
1018 	struct op_data_entries *entry;
1019 
1020 	op->turbo_enc = test_vector.turbo_enc;
1021 	entry = &test_vector.entries[DATA_INPUT];
1022 	for (i = 0; i < entry->nb_segments; ++i)
1023 		op->turbo_enc.input.length +=
1024 				entry->segments[i].length;
1025 }
1026 
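/* Calculate the decode transport block size in bits, used for Mbps
 * throughput reporting.
 */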
1027 static uint32_t
1028 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1029 {
1030 	uint8_t i;
1031 	uint32_t c, r, tb_size = 0;
1032 
1033 	if (op->turbo_dec.code_block_mode) {
1034 		tb_size = op->turbo_dec.tb_params.k_neg;
1035 	} else {
1036 		c = op->turbo_dec.tb_params.c;
1037 		r = op->turbo_dec.tb_params.r;
1038 		for (i = 0; i < c-r; i++)
1039 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1040 				op->turbo_dec.tb_params.k_neg :
1041 				op->turbo_dec.tb_params.k_pos;
1042 	}
1043 	return tb_size;
1044 }
1045 
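/* Calculate the encode transport block size in bits, used for Mbps
 * throughput reporting.
 */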
1046 static uint32_t
1047 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1048 {
1049 	uint8_t i;
1050 	uint32_t c, r, tb_size = 0;
1051 
1052 	if (op->turbo_enc.code_block_mode) {
1053 		tb_size = op->turbo_enc.tb_params.k_neg;
1054 	} else {
1055 		c = op->turbo_enc.tb_params.c;
1056 		r = op->turbo_enc.tb_params.r;
1057 		for (i = 0; i < c-r; i++)
1058 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1059 				op->turbo_enc.tb_params.k_neg :
1060 				op->turbo_enc.tb_params.k_pos;
1061 	}
1062 	return tb_size;
1063 }
1064 
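/* Allocate the reference op and fill in the parameters shared by all ops
 * of a test case.
 */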
1065 static int
1066 init_test_op_params(struct test_op_params *op_params,
1067 		enum rte_bbdev_op_type op_type, const int expected_status,
1068 		const int vector_mask, struct rte_mempool *ops_mp,
1069 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1070 {
1071 	int ret = 0;
1072 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1073 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1074 				&op_params->ref_dec_op, 1);
1075 	else
1076 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1077 				&op_params->ref_enc_op, 1);
1078 
1079 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1080 
1081 	op_params->mp = ops_mp;
1082 	op_params->burst_sz = burst_sz;
1083 	op_params->num_to_process = num_to_process;
1084 	op_params->num_lcores = num_lcores;
1085 	op_params->vector_mask = vector_mask;
1086 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1087 		op_params->ref_dec_op->status = expected_status;
1088 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
1089 		op_params->ref_enc_op->status = expected_status;
1090 
1091 	return 0;
1092 }
1093 
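/* Create mempools, op parameters and queue buffers for a single device,
 * then run the test case function on it and free the resources.
 */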
1094 static int
1095 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1096 		struct test_op_params *op_params)
1097 {
1098 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1099 	unsigned int i;
1100 	struct active_device *ad;
1101 	unsigned int burst_sz = get_burst_sz();
1102 	enum rte_bbdev_op_type op_type = test_vector.op_type;
1103 	const struct rte_bbdev_op_cap *capabilities = NULL;
1104 
1105 	ad = &active_devs[dev_id];
1106 
1107 	/* Check if device supports op_type */
1108 	if (!is_avail_op(ad, test_vector.op_type))
1109 		return TEST_SUCCESS;
1110 
1111 	struct rte_bbdev_info info;
1112 	rte_bbdev_info_get(ad->dev_id, &info);
1113 	socket_id = GET_SOCKET(info.socket_id);
1114 
1115 	f_ret = create_mempools(ad, socket_id, op_type,
1116 			get_num_ops());
1117 	if (f_ret != TEST_SUCCESS) {
1118 		printf("Couldn't create mempools\n");
1119 		goto fail;
1120 	}
1121 	if (op_type == RTE_BBDEV_OP_NONE)
1122 		op_type = RTE_BBDEV_OP_TURBO_ENC;
1123 
1124 	f_ret = init_test_op_params(op_params, test_vector.op_type,
1125 			test_vector.expected_status,
1126 			test_vector.mask,
1127 			ad->ops_mempool,
1128 			burst_sz,
1129 			get_num_ops(),
1130 			get_num_lcores());
1131 	if (f_ret != TEST_SUCCESS) {
1132 		printf("Couldn't init test op params\n");
1133 		goto fail;
1134 	}
1135 
1136 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1137 		/* Find Decoder capabilities */
1138 		const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1139 		while (cap->type != RTE_BBDEV_OP_NONE) {
1140 			if (cap->type == RTE_BBDEV_OP_TURBO_DEC) {
1141 				capabilities = cap;
1142 				break;
1143 			}
			cap++;
1144 		}
1145 		TEST_ASSERT_NOT_NULL(capabilities,
1146 				"Couldn't find Decoder capabilities");
1147 
1148 		create_reference_dec_op(op_params->ref_dec_op);
1149 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1150 		create_reference_enc_op(op_params->ref_enc_op);
1151 
1152 	for (i = 0; i < ad->nb_queues; ++i) {
1153 		f_ret = fill_queue_buffers(op_params,
1154 				ad->in_mbuf_pool,
1155 				ad->hard_out_mbuf_pool,
1156 				ad->soft_out_mbuf_pool,
1157 				ad->queue_ids[i],
1158 				capabilities,
1159 				info.drv.min_alignment,
1160 				socket_id);
1161 		if (f_ret != TEST_SUCCESS) {
1162 			printf("Couldn't init queue buffers\n");
1163 			goto fail;
1164 		}
1165 	}
1166 
1167 	/* Run test case function */
1168 	t_ret = test_case_func(ad, op_params);
1169 
1170 	/* Free active device resources and return */
1171 	free_buffers(ad, op_params);
1172 	return t_ret;
1173 
1174 fail:
1175 	free_buffers(ad, op_params);
1176 	return TEST_FAILED;
1177 }
1178 
1179 /* Run given test function per active device per supported op type
1180  * per burst size.
1181  */
1182 static int
1183 run_test_case(test_case_function *test_case_func)
1184 {
1185 	int ret = 0;
1186 	uint8_t dev;
1187 
1188 	/* Alloc op_params */
1189 	struct test_op_params *op_params = rte_zmalloc(NULL,
1190 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1191 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1192 			RTE_ALIGN(sizeof(struct test_op_params),
1193 				RTE_CACHE_LINE_SIZE));
1194 
1195 	/* For each device run test case function */
1196 	for (dev = 0; dev < nb_active_devs; ++dev)
1197 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
1198 
1199 	rte_free(op_params);
1200 
1201 	return ret;
1202 }
1203 
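/* Dequeue interrupt handler: dequeues the completed burst and, once all
 * operations are processed, validates them and accumulates the throughput
 * statistics for the matching thread.
 */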
1204 static void
1205 dequeue_event_callback(uint16_t dev_id,
1206 		enum rte_bbdev_event_type event, void *cb_arg,
1207 		void *ret_param)
1208 {
1209 	int ret;
1210 	uint16_t i;
1211 	uint64_t total_time;
1212 	uint16_t deq, burst_sz, num_ops;
1213 	uint16_t queue_id = *(uint16_t *) ret_param;
1214 	struct rte_bbdev_info info;
1215 
1216 	double tb_len_bits;
1217 
1218 	struct thread_params *tp = cb_arg;
1219 
1220 	/* Find matching thread params using queue_id */
1221 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1222 		if (tp->queue_id == queue_id)
1223 			break;
1224 
1225 	if (i == MAX_QUEUES) {
1226 		printf("%s: Queue_id from interrupt details was not found!\n",
1227 				__func__);
1228 		return;
1229 	}
1230 
1231 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1232 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1233 		printf(
1234 			"Dequeue interrupt handler called for incorrect event!\n");
1235 		return;
1236 	}
1237 
1238 	burst_sz = rte_atomic16_read(&tp->burst_sz);
1239 	num_ops = tp->op_params->num_to_process;
1240 
1241 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1242 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1243 				&tp->dec_ops[
1244 					rte_atomic16_read(&tp->nb_dequeued)],
1245 				burst_sz);
1246 	else
1247 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1248 				&tp->enc_ops[
1249 					rte_atomic16_read(&tp->nb_dequeued)],
1250 				burst_sz);
1251 
1252 	if (deq < burst_sz) {
1253 		printf(
1254 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1255 			burst_sz, deq);
1256 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1257 		return;
1258 	}
1259 
1260 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1261 		rte_atomic16_add(&tp->nb_dequeued, deq);
1262 		return;
1263 	}
1264 
1265 	total_time = rte_rdtsc_precise() - tp->start_time;
1266 
1267 	rte_bbdev_info_get(dev_id, &info);
1268 
1269 	ret = TEST_SUCCESS;
1270 
1271 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1272 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1273 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
1274 				tp->op_params->vector_mask);
1275 		/* get the max of iter_count for all dequeued ops */
1276 		for (i = 0; i < num_ops; ++i)
1277 			tp->iter_count = RTE_MAX(
1278 					tp->dec_ops[i]->turbo_dec.iter_count,
1279 					tp->iter_count);
1280 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1281 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1282 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1283 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
1284 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1285 	}
1286 
1287 	if (ret) {
1288 		printf("Buffers validation failed\n");
1289 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1290 	}
1291 
1292 	switch (test_vector.op_type) {
1293 	case RTE_BBDEV_OP_TURBO_DEC:
1294 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1295 		break;
1296 	case RTE_BBDEV_OP_TURBO_ENC:
1297 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1298 		break;
1299 	case RTE_BBDEV_OP_NONE:
1300 		tb_len_bits = 0.0;
1301 		break;
1302 	default:
1303 		printf("Unknown op type: %d\n", test_vector.op_type);
1304 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1305 		return;
1306 	}
1307 
1308 	tp->ops_per_sec += ((double)num_ops) /
1309 			((double)total_time / (double)rte_get_tsc_hz());
1310 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1311 			((double)total_time / (double)rte_get_tsc_hz());
1312 
1313 	rte_atomic16_add(&tp->nb_dequeued, deq);
1314 }
1315 
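/* Per-lcore enqueue loop for decode throughput in interrupt mode;
 * dequeueing is done in the event callback.
 */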
1316 static int
1317 throughput_intr_lcore_dec(void *arg)
1318 {
1319 	struct thread_params *tp = arg;
1320 	unsigned int enqueued;
1321 	const uint16_t queue_id = tp->queue_id;
1322 	const uint16_t burst_sz = tp->op_params->burst_sz;
1323 	const uint16_t num_to_process = tp->op_params->num_to_process;
1324 	struct rte_bbdev_dec_op *ops[num_to_process];
1325 	struct test_buffers *bufs = NULL;
1326 	struct rte_bbdev_info info;
1327 	int ret, i, j;
1328 	uint16_t num_to_enq, enq;
1329 
1330 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1331 			"BURST_SIZE should be <= %u", MAX_BURST);
1332 
1333 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1334 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1335 			tp->dev_id, queue_id);
1336 
1337 	rte_bbdev_info_get(tp->dev_id, &info);
1338 
1339 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1340 			"NUM_OPS cannot exceed %u for this device",
1341 			info.drv.queue_size_lim);
1342 
1343 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1344 
1345 	rte_atomic16_clear(&tp->processing_status);
1346 	rte_atomic16_clear(&tp->nb_dequeued);
1347 
1348 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1349 		rte_pause();
1350 
1351 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1352 				num_to_process);
1353 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1354 			num_to_process);
1355 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1356 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1357 				bufs->hard_outputs, bufs->soft_outputs,
1358 				tp->op_params->ref_dec_op);
1359 
1360 	/* Set counter to validate the ordering */
1361 	for (j = 0; j < num_to_process; ++j)
1362 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1363 
1364 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1365 		for (i = 0; i < num_to_process; ++i)
1366 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
1367 
1368 		tp->start_time = rte_rdtsc_precise();
1369 		for (enqueued = 0; enqueued < num_to_process;) {
1370 			num_to_enq = burst_sz;
1371 
1372 			if (unlikely(num_to_process - enqueued < num_to_enq))
1373 				num_to_enq = num_to_process - enqueued;
1374 
1375 			enq = 0;
1376 			do {
1377 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1378 					queue_id, &ops[enqueued],
1379 					num_to_enq);
1380 			} while (unlikely(num_to_enq != enq));
1381 			enqueued += enq;
1382 
1383 			/* Write the current number of enqueued descriptors
1384 			 * to the thread's burst_sz. It ensures that the
1385 			 * proper number of descriptors will be dequeued in
1386 			 * the callback function - needed for the last batch
1387 			 * when the number of operations is not a multiple
1388 			 * of the burst size.
1389 			 */
1390 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1391 
1392 			/* Wait until processing of previous batch is
1393 			 * completed.
1394 			 */
1395 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1396 					(int16_t) enqueued)
1397 				rte_pause();
1398 		}
1399 		if (j != TEST_REPETITIONS - 1)
1400 			rte_atomic16_clear(&tp->nb_dequeued);
1401 	}
1402 
1403 	return TEST_SUCCESS;
1404 }
1405 
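/* Per-lcore enqueue loop for encode throughput in interrupt mode;
 * dequeueing is done in the event callback.
 */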
1406 static int
1407 throughput_intr_lcore_enc(void *arg)
1408 {
1409 	struct thread_params *tp = arg;
1410 	unsigned int enqueued;
1411 	const uint16_t queue_id = tp->queue_id;
1412 	const uint16_t burst_sz = tp->op_params->burst_sz;
1413 	const uint16_t num_to_process = tp->op_params->num_to_process;
1414 	struct rte_bbdev_enc_op *ops[num_to_process];
1415 	struct test_buffers *bufs = NULL;
1416 	struct rte_bbdev_info info;
1417 	int ret, i, j;
1418 	uint16_t num_to_enq, enq;
1419 
1420 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1421 			"BURST_SIZE should be <= %u", MAX_BURST);
1422 
1423 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1424 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1425 			tp->dev_id, queue_id);
1426 
1427 	rte_bbdev_info_get(tp->dev_id, &info);
1428 
1429 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1430 			"NUM_OPS cannot exceed %u for this device",
1431 			info.drv.queue_size_lim);
1432 
1433 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1434 
1435 	rte_atomic16_clear(&tp->processing_status);
1436 	rte_atomic16_clear(&tp->nb_dequeued);
1437 
1438 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1439 		rte_pause();
1440 
1441 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1442 			num_to_process);
1443 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1444 			num_to_process);
1445 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1446 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1447 				bufs->hard_outputs, tp->op_params->ref_enc_op);
1448 
1449 	/* Set counter to validate the ordering */
1450 	for (j = 0; j < num_to_process; ++j)
1451 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1452 
1453 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1454 		for (i = 0; i < num_to_process; ++i)
1455 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
1456 
1457 		tp->start_time = rte_rdtsc_precise();
1458 		for (enqueued = 0; enqueued < num_to_process;) {
1459 			num_to_enq = burst_sz;
1460 
1461 			if (unlikely(num_to_process - enqueued < num_to_enq))
1462 				num_to_enq = num_to_process - enqueued;
1463 
1464 			enq = 0;
1465 			do {
1466 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1467 						queue_id, &ops[enqueued],
1468 						num_to_enq);
1469 			} while (unlikely(enq != num_to_enq));
1470 			enqueued += enq;
1471 
1472 			/* Write the current number of enqueued descriptors
1473 			 * to the thread's burst_sz. It ensures that the
1474 			 * proper number of descriptors will be dequeued in
1475 			 * the callback function - needed for the last batch
1476 			 * when the number of operations is not a multiple
1477 			 * of the burst size.
1478 			 */
1479 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1480 
1481 			/* Wait until processing of previous batch is
1482 			 * completed.
1483 			 */
1484 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1485 					(int16_t) enqueued)
1486 				rte_pause();
1487 		}
1488 		if (j != TEST_REPETITIONS - 1)
1489 			rte_atomic16_clear(&tp->nb_dequeued);
1490 	}
1491 
1492 	return TEST_SUCCESS;
1493 }
1494 
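/* Per-lcore decode throughput loop in PMD (polling) mode: enqueue and
 * dequeue in place, then validate the results and compute Ops/s and Mbps.
 */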
1495 static int
1496 throughput_pmd_lcore_dec(void *arg)
1497 {
1498 	struct thread_params *tp = arg;
1499 	uint16_t enq, deq;
1500 	uint64_t total_time = 0, start_time;
1501 	const uint16_t queue_id = tp->queue_id;
1502 	const uint16_t burst_sz = tp->op_params->burst_sz;
1503 	const uint16_t num_ops = tp->op_params->num_to_process;
1504 	struct rte_bbdev_dec_op *ops_enq[num_ops];
1505 	struct rte_bbdev_dec_op *ops_deq[num_ops];
1506 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1507 	struct test_buffers *bufs = NULL;
1508 	int i, j, ret;
1509 	struct rte_bbdev_info info;
1510 	uint16_t num_to_enq;
1511 
1512 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1513 			"BURST_SIZE should be <= %u", MAX_BURST);
1514 
1515 	rte_bbdev_info_get(tp->dev_id, &info);
1516 
1517 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1518 			"NUM_OPS cannot exceed %u for this device",
1519 			info.drv.queue_size_lim);
1520 
1521 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1522 
1523 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1524 		rte_pause();
1525 
1526 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
1527 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
1528 
1529 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1530 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
1531 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
1532 
1533 	/* Set counter to validate the ordering */
1534 	for (j = 0; j < num_ops; ++j)
1535 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1536 
1537 	for (i = 0; i < TEST_REPETITIONS; ++i) {
1538 
1539 		for (j = 0; j < num_ops; ++j)
1540 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
1541 
1542 		start_time = rte_rdtsc_precise();
1543 
1544 		for (enq = 0, deq = 0; enq < num_ops;) {
1545 			num_to_enq = burst_sz;
1546 
1547 			if (unlikely(num_ops - enq < num_to_enq))
1548 				num_to_enq = num_ops - enq;
1549 
1550 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1551 					queue_id, &ops_enq[enq], num_to_enq);
1552 
1553 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1554 					queue_id, &ops_deq[deq], enq - deq);
1555 		}
1556 
1557 		/* dequeue the remaining */
1558 		while (deq < enq) {
1559 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1560 					queue_id, &ops_deq[deq], enq - deq);
1561 		}
1562 
1563 		total_time += rte_rdtsc_precise() - start_time;
1564 	}
1565 
1566 	tp->iter_count = 0;
1567 	/* get the max of iter_count for all dequeued ops */
1568 	for (i = 0; i < num_ops; ++i) {
1569 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
1570 				tp->iter_count);
1571 	}
1572 
1573 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1574 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
1575 				tp->op_params->vector_mask);
1576 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1577 	}
1578 
1579 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
1580 
1581 	double tb_len_bits = calc_dec_TB_size(ref_op);
1582 
1583 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1584 			((double)total_time / (double)rte_get_tsc_hz());
1585 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1586 			1000000.0) / ((double)total_time /
1587 			(double)rte_get_tsc_hz());
1588 
1589 	return TEST_SUCCESS;
1590 }
1591 
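/* Per-lcore encode throughput loop in PMD (polling) mode: enqueue and
 * dequeue in place, then validate the results and compute Ops/s and Mbps.
 */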
1592 static int
1593 throughput_pmd_lcore_enc(void *arg)
1594 {
1595 	struct thread_params *tp = arg;
1596 	uint16_t enq, deq;
1597 	uint64_t total_time = 0, start_time;
1598 	const uint16_t queue_id = tp->queue_id;
1599 	const uint16_t burst_sz = tp->op_params->burst_sz;
1600 	const uint16_t num_ops = tp->op_params->num_to_process;
1601 	struct rte_bbdev_enc_op *ops_enq[num_ops];
1602 	struct rte_bbdev_enc_op *ops_deq[num_ops];
1603 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1604 	struct test_buffers *bufs = NULL;
1605 	int i, j, ret;
1606 	struct rte_bbdev_info info;
1607 	uint16_t num_to_enq;
1608 
1609 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1610 			"BURST_SIZE should be <= %u", MAX_BURST);
1611 
1612 	rte_bbdev_info_get(tp->dev_id, &info);
1613 
1614 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1615 			"NUM_OPS cannot exceed %u for this device",
1616 			info.drv.queue_size_lim);
1617 
1618 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1619 
1620 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1621 		rte_pause();
1622 
1623 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
1624 			num_ops);
1625 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1626 			num_ops);
1627 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1628 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
1629 				bufs->hard_outputs, ref_op);
1630 
1631 	/* Set counter to validate the ordering */
1632 	for (j = 0; j < num_ops; ++j)
1633 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1634 
1635 	for (i = 0; i < TEST_REPETITIONS; ++i) {
1636 
1637 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1638 			for (j = 0; j < num_ops; ++j)
1639 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
1640 
1641 		start_time = rte_rdtsc_precise();
1642 
1643 		for (enq = 0, deq = 0; enq < num_ops;) {
1644 			num_to_enq = burst_sz;
1645 
1646 			if (unlikely(num_ops - enq < num_to_enq))
1647 				num_to_enq = num_ops - enq;
1648 
1649 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1650 					queue_id, &ops_enq[enq], num_to_enq);
1651 
1652 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1653 					queue_id, &ops_deq[deq], enq - deq);
1654 		}
1655 
1656 		/* dequeue the remaining */
1657 		while (deq < enq) {
1658 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1659 					queue_id, &ops_deq[deq], enq - deq);
1660 		}
1661 
1662 		total_time += rte_rdtsc_precise() - start_time;
1663 	}
1664 
1665 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1666 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
1667 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1668 	}
1669 
1670 	double tb_len_bits = calc_enc_TB_size(ref_op);
1671 
1672 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1673 			((double)total_time / (double)rte_get_tsc_hz());
1674 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
1675 			/ 1000000.0) / ((double)total_time /
1676 			(double)rte_get_tsc_hz());
1677 
1678 	return TEST_SUCCESS;
1679 }
1680 
1681 static void
1682 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
1683 {
1684 	unsigned int iter = 0;
1685 	double total_mops = 0, total_mbps = 0;
1686 
1687 	for (iter = 0; iter < used_cores; iter++) {
1688 		printf(
1689 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
1690 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
1691 			t_params[iter].mbps);
1692 		total_mops += t_params[iter].ops_per_sec;
1693 		total_mbps += t_params[iter].mbps;
1694 	}
1695 	printf(
1696 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
1697 		used_cores, total_mops, total_mbps);
1698 }
1699 
1700 static void
1701 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
1702 {
1703 	unsigned int iter = 0;
1704 	double total_mops = 0, total_mbps = 0;
1705 	uint8_t iter_count = 0;
1706 
1707 	for (iter = 0; iter < used_cores; iter++) {
1708 		printf(
1709 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1710 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
1711 			t_params[iter].mbps, t_params[iter].iter_count);
1712 		total_mops += t_params[iter].ops_per_sec;
1713 		total_mbps += t_params[iter].mbps;
1714 		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
1715 	}
1716 	printf(
1717 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
1718 		used_cores, total_mops, total_mbps, iter_count);
1719 }
1720 
1721 /*
1722  * Test function that determines how long an enqueue + dequeue of a burst
1723  * takes on available lcores.
1724  */
1725 static int
1726 throughput_test(struct active_device *ad,
1727 		struct test_op_params *op_params)
1728 {
1729 	int ret;
1730 	unsigned int lcore_id, used_cores = 0;
1731 	struct thread_params *t_params, *tp;
1732 	struct rte_bbdev_info info;
1733 	lcore_function_t *throughput_function;
1734 	uint16_t num_lcores;
1735 	const char *op_type_str;
1736 
1737 	rte_bbdev_info_get(ad->dev_id, &info);
1738 
1739 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
1740 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
1741 			test_vector.op_type);
1742 
1743 	printf(
1744 		"Throughput test: dev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, int mode: %s, GHz: %lg\n",
1745 			info.dev_name, ad->nb_queues, op_params->burst_sz,
1746 			op_params->num_to_process, op_params->num_lcores,
1747 			op_type_str,
1748 			intr_enabled ? "Interrupt mode" : "PMD mode",
1749 			(double)rte_get_tsc_hz() / 1000000000.0);
1750 
1751 	/* Set number of lcores */
1752 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
1753 			? ad->nb_queues
1754 			: op_params->num_lcores;
1755 
1756 	/* Allocate memory for thread parameters structure */
1757 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
1758 			RTE_CACHE_LINE_SIZE);
1759 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
1760 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
1761 				RTE_CACHE_LINE_SIZE));
1762 
1763 	if (intr_enabled) {
1764 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1765 			throughput_function = throughput_intr_lcore_dec;
1766 		else
1767 			throughput_function = throughput_intr_lcore_enc;
1768 
1769 		/* Dequeue interrupt callback registration */
1770 		ret = rte_bbdev_callback_register(ad->dev_id,
1771 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
1772 				t_params);
1773 		if (ret < 0) {
1774 			rte_free(t_params);
1775 			return ret;
1776 		}
1777 	} else {
1778 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1779 			throughput_function = throughput_pmd_lcore_dec;
1780 		else
1781 			throughput_function = throughput_pmd_lcore_enc;
1782 	}
1783 
1784 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
1785 
1786 	/* Master core is set at first entry */
1787 	t_params[0].dev_id = ad->dev_id;
1788 	t_params[0].lcore_id = rte_lcore_id();
1789 	t_params[0].op_params = op_params;
1790 	t_params[0].queue_id = ad->queue_ids[used_cores++];
1791 	t_params[0].iter_count = 0;
1792 
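	/* Each remaining lcore gets the next device queue and is launched
	 * remotely with the selected throughput function.
	 */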
1793 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1794 		if (used_cores >= num_lcores)
1795 			break;
1796 
1797 		t_params[used_cores].dev_id = ad->dev_id;
1798 		t_params[used_cores].lcore_id = lcore_id;
1799 		t_params[used_cores].op_params = op_params;
1800 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
1801 		t_params[used_cores].iter_count = 0;
1802 
1803 		rte_eal_remote_launch(throughput_function,
1804 				&t_params[used_cores++], lcore_id);
1805 	}
1806 
1807 	rte_atomic16_set(&op_params->sync, SYNC_START);
1808 	ret = throughput_function(&t_params[0]);
1809 
1810 	/* Master core is always used */
1811 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
1812 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
1813 
1814 	/* Return if test failed */
1815 	if (ret) {
1816 		rte_free(t_params);
1817 		return ret;
1818 	}
1819 
1820 	/* Print throughput if interrupts are disabled and test passed */
1821 	if (!intr_enabled) {
1822 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1823 			print_dec_throughput(t_params, num_lcores);
1824 		else
1825 			print_enc_throughput(t_params, num_lcores);
1826 		rte_free(t_params);
1827 		return ret;
1828 	}
1829 
1830 	/* In the interrupt TC we need to wait for the interrupt callback to
1831 	 * dequeue all pending operations. Skip waiting for queues which
1832 	 * reported an error using the processing_status variable.
1833 	 * Wait for the master lcore operations first.
1834 	 */
1835 	tp = &t_params[0];
1836 	while ((rte_atomic16_read(&tp->nb_dequeued) <
1837 			op_params->num_to_process) &&
1838 			(rte_atomic16_read(&tp->processing_status) !=
1839 			TEST_FAILED))
1840 		rte_pause();
1841 
1842 	tp->ops_per_sec /= TEST_REPETITIONS;
1843 	tp->mbps /= TEST_REPETITIONS;
1844 	ret |= rte_atomic16_read(&tp->processing_status);
1845 
1846 	/* Wait for slave lcores operations */
1847 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
1848 		tp = &t_params[used_cores];
1849 
1850 		while ((rte_atomic16_read(&tp->nb_dequeued) <
1851 				op_params->num_to_process) &&
1852 				(rte_atomic16_read(&tp->processing_status) !=
1853 				TEST_FAILED))
1854 			rte_pause();
1855 
1856 		tp->ops_per_sec /= TEST_REPETITIONS;
1857 		tp->mbps /= TEST_REPETITIONS;
1858 		ret |= rte_atomic16_read(&tp->processing_status);
1859 	}
1860 
1861 	/* Print throughput if test passed */
1862 	if (!ret) {
1863 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1864 			print_dec_throughput(t_params, num_lcores);
1865 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1866 			print_enc_throughput(t_params, num_lcores);
1867 	}
1868 
1869 	rte_free(t_params);
1870 	return ret;
1871 }
1872 
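/* Measure per-burst latency (enqueue until the first dequeue returns
 * operations) for decode operations. Returns the number of processed bursts.
 */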
1873 static int
1874 latency_test_dec(struct rte_mempool *mempool,
1875 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
1876 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
1877 		const uint16_t num_to_process, uint16_t burst_sz,
1878 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1879 {
1880 	int ret = TEST_SUCCESS;
1881 	uint16_t i, j, dequeued;
1882 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1883 	uint64_t start_time = 0, last_time = 0;
1884 
1885 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1886 		uint16_t enq = 0, deq = 0;
1887 		bool first_time = true;
1888 		last_time = 0;
1889 
1890 		if (unlikely(num_to_process - dequeued < burst_sz))
1891 			burst_sz = num_to_process - dequeued;
1892 
1893 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
1894 		TEST_ASSERT_SUCCESS(ret,
1895 				"rte_bbdev_dec_op_alloc_bulk() failed");
1896 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1897 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
1898 					bufs->inputs,
1899 					bufs->hard_outputs,
1900 					bufs->soft_outputs,
1901 					ref_op);
1902 
1903 		/* Set counter to validate the ordering */
1904 		for (j = 0; j < burst_sz; ++j)
1905 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1906 
1907 		start_time = rte_rdtsc_precise();
1908 
1909 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
1910 				burst_sz);
1911 		TEST_ASSERT(enq == burst_sz,
1912 				"Error enqueueing burst, expected %u, got %u",
1913 				burst_sz, enq);
1914 
1915 		/* Dequeue */
1916 		do {
1917 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1918 					&ops_deq[deq], burst_sz - deq);
1919 			if (likely(first_time && (deq > 0))) {
1920 				last_time = rte_rdtsc_precise() - start_time;
1921 				first_time = false;
1922 			}
1923 		} while (unlikely(burst_sz != deq));
1924 
1925 		*max_time = RTE_MAX(*max_time, last_time);
1926 		*min_time = RTE_MIN(*min_time, last_time);
1927 		*total_time += last_time;
1928 
1929 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1930 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
1931 					vector_mask);
1932 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1933 		}
1934 
1935 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
1936 		dequeued += deq;
1937 	}
1938 
1939 	return i;
1940 }
1941 
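/* Measure per-burst latency (enqueue until the first dequeue returns
 * operations) for encode operations. Returns the number of processed bursts.
 */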
1942 static int
1943 latency_test_enc(struct rte_mempool *mempool,
1944 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
1945 		uint16_t dev_id, uint16_t queue_id,
1946 		const uint16_t num_to_process, uint16_t burst_sz,
1947 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1948 {
1949 	int ret = TEST_SUCCESS;
1950 	uint16_t i, j, dequeued;
1951 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1952 	uint64_t start_time = 0, last_time = 0;
1953 
1954 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1955 		uint16_t enq = 0, deq = 0;
1956 		bool first_time = true;
1957 		last_time = 0;
1958 
1959 		if (unlikely(num_to_process - dequeued < burst_sz))
1960 			burst_sz = num_to_process - dequeued;
1961 
1962 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
1963 		TEST_ASSERT_SUCCESS(ret,
1964 				"rte_bbdev_enc_op_alloc_bulk() failed");
1965 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1966 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
1967 					bufs->inputs,
1968 					bufs->hard_outputs,
1969 					ref_op);
1970 
1971 		/* Set counter to validate the ordering */
1972 		for (j = 0; j < burst_sz; ++j)
1973 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1974 
1975 		start_time = rte_rdtsc_precise();
1976 
1977 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
1978 				burst_sz);
1979 		TEST_ASSERT(enq == burst_sz,
1980 				"Error enqueueing burst, expected %u, got %u",
1981 				burst_sz, enq);
1982 
1983 		/* Dequeue */
1984 		do {
1985 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1986 					&ops_deq[deq], burst_sz - deq);
1987 			if (likely(first_time && (deq > 0))) {
1988 				last_time = rte_rdtsc_precise() - start_time;
1989 				first_time = false;
1990 			}
1991 		} while (unlikely(burst_sz != deq));
1992 
1993 		*max_time = RTE_MAX(*max_time, last_time);
1994 		*min_time = RTE_MIN(*min_time, last_time);
1995 		*total_time += last_time;
1996 
1997 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1998 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
1999 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2000 		}
2001 
2002 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2003 		dequeued += deq;
2004 	}
2005 
2006 	return i;
2007 }
2008 
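/* Validation/Latency test: runs the decode or encode latency loop on the
 * first queue and reports average, minimum and maximum burst latency.
 */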
2009 static int
2010 latency_test(struct active_device *ad,
2011 		struct test_op_params *op_params)
2012 {
2013 	int iter;
2014 	uint16_t burst_sz = op_params->burst_sz;
2015 	const uint16_t num_to_process = op_params->num_to_process;
2016 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2017 	const uint16_t queue_id = ad->queue_ids[0];
2018 	struct test_buffers *bufs = NULL;
2019 	struct rte_bbdev_info info;
2020 	uint64_t total_time, min_time, max_time;
2021 	const char *op_type_str;
2022 
2023 	total_time = max_time = 0;
2024 	min_time = UINT64_MAX;
2025 
2026 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2027 			"BURST_SIZE should be <= %u", MAX_BURST);
2028 
2029 	rte_bbdev_info_get(ad->dev_id, &info);
2030 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2031 
2032 	op_type_str = rte_bbdev_op_type_str(op_type);
2033 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2034 
2035 	printf(
2036 		"\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2037 			info.dev_name, burst_sz, num_to_process, op_type_str);
2038 
2039 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2040 		iter = latency_test_dec(op_params->mp, bufs,
2041 				op_params->ref_dec_op, op_params->vector_mask,
2042 				ad->dev_id, queue_id, num_to_process,
2043 				burst_sz, &total_time, &min_time, &max_time);
2044 	else
2045 		iter = latency_test_enc(op_params->mp, bufs,
2046 				op_params->ref_enc_op, ad->dev_id, queue_id,
2047 				num_to_process, burst_sz, &total_time,
2048 				&min_time, &max_time);
2049 
2050 	if (iter <= 0)
2051 		return TEST_FAILED;
2052 
2053 	printf("Operation latency:\n"
2054 			"\tavg latency: %lg cycles, %lg us\n"
2055 			"\tmin latency: %lg cycles, %lg us\n"
2056 			"\tmax latency: %lg cycles, %lg us\n",
2057 			(double)total_time / (double)iter,
2058 			(double)(total_time * 1000000) / (double)iter /
2059 			(double)rte_get_tsc_hz(), (double)min_time,
2060 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
2061 			(double)max_time, (double)(max_time * 1000000) /
2062 			(double)rte_get_tsc_hz());
2063 
2064 	return TEST_SUCCESS;
2065 }
2066 
2067 #ifdef RTE_BBDEV_OFFLOAD_COST
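/* Copy the statistics of a single queue directly from the device data,
 * including the accelerator offload cycle count reported by the PMD.
 */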
2068 static int
2069 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
2070 		struct rte_bbdev_stats *stats)
2071 {
2072 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
2073 	struct rte_bbdev_stats *q_stats;
2074 
2075 	if (queue_id >= dev->data->num_queues)
2076 		return -1;
2077 
2078 	q_stats = &dev->data->queues[queue_id].queue_stats;
2079 
2080 	stats->enqueued_count = q_stats->enqueued_count;
2081 	stats->dequeued_count = q_stats->dequeued_count;
2082 	stats->enqueue_err_count = q_stats->enqueue_err_count;
2083 	stats->dequeue_err_count = q_stats->dequeue_err_count;
2084 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2085 
2086 	return 0;
2087 }
2088 
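/* Measure enqueue and dequeue offload cost for decode operations. The driver
 * (software) enqueue time is the total enqueue time minus the accelerator
 * offload cycles reported in the queue statistics.
 */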
2089 static int
2090 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2091 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
2092 		uint16_t queue_id, const uint16_t num_to_process,
2093 		uint16_t burst_sz, struct test_time_stats *time_st)
2094 {
2095 	int i, dequeued, ret;
2096 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2097 	uint64_t enq_start_time, deq_start_time;
2098 	uint64_t enq_sw_last_time, deq_last_time;
2099 	struct rte_bbdev_stats stats;
2100 
2101 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2102 		uint16_t enq = 0, deq = 0;
2103 
2104 		if (unlikely(num_to_process - dequeued < burst_sz))
2105 			burst_sz = num_to_process - dequeued;
2106 
2107 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2108 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2109 				burst_sz);
2110 
2111 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2112 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2113 					bufs->inputs,
2114 					bufs->hard_outputs,
2115 					bufs->soft_outputs,
2116 					ref_op);
2117 
2118 		/* Start time measurement for enqueue function offload latency */
2119 		enq_start_time = rte_rdtsc_precise();
2120 		do {
2121 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
2122 					&ops_enq[enq], burst_sz - enq);
2123 		} while (unlikely(burst_sz != enq));
2124 
2125 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2126 		TEST_ASSERT_SUCCESS(ret,
2127 				"Failed to get stats for queue (%u) of device (%u)",
2128 				queue_id, dev_id);
2129 
2130 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2131 				stats.acc_offload_cycles;
2132 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2133 				enq_sw_last_time);
2134 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2135 				enq_sw_last_time);
2136 		time_st->enq_sw_total_time += enq_sw_last_time;
2137 
2138 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2139 				stats.acc_offload_cycles);
2140 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2141 				stats.acc_offload_cycles);
2142 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
2143 
2144 		/* ensure enqueue has been completed */
2145 		/* Ensure enqueue has been completed */
2146 
2147 		/* Start time measurement for dequeue function offload latency */
2148 		deq_start_time = rte_rdtsc_precise();
2149 		/* Dequeue one operation */
2150 		do {
2151 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2152 					&ops_deq[deq], 1);
2153 		} while (unlikely(deq != 1));
2154 
2155 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
2156 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2157 				deq_last_time);
2158 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2159 				deq_last_time);
2160 		time_st->deq_total_time += deq_last_time;
2161 
2162 		/* Dequeue remaining operations if needed */
2163 		while (burst_sz != deq)
2164 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2165 					&ops_deq[deq], burst_sz - deq);
2166 
2167 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2168 		dequeued += deq;
2169 	}
2170 
2171 	return i;
2172 }
2173 
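/* Measure enqueue and dequeue offload cost for encode operations, using the
 * same driver vs. accelerator time split as the decode variant above.
 */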
2174 static int
2175 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
2176 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
2177 		uint16_t queue_id, const uint16_t num_to_process,
2178 		uint16_t burst_sz, struct test_time_stats *time_st)
2179 {
2180 	int i, dequeued, ret;
2181 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2182 	uint64_t enq_start_time, deq_start_time;
2183 	uint64_t enq_sw_last_time, deq_last_time;
2184 	struct rte_bbdev_stats stats;
2185 
2186 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2187 		uint16_t enq = 0, deq = 0;
2188 
2189 		if (unlikely(num_to_process - dequeued < burst_sz))
2190 			burst_sz = num_to_process - dequeued;
2191 
2192 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2193 		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2194 				burst_sz);
2195 
2196 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2197 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2198 					bufs->inputs,
2199 					bufs->hard_outputs,
2200 					ref_op);
2201 
2202 		/* Start time measurement for enqueue function offload latency */
2203 		enq_start_time = rte_rdtsc_precise();
2204 		do {
2205 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
2206 					&ops_enq[enq], burst_sz - enq);
2207 		} while (unlikely(burst_sz != enq));
2208 
2209 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2210 		TEST_ASSERT_SUCCESS(ret,
2211 				"Failed to get stats for queue (%u) of device (%u)",
2212 				queue_id, dev_id);
2213 
2214 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2215 				stats.acc_offload_cycles;
2216 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2217 				enq_sw_last_time);
2218 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2219 				enq_sw_last_time);
2220 		time_st->enq_sw_total_time += enq_sw_last_time;
2221 
2222 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2223 				stats.acc_offload_cycles);
2224 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2225 				stats.acc_offload_cycles);
2226 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
2227 
2228 		/* Ensure enqueue has been completed */
2229 		rte_delay_us(200);
2230 
2231 		/* Start time measurement for dequeue function offload latency */
2232 		deq_start_time = rte_rdtsc_precise();
2233 		/* Dequeue one operation */
2234 		do {
2235 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2236 					&ops_deq[deq], 1);
2237 		} while (unlikely(deq != 1));
2238 
2239 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
2240 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2241 				deq_last_time);
2242 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2243 				deq_last_time);
2244 		time_st->deq_total_time += deq_last_time;
2245 
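		/* Dequeue remaining operations if needed */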
2246 		while (burst_sz != deq)
2247 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2248 					&ops_deq[deq], burst_sz - deq);
2249 
2250 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2251 		dequeued += deq;
2252 	}
2253 
2254 	return i;
2255 }
2256 #endif
2257 
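/* Offload cost test: reports the cycles spent inside the enqueue and dequeue
 * calls themselves, split between driver time and accelerator offload time.
 */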
2258 static int
2259 offload_cost_test(struct active_device *ad,
2260 		struct test_op_params *op_params)
2261 {
2262 #ifndef RTE_BBDEV_OFFLOAD_COST
2263 	RTE_SET_USED(ad);
2264 	RTE_SET_USED(op_params);
2265 	printf("Offload latency test is disabled.\n");
2266 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2267 	return TEST_SKIPPED;
2268 #else
2269 	int iter;
2270 	uint16_t burst_sz = op_params->burst_sz;
2271 	const uint16_t num_to_process = op_params->num_to_process;
2272 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2273 	const uint16_t queue_id = ad->queue_ids[0];
2274 	struct test_buffers *bufs = NULL;
2275 	struct rte_bbdev_info info;
2276 	const char *op_type_str;
2277 	struct test_time_stats time_st;
2278 
2279 	memset(&time_st, 0, sizeof(struct test_time_stats));
2280 	time_st.enq_sw_min_time = UINT64_MAX;
2281 	time_st.enq_acc_min_time = UINT64_MAX;
2282 	time_st.deq_min_time = UINT64_MAX;
2283 
2284 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2285 			"BURST_SIZE should be <= %u", MAX_BURST);
2286 
2287 	rte_bbdev_info_get(ad->dev_id, &info);
2288 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2289 
2290 	op_type_str = rte_bbdev_op_type_str(op_type);
2291 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2292 
2293 	printf(
2294 		"\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2295 			info.dev_name, burst_sz, num_to_process, op_type_str);
2296 
2297 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2298 		iter = offload_latency_test_dec(op_params->mp, bufs,
2299 				op_params->ref_dec_op, ad->dev_id, queue_id,
2300 				num_to_process, burst_sz, &time_st);
2301 	else
2302 		iter = offload_latency_test_enc(op_params->mp, bufs,
2303 				op_params->ref_enc_op, ad->dev_id, queue_id,
2304 				num_to_process, burst_sz, &time_st);
2305 
2306 	if (iter <= 0)
2307 		return TEST_FAILED;
2308 
2309 	printf("Enqueue offload cost latency:\n"
2310 			"\tDriver offload avg %lg cycles, %lg us\n"
2311 			"\tDriver offload min %lg cycles, %lg us\n"
2312 			"\tDriver offload max %lg cycles, %lg us\n"
2313 			"\tAccelerator offload avg %lg cycles, %lg us\n"
2314 			"\tAccelerator offload min %lg cycles, %lg us\n"
2315 			"\tAccelerator offload max %lg cycles, %lg us\n",
2316 			(double)time_st.enq_sw_total_time / (double)iter,
2317 			(double)(time_st.enq_sw_total_time * 1000000) /
2318 			(double)iter / (double)rte_get_tsc_hz(),
2319 			(double)time_st.enq_sw_min_time,
2320 			(double)(time_st.enq_sw_min_time * 1000000) /
2321 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
2322 			(double)(time_st.enq_sw_max_time * 1000000) /
2323 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
2324 			(double)iter,
2325 			(double)(time_st.enq_acc_total_time * 1000000) /
2326 			(double)iter / (double)rte_get_tsc_hz(),
2327 			(double)time_st.enq_acc_min_time,
2328 			(double)(time_st.enq_acc_min_time * 1000000) /
2329 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
2330 			(double)(time_st.enq_acc_max_time * 1000000) /
2331 			rte_get_tsc_hz());
2332 
2333 	printf("Dequeue offload cost latency - one op:\n"
2334 			"\tavg %lg cycles, %lg us\n"
2335 			"\tmin %lg cycles, %lg us\n"
2336 			"\tmax %lg cycles, %lg us\n",
2337 			(double)time_st.deq_total_time / (double)iter,
2338 			(double)(time_st.deq_total_time * 1000000) /
2339 			(double)iter / (double)rte_get_tsc_hz(),
2340 			(double)time_st.deq_min_time,
2341 			(double)(time_st.deq_min_time * 1000000) /
2342 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
2343 			(double)(time_st.deq_max_time * 1000000) /
2344 			rte_get_tsc_hz());
2345 
2346 	return TEST_SUCCESS;
2347 #endif
2348 }
2349 
2350 #ifdef RTE_BBDEV_OFFLOAD_COST
2351 static int
2352 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
2353 		const uint16_t num_to_process, uint16_t burst_sz,
2354 		uint64_t *deq_total_time, uint64_t *deq_min_time,
2355 		uint64_t *deq_max_time)
2356 {
2357 	int i, deq_total;
2358 	struct rte_bbdev_dec_op *ops[MAX_BURST];
2359 	uint64_t deq_start_time, deq_last_time;
2360 
2361 	/* Test deq offload latency from an empty queue */
2362 
2363 	for (i = 0, deq_total = 0; deq_total < num_to_process;
2364 			++i, deq_total += burst_sz) {
2365 		deq_start_time = rte_rdtsc_precise();
2366 
2367 		if (unlikely(num_to_process - deq_total < burst_sz))
2368 			burst_sz = num_to_process - deq_total;
2369 		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
2370 
2371 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
2372 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2373 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2374 		*deq_total_time += deq_last_time;
2375 	}
2376 
2377 	return i;
2378 }
2379 
2380 static int
2381 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
2382 		const uint16_t num_to_process, uint16_t burst_sz,
2383 		uint64_t *deq_total_time, uint64_t *deq_min_time,
2384 		uint64_t *deq_max_time)
2385 {
2386 	int i, deq_total;
2387 	struct rte_bbdev_enc_op *ops[MAX_BURST];
2388 	uint64_t deq_start_time, deq_last_time;
2389 
2390 	/* Test deq offload latency from an empty queue */
2391 	for (i = 0, deq_total = 0; deq_total < num_to_process;
2392 			++i, deq_total += burst_sz) {
2393 		deq_start_time = rte_rdtsc_precise();
2394 
2395 		if (unlikely(num_to_process - deq_total < burst_sz))
2396 			burst_sz = num_to_process - deq_total;
2397 		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
2398 
2399 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
2400 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2401 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2402 		*deq_total_time += deq_last_time;
2403 	}
2404 
2405 	return i;
2406 }
2407 #endif
2408 
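/* Measure the cost of dequeue calls issued against an empty queue */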
2409 static int
2410 offload_latency_empty_q_test(struct active_device *ad,
2411 		struct test_op_params *op_params)
2412 {
2413 #ifndef RTE_BBDEV_OFFLOAD_COST
2414 	RTE_SET_USED(ad);
2415 	RTE_SET_USED(op_params);
2416 	printf("Offload latency empty dequeue test is disabled.\n");
2417 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2418 	return TEST_SKIPPED;
2419 #else
2420 	int iter;
2421 	uint64_t deq_total_time, deq_min_time, deq_max_time;
2422 	uint16_t burst_sz = op_params->burst_sz;
2423 	const uint16_t num_to_process = op_params->num_to_process;
2424 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2425 	const uint16_t queue_id = ad->queue_ids[0];
2426 	struct rte_bbdev_info info;
2427 	const char *op_type_str;
2428 
2429 	deq_total_time = deq_max_time = 0;
2430 	deq_min_time = UINT64_MAX;
2431 
2432 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2433 			"BURST_SIZE should be <= %u", MAX_BURST);
2434 
2435 	rte_bbdev_info_get(ad->dev_id, &info);
2436 
2437 	op_type_str = rte_bbdev_op_type_str(op_type);
2438 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2439 
2440 	printf(
2441 		"\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2442 			info.dev_name, burst_sz, num_to_process, op_type_str);
2443 
2444 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2445 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
2446 				num_to_process, burst_sz, &deq_total_time,
2447 				&deq_min_time, &deq_max_time);
2448 	else
2449 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
2450 				num_to_process, burst_sz, &deq_total_time,
2451 				&deq_min_time, &deq_max_time);
2452 
2453 	if (iter <= 0)
2454 		return TEST_FAILED;
2455 
2456 	printf("Empty dequeue offload\n"
2457 			"\tavg. latency: %lg cycles, %lg us\n"
2458 			"\tmin. latency: %lg cycles, %lg us\n"
2459 			"\tmax. latency: %lg cycles, %lg us\n",
2460 			(double)deq_total_time / (double)iter,
2461 			(double)(deq_total_time * 1000000) / (double)iter /
2462 			(double)rte_get_tsc_hz(), (double)deq_min_time,
2463 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
2464 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
2465 			rte_get_tsc_hz());
2466 
2467 	return TEST_SUCCESS;
2468 #endif
2469 }
2470 
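/* Test case wrappers executed through run_test_case() */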
2471 static int
2472 throughput_tc(void)
2473 {
2474 	return run_test_case(throughput_test);
2475 }
2476 
2477 static int
2478 offload_cost_tc(void)
2479 {
2480 	return run_test_case(offload_cost_test);
2481 }
2482 
2483 static int
2484 offload_latency_empty_q_tc(void)
2485 {
2486 	return run_test_case(offload_latency_empty_q_test);
2487 }
2488 
2489 static int
2490 latency_tc(void)
2491 {
2492 	return run_test_case(latency_test);
2493 }
2494 
2495 static int
2496 interrupt_tc(void)
2497 {
2498 	return run_test_case(throughput_test);
2499 }
2500 
2501 static struct unit_test_suite bbdev_throughput_testsuite = {
2502 	.suite_name = "BBdev Throughput Tests",
2503 	.setup = testsuite_setup,
2504 	.teardown = testsuite_teardown,
2505 	.unit_test_cases = {
2506 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
2507 		TEST_CASES_END() /**< NULL terminate unit test array */
2508 	}
2509 };
2510 
2511 static struct unit_test_suite bbdev_validation_testsuite = {
2512 	.suite_name = "BBdev Validation Tests",
2513 	.setup = testsuite_setup,
2514 	.teardown = testsuite_teardown,
2515 	.unit_test_cases = {
2516 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2517 		TEST_CASES_END() /**< NULL terminate unit test array */
2518 	}
2519 };
2520 
2521 static struct unit_test_suite bbdev_latency_testsuite = {
2522 	.suite_name = "BBdev Latency Tests",
2523 	.setup = testsuite_setup,
2524 	.teardown = testsuite_teardown,
2525 	.unit_test_cases = {
2526 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2527 		TEST_CASES_END() /**< NULL terminate unit test array */
2528 	}
2529 };
2530 
2531 static struct unit_test_suite bbdev_offload_cost_testsuite = {
2532 	.suite_name = "BBdev Offload Cost Tests",
2533 	.setup = testsuite_setup,
2534 	.teardown = testsuite_teardown,
2535 	.unit_test_cases = {
2536 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
2537 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
2538 		TEST_CASES_END() /**< NULL terminate unit test array */
2539 	}
2540 };
2541 
2542 static struct unit_test_suite bbdev_interrupt_testsuite = {
2543 	.suite_name = "BBdev Interrupt Tests",
2544 	.setup = interrupt_testsuite_setup,
2545 	.teardown = testsuite_teardown,
2546 	.unit_test_cases = {
2547 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
2548 		TEST_CASES_END() /**< NULL terminate unit test array */
2549 	}
2550 };
2551 
2552 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
2553 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
2554 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
2555 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
2556 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
2557