/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer))

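/* Compare a dequantized output element with its reference value. The tolerance
 * is a percentage of the reference; the absolute difference is taken so that
 * deviations in either direction are rejected.
 */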
#define ML_TEST_CHECK_OUTPUT(output, reference, tolerance)                                         \
	(fabsf((float)(output) - (float)(reference)) <=                                            \
	 ((fabsf((float)(reference)) * (tolerance)) / 100.0))

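/* Dump a buffer to a file, setting 'err' on failure. Wrapped in do { } while (0)
 * so it expands as a single statement. Typical usage:
 *
 *   bool error = false;
 *   ML_OPEN_WRITE_GET_ERR(path, buffer, size, error);
 *   if (error)
 *           return;
 */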
#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err)                                             \
	do {                                                                                       \
		FILE *fp = fopen(name, "w+");                                                      \
		if (fp == NULL) {                                                                  \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno));     \
			err = true;                                                                \
		} else {                                                                           \
			if (fwrite(buffer, 1, size, fp) != size) {                                 \
				ml_err("Error writing output, file: %s, error: %s", name,          \
				       strerror(errno));                                           \
				err = true;                                                        \
			}                                                                          \
			fclose(fp);                                                                \
		}                                                                                  \
	} while (0)

static void
print_line(uint16_t len)
{
	uint16_t i;

	for (i = 0; i < len; i++)
		printf("-");

	printf("\n");
}

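/* Latency accounting: enqueue lcores accumulate a TSC timestamp taken just
 * before each successful enqueue in args->start_cycles, and dequeue lcores
 * accumulate a timestamp taken after each dequeue in args->end_cycles.
 * ml_inference_stats_get() later reports the average end-to-end latency as
 * (sum of end stamps - sum of start stamps) / repetitions.
 */
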
/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint16_t fid;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].nb_batches;
	op->mempool = t->op_pool;

	op->input.addr = req->input;
	op->input.length = t->model[fid].inp_qsize;
	op->input.next = NULL;

	op->output.addr = req->output;
	op->output.length = t->model[fid].out_qsize;
	op->output.next = NULL;

	op->user_ptr = req;
	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry;

	for (i = 0; i < ops_count; i++) {
		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].nb_batches;
		args->enq_ops[i]->mempool = t->op_pool;

		args->enq_ops[i]->input.addr = args->reqs[i]->input;
		args->enq_ops[i]->input.length = t->model[fid].inp_qsize;
		args->enq_ops[i]->input.next = NULL;

		args->enq_ops[i]->output.addr = args->reqs[i]->output;
		args->enq_ops[i]->output.length = t->model[fid].out_qsize;
		args->enq_ops[i]->output.next = NULL;

		args->enq_ops[i]->user_ptr = args->reqs[i];
		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

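	/* The device may accept only part of the burst; retry from the first
	 * op that was not accepted. Each attempt adds burst_enq copies of the
	 * current timestamp so every op contributes exactly one start stamp.
	 */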
enqueue_reqs:
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL)
				rte_mempool_put(t->model[req->fid].io_pool, req);
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u, max_queue_pairs = %u",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u, max_desc = %u", opt->queue_size,
		       dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: filelist count exceeds device limit, count = %u (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	return true;
}

int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

	/* check number of available lcores: one main lcore plus an enqueue
	 * worker and a dequeue worker per queue pair.
	 */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	if (opt->batches == 0)
		ml_dump("batches", "%u (default)", opt->batches);
	else
		ml_dump("batches", "%u", opt->batches);

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_inference");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

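	/* Per-lcore scratch arrays sized for one burst, allocated for every
	 * possible lcore so workers can index them by rte_lcore_id() directly.
	 */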
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	if (test_inference != NULL)
		rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);
	if (t != NULL)
		rte_free(t);
}

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

/* Callback for IO pool create. Computes the fields of the ml_request structure
 * and prepares the quantized input data.
 */
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

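	/* Each pool object is laid out as:
	 *   | struct ml_request | quantized input | quantized output |
	 * with every region aligned to the device's min_align_size.
	 */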
	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);
	req->output = req->input +
		      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);
	req->niters = 0;

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,
			   t->model[t->fid].input, req->input);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	uint32_t buff_size;
	uint32_t mz_size;
	uint32_t fsize;
	FILE *fp;
	int ret;

	/* get input buffer size */
	ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
				       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);
	if (ret != 0) {
		ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* get output buffer size */
	ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
					&t->model[fid].out_qsize, &t->model[fid].out_dsize);
	if (ret != 0) {
		ml_err("Failed to get output size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	fp = fopen(opt->filelist[fid].input, "r");
	if (fp == NULL) {
		ml_err("Failed to open input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		goto error;
	}

	fseek(fp, 0, SEEK_END);
	fsize = ftell(fp);
	fseek(fp, 0, SEEK_SET);
	if (fsize != t->model[fid].inp_dsize) {
		ml_err("Invalid input file, size = %u (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		fclose(fp);
		goto error;
	}

	if (fread(t->model[fid].input, 1, t->model[fid].inp_dsize, fp) != t->model[fid].inp_dsize) {
		ml_err("Failed to read input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		fclose(fp);
		goto error;
	}
	fclose(fp);

	/* load reference file */
	if (t->model[fid].reference != NULL) {
		fp = fopen(opt->filelist[fid].reference, "r");
		if (fp == NULL) {
			ml_err("Failed to open reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			goto error;
		}

		if (fread(t->model[fid].reference, 1, t->model[fid].out_dsize, fp) !=
		    t->model[fid].out_dsize) {
			ml_err("Failed to read reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			fclose(fp);
			goto error;
		}
		fclose(fp);
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
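	/* Object size mirrors the layout established in ml_request_initialize:
	 * the request header plus aligned quantized input and output regions.
	 */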
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", "ml_io_pool");
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	if (mz != NULL)
		rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	if (mz != NULL)
		rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	if (mp != NULL)
		rte_mempool_free(mp);
}

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_op_pool");
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	if (t->op_pool != NULL)
		rte_mempool_free(t->op_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	uint32_t nb_elements;
	uint8_t *reference;
	uint8_t *output;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = model->output;
		reference = model->reference;

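		/* Walk every element of every output, advancing both pointers
		 * by the element size of the output's data type.
		 */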
		i = 0;
next_output:
		nb_elements =
			model->info.output_info[i].shape.w * model->info.output_info[i].shape.x *
			model->info.output_info[i].shape.y * model->info.output_info[i].shape.z;
		j = 0;
next_element:
		match = false;
		switch (model->info.output_info[i].dtype) {
		case RTE_ML_IO_TYPE_INT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t),
						 ML_TEST_READ_TYPE(reference, int8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int8_t);
			reference += sizeof(int8_t);
			break;
		case RTE_ML_IO_TYPE_UINT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t),
						 ML_TEST_READ_TYPE(reference, uint8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint8_t);
			reference += sizeof(uint8_t);
			break;
		case RTE_ML_IO_TYPE_INT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t),
						 ML_TEST_READ_TYPE(reference, int16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int16_t);
			reference += sizeof(int16_t);
			break;
		case RTE_ML_IO_TYPE_UINT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t),
						 ML_TEST_READ_TYPE(reference, uint16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint16_t);
			reference += sizeof(uint16_t);
			break;
		case RTE_ML_IO_TYPE_INT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t),
						 ML_TEST_READ_TYPE(reference, int32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int32_t);
			reference += sizeof(int32_t);
			break;
		case RTE_ML_IO_TYPE_UINT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t),
						 ML_TEST_READ_TYPE(reference, uint32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint32_t);
			reference += sizeof(uint32_t);
			break;
		case RTE_ML_IO_TYPE_FP32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float),
						 ML_TEST_READ_TYPE(reference, float),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(float);
			reference += sizeof(float);
			break;
		default: /* other types, fp8, fp16, bfloat16 */
			match = true;
		}

		if (!match)
			goto done;
		j++;
		if (j < nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	if (match)
		t->nb_valid++;

	return match;
}

/* Callback for mempool object iteration. Dequantizes the output data and
 * validates it against the reference output, when one is provided.
 */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	bool error = false;
	char *dump_path;

	RTE_SET_USED(mp);

911 		return;
912 
913 	t->nb_used++;
914 	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,
915 			     req->output, model->output);
916 
917 	if (model->reference == NULL) {
918 		t->nb_valid++;
919 		goto dump_output_pass;
920 	}
921 
922 	if (!ml_inference_validation(opaque, req))
923 		goto dump_output_fail;
924 	else
925 		goto dump_output_pass;
926 
927 dump_output_pass:
928 	if (obj_idx == 0) {
929 		/* write quantized output */
930 		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
931 			return;
932 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
933 		free(dump_path);
934 		if (error)
935 			return;
936 
937 		/* write dequantized output */
938 		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
939 			return;
940 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
941 		free(dump_path);
942 		if (error)
943 			return;
944 	}
945 
946 	return;
947 
948 dump_output_fail:
949 	if (t->cmn.opt->debug) {
950 		/* dump quantized output buffer */
951 		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
952 				obj_idx) == -1)
953 			return;
954 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
955 		free(dump_path);
956 		if (error)
957 			return;
958 
959 		/* dump dequantized output buffer */
960 		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output,
961 				obj_idx) == -1)
962 			return;
963 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
964 		free(dump_path);
965 		if (error)
966 			return;
967 	}
968 }
969 
int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

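	/* Workers are launched in pairs per queue pair: even ids run the
	 * enqueue loop, odd ids the dequeue loop, with qp_id = id / 2. Any
	 * repetitions left over from the integer division go to queue pair 0.
	 */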
	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}

int
ml_inference_stats_get(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t total_cycles = 0;
	uint32_t nb_filelist;
	uint64_t throughput;
	uint64_t avg_e2e;
	uint32_t qp_id;
	uint64_t freq;
	int ret;
	int i;

	if (!opt->stats)
		return 0;

	/* get xstats size */
	t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0);
	if (t->xstats_size >= 0) {
		/* allocate for xstats_map and values */
		t->xstats_map = rte_malloc(
			"ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0);
		if (t->xstats_map == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		t->xstats_values =
			rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0);
		if (t->xstats_values == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size);
		if (ret != t->xstats_size) {
			printf("Unable to get xstats names, ret = %d\n", ret);
			ret = -1;
			goto error;
		}

		for (i = 0; i < t->xstats_size; i++)
			rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id,
					      &t->xstats_values[i], 1);
	}

	/* print xstats */
	printf("\n");
	print_line(80);
	printf(" ML Device Extended Statistics\n");
	print_line(80);
	for (i = 0; i < t->xstats_size; i++)
		printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]);
	print_line(80);

	/* release buffers */
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	/* print end-to-end stats */
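	/* Average end-to-end latency derives from the accumulated timestamps:
	 *   (sum(end stamps) - sum(start stamps)) * NS_PER_S / (repetitions * tsc_hz)
	 * falling back to raw cycles when the TSC frequency is unknown.
	 */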
	freq = rte_get_tsc_hz();
	for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++)
		total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles;

	if (freq == 0) {
		avg_e2e = total_cycles / opt->repetitions;
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e);
	} else {
		avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq);
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e);
	}

	/* print inference throughput */
	if (strcmp(opt->test_name, "inference_ordered") == 0)
		nb_filelist = 1;
	else
		nb_filelist = opt->nb_filelist;

	if (freq == 0) {
		throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)",
		       throughput);
	} else {
		throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)",
		       throughput);
	}

	print_line(80);

	return 0;

error:
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	return ret;
}