xref: /dpdk/app/test-mldev/test_inference_common.c (revision 2bf48044dca1892e571fd4964eecaacf6cb0c1c2)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <math.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer))

/* Pass when the absolute difference between output and reference is within
 * tolerance percent of the reference magnitude. fabsf() on both terms avoids
 * false passes when the difference or the reference is negative.
 */
#define ML_TEST_CHECK_OUTPUT(output, reference, tolerance)                                         \
	(fabsf((float)output - (float)reference) <= ((fabsf((float)reference) * tolerance) / 100.0))

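/* Worked example: with reference = 100 and tolerance = 1.0 (percent), any
 * output in [99.0, 101.0] is treated as a match. With tolerance = 0 the
 * element-wise check is skipped entirely and ml_inference_validation() falls
 * back to a CRC32 compare of the whole output buffer.
 */
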
#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err)                                             \
	do {                                                                                       \
		FILE *fp = fopen(name, "w+");                                                      \
		if (fp == NULL) {                                                                  \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno));     \
			err = true;                                                                \
		} else {                                                                           \
			if (fwrite(buffer, 1, size, fp) != size) {                                 \
				ml_err("Error writing output, file: %s, error: %s", name,          \
				       strerror(errno));                                           \
				err = true;                                                        \
			}                                                                          \
			fclose(fp);                                                                \
		}                                                                                  \
	} while (0)

static void
print_line(uint16_t len)
{
	uint16_t i;

	for (i = 0; i < len; i++)
		printf("-");

	printf("\n");
}

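/* Worker model: inference lcores are launched in enqueue/dequeue pairs, one
 * pair per queue-pair (see ml_inference_launch_cores()). Latency accounting
 * uses summed timestamps: enqueue workers accumulate the TSC value sampled
 * just before each enqueue into args->start_cycles, and dequeue workers
 * accumulate the TSC value sampled just after each dequeue into
 * args->end_cycles (weighted by the burst count in the burst variants).
 * ml_inference_stats_get() derives the total end-to-end time from the
 * difference of the two sums.
 */
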
/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint16_t fid;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	/* busy-wait until a free op and an I/O request buffer are available */
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].nb_batches;
	op->mempool = t->op_pool;

	op->input.addr = req->input;
	op->input.length = t->model[fid].inp_qsize;
	op->input.next = NULL;

	op->output.addr = req->output;
	op->output.length = t->model[fid].out_qsize;
	op->output.next = NULL;

	op->user_ptr = req;
	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry;

	for (i = 0; i < ops_count; i++) {
		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].nb_batches;
		args->enq_ops[i]->mempool = t->op_pool;

		args->enq_ops[i]->input.addr = args->reqs[i]->input;
		args->enq_ops[i]->input.length = t->model[fid].inp_qsize;
		args->enq_ops[i]->input.next = NULL;

		args->enq_ops[i]->output.addr = args->reqs[i]->output;
		args->enq_ops[i]->output.length = t->model[fid].out_qsize;
		args->enq_ops[i]->output.next = NULL;

		args->enq_ops[i]->user_ptr = args->reqs[i];
		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

enqueue_reqs:
	/* weight the start timestamp by the number of ops actually enqueued,
	 * so the summed-timestamp accounting stays per-inference
	 */
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL)
				rte_mempool_put(t->model[req->fid].io_pool, req);
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u, max_queue_pairs = %u",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u, max_desc = %u", opt->queue_size,
		       dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: filelist count exceeds device limit, count = %u (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	return true;
}

int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

	/* check number of available lcores: one enqueue and one dequeue lcore
	 * per queue-pair, plus the main lcore
	 */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

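/* The options dumped below correspond to dpdk-test-mldev command-line flags.
 * An illustrative invocation (option names assumed from the fields below;
 * file paths hypothetical):
 *
 *   dpdk-test-mldev -c 0xf -- --test=inference_ordered \
 *       --filelist=model.bin,input.bin,output.bin \
 *       --repetitions=100 --burst_size=8 --queue_pairs=1 --queue_size=64
 */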
void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	if (opt->batches == 0)
		ml_dump("batches", "%u (default)", opt->batches);
	else
		ml_dump("batches", "%u", opt->batches);

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_inference");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	if (test_inference != NULL)
		rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);
	if (t != NULL)
		rte_free(t);
}

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

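/* Layout of each I/O pool object, as sized in ml_inference_iomem_setup() and
 * sliced below (all three regions aligned to dev_info.min_align_size):
 *
 *   +-------------------+-----------------+------------------+
 *   | struct ml_request | quantized input | quantized output |
 *   +-------------------+-----------------+------------------+
 */
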
/* Callback for IO pool create. This function computes the fields of the
 * ml_request structure and prepares the quantized input data.
 */
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);
	req->output = req->input +
		      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);
	req->niters = 0;

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,
			   t->model[t->fid].input, req->input);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	uint32_t buff_size;
	uint32_t mz_size;
	uint32_t fsize;
	FILE *fp;
	int ret;

	/* get input buffer size */
	ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
				       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);
	if (ret != 0) {
		ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* get output buffer size */
	ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
					&t->model[fid].out_qsize, &t->model[fid].out_dsize);
	if (ret != 0) {
		ml_err("Failed to get output size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	fp = fopen(opt->filelist[fid].input, "r");
	if (fp == NULL) {
		ml_err("Failed to open input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		goto error;
	}

	fseek(fp, 0, SEEK_END);
	fsize = ftell(fp);
	fseek(fp, 0, SEEK_SET);
	if (fsize != t->model[fid].inp_dsize) {
		ml_err("Invalid input file, size = %u (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		fclose(fp);
		goto error;
	}

	if (fread(t->model[fid].input, 1, t->model[fid].inp_dsize, fp) != t->model[fid].inp_dsize) {
		ml_err("Failed to read input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		fclose(fp);
		goto error;
	}
	fclose(fp);

	/* load reference file */
	if (t->model[fid].reference != NULL) {
		fp = fopen(opt->filelist[fid].reference, "r");
		if (fp == NULL) {
			ml_err("Failed to open reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			goto error;
		}

		if (fread(t->model[fid].reference, 1, t->model[fid].out_dsize, fp) !=
		    t->model[fid].out_dsize) {
			ml_err("Failed to read reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			fclose(fp);
			goto error;
		}
		fclose(fp);
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", mp_name);
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	if (mz != NULL)
		rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	if (mz != NULL)
		rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	if (mp != NULL)
		rte_mempool_free(mp);
}

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_test_op_pool");
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	if (t->op_pool != NULL)
		rte_mempool_free(t->op_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	uint32_t nb_elements;
	uint8_t *reference;
	uint8_t *output;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = model->output;
		reference = model->reference;

		i = 0;
next_output:
		nb_elements =
			model->info.output_info[i].shape.w * model->info.output_info[i].shape.x *
			model->info.output_info[i].shape.y * model->info.output_info[i].shape.z;
		j = 0;
next_element:
		match = false;
		switch (model->info.output_info[i].dtype) {
		case RTE_ML_IO_TYPE_INT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t),
						 ML_TEST_READ_TYPE(reference, int8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int8_t);
			reference += sizeof(int8_t);
			break;
		case RTE_ML_IO_TYPE_UINT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t),
						 ML_TEST_READ_TYPE(reference, uint8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint8_t);
			reference += sizeof(uint8_t);
			break;
		case RTE_ML_IO_TYPE_INT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t),
						 ML_TEST_READ_TYPE(reference, int16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int16_t);
			reference += sizeof(int16_t);
			break;
		case RTE_ML_IO_TYPE_UINT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t),
						 ML_TEST_READ_TYPE(reference, uint16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint16_t);
			reference += sizeof(uint16_t);
			break;
		case RTE_ML_IO_TYPE_INT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t),
						 ML_TEST_READ_TYPE(reference, int32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int32_t);
			reference += sizeof(int32_t);
			break;
		case RTE_ML_IO_TYPE_UINT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t),
						 ML_TEST_READ_TYPE(reference, uint32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint32_t);
			reference += sizeof(uint32_t);
			break;
		case RTE_ML_IO_TYPE_FP32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float),
						 ML_TEST_READ_TYPE(reference, float),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(float);
			reference += sizeof(float);
			break;
		default: /* other types, fp8, fp16, bfloat16 */
			match = true;
		}

		if (!match)
			goto done;
		j++;
		if (j < nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	if (match)
		t->nb_valid++;

	return match;
}

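/* Output files: for the first pool object of a passing run, the quantized
 * output is written to "<output>.q" and the dequantized output to "<output>".
 * In debug mode, failing requests are additionally dumped with the pool
 * object index appended to the file name.
 */
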
/* Callback for mempool object iteration. This call dequantizes the output data. */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	char str[PATH_MAX];
	bool error = false;

	RTE_SET_USED(mp);

	if (req->niters == 0)
		return;

	t->nb_used++;
	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,
			     req->output, model->output);

	if (model->reference == NULL) {
		t->nb_valid++;
		goto dump_output_pass;
	}

	if (!ml_inference_validation(opaque, req))
		goto dump_output_fail;

dump_output_pass:
	if (obj_idx == 0) {
		/* write quantized output */
		snprintf(str, PATH_MAX, "%s.q", t->cmn.opt->filelist[req->fid].output);
		ML_OPEN_WRITE_GET_ERR(str, req->output, model->out_qsize, error);
		if (error)
			return;

		/* write dequantized output */
		snprintf(str, PATH_MAX, "%s", t->cmn.opt->filelist[req->fid].output);
		ML_OPEN_WRITE_GET_ERR(str, model->output, model->out_dsize, error);
		if (error)
			return;
	}

	return;

dump_output_fail:
	if (t->cmn.opt->debug) {
		/* dump quantized output buffer */
		snprintf(str, PATH_MAX, "%s.q.%d", t->cmn.opt->filelist[req->fid].output, obj_idx);
		ML_OPEN_WRITE_GET_ERR(str, req->output, model->out_qsize, error);
		if (error)
			return;

		/* dump dequantized output buffer */
		snprintf(str, PATH_MAX, "%s.%d", t->cmn.opt->filelist[req->fid].output, obj_idx);
		ML_OPEN_WRITE_GET_ERR(str, model->output, model->out_dsize, error);
		if (error)
			return;
	}
}

int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

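/* Launch one enqueue and one dequeue worker per queue-pair: worker lcores are
 * assigned in order, with even ids enqueuing and odd ids dequeuing on
 * queue-pair id / 2. Repetitions are split evenly across queue-pairs and the
 * remainder is given to queue-pair 0.
 */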
int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}

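/* End-to-end latency and throughput are derived from the summed timestamps:
 *
 *   total_cycles   = sum(end_cycles) - sum(start_cycles)
 *   avg latency ns = total_cycles * NS_PER_S / (repetitions * tsc_hz)
 *   throughput     = nb_filelist * repetitions * tsc_hz / total_cycles
 *
 * When the TSC frequency is unavailable (tsc_hz == 0), results are reported
 * in cycles and inferences per million cycles instead.
 */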
int
ml_inference_stats_get(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t total_cycles = 0;
	uint32_t nb_filelist;
	uint64_t throughput;
	uint64_t avg_e2e;
	uint32_t qp_id;
	uint64_t freq;
	int ret;
	int i;

	if (!opt->stats)
		return 0;

	/* get xstats size */
	t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0);
	if (t->xstats_size >= 0) {
		/* allocate for xstats_map and values */
		t->xstats_map = rte_malloc(
			"ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0);
		if (t->xstats_map == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		t->xstats_values =
			rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0);
		if (t->xstats_values == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size);
		if (ret != t->xstats_size) {
			printf("Unable to get xstats names, ret = %d\n", ret);
			ret = -1;
			goto error;
		}

		for (i = 0; i < t->xstats_size; i++)
			rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id,
					      &t->xstats_values[i], 1);
	}

	/* print xstats */
	printf("\n");
	print_line(80);
	printf(" ML Device Extended Statistics\n");
	print_line(80);
	for (i = 0; i < t->xstats_size; i++)
		printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]);
	print_line(80);

	/* release buffers */
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	/* print end-to-end stats */
	freq = rte_get_tsc_hz();
	for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++)
		total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles;

	if (freq == 0) {
		avg_e2e = total_cycles / opt->repetitions;
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e);
	} else {
		avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq);
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e);
	}

	/* print inference throughput */
	if (strcmp(opt->test_name, "inference_ordered") == 0)
		nb_filelist = 1;
	else
		nb_filelist = opt->nb_filelist;

	if (freq == 0) {
		throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)",
		       throughput);
	} else {
		throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)",
		       throughput);
	}

	print_line(80);

	return 0;

error:
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	return ret;
}
1129