1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #include <errno.h>
6 #include <stdio.h>
7 #include <unistd.h>
8 
9 #include <rte_common.h>
10 #include <rte_cycles.h>
11 #include <rte_hash_crc.h>
12 #include <rte_launch.h>
13 #include <rte_lcore.h>
14 #include <rte_malloc.h>
15 #include <rte_memzone.h>
16 #include <rte_mldev.h>
17 
18 #include "ml_common.h"
19 #include "test_inference_common.h"
20 
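/* Read a value of the given scalar type from a raw byte buffer */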
21 #define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer))
22 
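/* One-sided relative tolerance check: passes when output does not exceed reference
 * by more than tolerance percent of reference.
 */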
23 #define ML_TEST_CHECK_OUTPUT(output, reference, tolerance) \
24 	(((float)output - (float)reference) <= (((float)reference * tolerance) / 100.0))
25 
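/* Write size bytes from buffer to a newly created file name; err is set to true on
 * open or write failure.
 */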
26 #define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
27 	do { \
28 		FILE *fp = fopen(name, "w+"); \
29 		if (fp == NULL) { \
30 			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
31 			err = true; \
32 		} else { \
33 			if (fwrite(buffer, 1, size, fp) != size) { \
34 				ml_err("Error writing output, file: %s, error: %s", name, \
35 				       strerror(errno)); \
36 				err = true; \
37 			} \
38 			fclose(fp); \
39 		} \
40 	} while (0)
41 
42 /* Enqueue inference requests with burst size equal to 1 */
43 static int
44 ml_enqueue_single(void *arg)
45 {
46 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
47 	struct ml_request *req = NULL;
48 	struct rte_ml_op *op = NULL;
49 	struct ml_core_args *args;
50 	uint64_t model_enq = 0;
51 	uint64_t start_cycle;
52 	uint32_t burst_enq;
53 	uint32_t lcore_id;
54 	uint16_t fid;
55 	int ret;
56 
57 	lcore_id = rte_lcore_id();
58 	args = &t->args[lcore_id];
59 	args->start_cycles = 0;
60 	model_enq = 0;
61 
62 	if (args->nb_reqs == 0)
63 		return 0;
64 
65 next_rep:
66 	fid = args->start_fid;
67 
68 next_model:
69 	ret = rte_mempool_get(t->op_pool, (void **)&op);
70 	if (ret != 0)
71 		goto next_model;
72 
73 retry:
74 	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
75 	if (ret != 0)
76 		goto retry;
77 
78 	op->model_id = t->model[fid].id;
79 	op->nb_batches = t->model[fid].nb_batches;
80 	op->mempool = t->op_pool;
81 
82 	op->input.addr = req->input;
83 	op->input.length = t->model[fid].inp_qsize;
84 	op->input.next = NULL;
85 
86 	op->output.addr = req->output;
87 	op->output.length = t->model[fid].out_qsize;
88 	op->output.next = NULL;
89 
90 	op->user_ptr = req;
91 	req->niters++;
92 	req->fid = fid;
93 
94 enqueue_req:
95 	start_cycle = rte_get_tsc_cycles();
96 	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
97 	if (burst_enq == 0)
98 		goto enqueue_req;
99 
100 	args->start_cycles += start_cycle;
101 	fid++;
102 	if (likely(fid <= args->end_fid))
103 		goto next_model;
104 
105 	model_enq++;
106 	if (likely(model_enq < args->nb_reqs))
107 		goto next_rep;
108 
109 	return 0;
110 }
111 
112 /* Dequeue inference requests with burst size equal to 1 */
113 static int
114 ml_dequeue_single(void *arg)
115 {
116 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
117 	struct rte_ml_op_error error;
118 	struct rte_ml_op *op = NULL;
119 	struct ml_core_args *args;
120 	struct ml_request *req;
121 	uint64_t total_deq = 0;
122 	uint8_t nb_filelist;
123 	uint32_t burst_deq;
124 	uint64_t end_cycle;
125 	uint32_t lcore_id;
126 
127 	lcore_id = rte_lcore_id();
128 	args = &t->args[lcore_id];
129 	args->end_cycles = 0;
130 	nb_filelist = args->end_fid - args->start_fid + 1;
131 
132 	if (args->nb_reqs == 0)
133 		return 0;
134 
135 dequeue_req:
136 	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
137 	end_cycle = rte_get_tsc_cycles();
138 
139 	if (likely(burst_deq == 1)) {
140 		total_deq += burst_deq;
141 		args->end_cycles += end_cycle;
142 		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
143 			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
144 			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
145 			       error.message);
146 			t->error_count[lcore_id]++;
147 		}
148 		req = (struct ml_request *)op->user_ptr;
149 		rte_mempool_put(t->model[req->fid].io_pool, req);
150 		rte_mempool_put(t->op_pool, op);
151 	}
152 
153 	if (likely(total_deq < args->nb_reqs * nb_filelist))
154 		goto dequeue_req;
155 
156 	return 0;
157 }
158 
159 /* Enqueue inference requests with burst size greater than 1 */
160 static int
161 ml_enqueue_burst(void *arg)
162 {
163 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
164 	struct ml_core_args *args;
165 	uint64_t start_cycle;
166 	uint16_t ops_count;
167 	uint64_t model_enq;
168 	uint16_t burst_enq;
169 	uint32_t lcore_id;
170 	uint16_t pending;
171 	uint16_t idx;
172 	uint16_t fid;
173 	uint16_t i;
174 	int ret;
175 
176 	lcore_id = rte_lcore_id();
177 	args = &t->args[lcore_id];
178 	args->start_cycles = 0;
179 	model_enq = 0;
180 
181 	if (args->nb_reqs == 0)
182 		return 0;
183 
184 next_rep:
185 	fid = args->start_fid;
186 
187 next_model:
188 	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
189 	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
190 	if (ret != 0)
191 		goto next_model;
192 
193 retry:
194 	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
195 	if (ret != 0)
196 		goto retry;
197 
198 	for (i = 0; i < ops_count; i++) {
199 		args->enq_ops[i]->model_id = t->model[fid].id;
200 		args->enq_ops[i]->nb_batches = t->model[fid].nb_batches;
201 		args->enq_ops[i]->mempool = t->op_pool;
202 
203 		args->enq_ops[i]->input.addr = args->reqs[i]->input;
204 		args->enq_ops[i]->input.length = t->model[fid].inp_qsize;
205 		args->enq_ops[i]->input.next = NULL;
206 
207 		args->enq_ops[i]->output.addr = args->reqs[i]->output;
208 		args->enq_ops[i]->output.length = t->model[fid].out_qsize;
209 		args->enq_ops[i]->output.next = NULL;
210 
211 		args->enq_ops[i]->user_ptr = args->reqs[i];
212 		args->reqs[i]->niters++;
213 		args->reqs[i]->fid = fid;
214 	}
215 
216 	idx = 0;
217 	pending = ops_count;
218 
219 enqueue_reqs:
220 	start_cycle = rte_get_tsc_cycles();
221 	burst_enq =
222 		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
223 	args->start_cycles += burst_enq * start_cycle;
224 	pending = pending - burst_enq;
225 
226 	if (pending > 0) {
227 		idx = idx + burst_enq;
228 		goto enqueue_reqs;
229 	}
230 
231 	fid++;
232 	if (fid <= args->end_fid)
233 		goto next_model;
234 
235 	model_enq = model_enq + ops_count;
236 	if (model_enq < args->nb_reqs)
237 		goto next_rep;
238 
239 	return 0;
240 }
241 
242 /* Dequeue inference requests with burst size greater than 1 */
243 static int
244 ml_dequeue_burst(void *arg)
245 {
246 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
247 	struct rte_ml_op_error error;
248 	struct ml_core_args *args;
249 	struct ml_request *req;
250 	uint64_t total_deq = 0;
251 	uint16_t burst_deq = 0;
252 	uint8_t nb_filelist;
253 	uint64_t end_cycle;
254 	uint32_t lcore_id;
255 	uint32_t i;
256 
257 	lcore_id = rte_lcore_id();
258 	args = &t->args[lcore_id];
259 	args->end_cycles = 0;
260 	nb_filelist = args->end_fid - args->start_fid + 1;
261 
262 	if (args->nb_reqs == 0)
263 		return 0;
264 
265 dequeue_burst:
266 	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
267 					 t->cmn.opt->burst_size);
268 	end_cycle = rte_get_tsc_cycles();
269 
270 	if (likely(burst_deq > 0)) {
271 		total_deq += burst_deq;
272 		args->end_cycles += burst_deq * end_cycle;
273 
274 		for (i = 0; i < burst_deq; i++) {
275 			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
276 				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
277 				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
278 				       error.errcode, error.message);
279 				t->error_count[lcore_id]++;
280 			}
281 			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
282 			if (req != NULL)
283 				rte_mempool_put(t->model[req->fid].io_pool, req);
284 		}
285 		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
286 	}
287 
288 	if (total_deq < args->nb_reqs * nb_filelist)
289 		goto dequeue_burst;
290 
291 	return 0;
292 }
293 
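/* Check device capabilities against the requested queue pairs, queue size and model count */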
294 bool
295 test_inference_cap_check(struct ml_options *opt)
296 {
297 	struct rte_ml_dev_info dev_info;
298 
299 	if (!ml_test_cap_check(opt))
300 		return false;
301 
302 	rte_ml_dev_info_get(opt->dev_id, &dev_info);
303 
304 	if (opt->queue_pairs > dev_info.max_queue_pairs) {
305 		ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
306 		       opt->queue_pairs, dev_info.max_queue_pairs);
307 		return false;
308 	}
309 
310 	if (opt->queue_size > dev_info.max_desc) {
311 		ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
312 		       opt->queue_size, dev_info.max_desc);
313 		return false;
314 	}
315 
316 	if (opt->nb_filelist > dev_info.max_models) {
317 		ml_err("Insufficient capabilities: nb_filelist = %u > (max_models = %u)",
318 		       opt->nb_filelist, dev_info.max_models);
319 		return false;
320 	}
321 
322 	return true;
323 }
324 
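/* Validate test options: filelist entries and file accessibility, repetitions, burst size,
 * queue configuration and number of available lcores.
 */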
325 int
326 test_inference_opt_check(struct ml_options *opt)
327 {
328 	uint32_t i;
329 	int ret;
330 
331 	/* check common opts */
332 	ret = ml_test_opt_check(opt);
333 	if (ret != 0)
334 		return ret;
335 
336 	/* check for at least one filelist */
337 	if (opt->nb_filelist == 0) {
338 		ml_err("Filelist empty, need at least one filelist to run the test\n");
339 		return -EINVAL;
340 	}
341 
342 	/* check file availability */
343 	for (i = 0; i < opt->nb_filelist; i++) {
344 		if (access(opt->filelist[i].model, F_OK) == -1) {
345 			ml_err("Model file not accessible: id = %u, file = %s", i,
346 			       opt->filelist[i].model);
347 			return -ENOENT;
348 		}
349 
350 		if (access(opt->filelist[i].input, F_OK) == -1) {
351 			ml_err("Input file not accessible: id = %u, file = %s", i,
352 			       opt->filelist[i].input);
353 			return -ENOENT;
354 		}
355 	}
356 
357 	if (opt->repetitions == 0) {
358 		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
359 		return -EINVAL;
360 	}
361 
362 	if (opt->burst_size == 0) {
363 		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
364 		return -EINVAL;
365 	}
366 
367 	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
368 		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
369 		       ML_TEST_MAX_POOL_SIZE);
370 		return -EINVAL;
371 	}
372 
373 	if (opt->queue_pairs == 0) {
374 		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
375 		return -EINVAL;
376 	}
377 
378 	if (opt->queue_size == 0) {
379 		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
380 		return -EINVAL;
381 	}
382 
383 	/* check number of available lcores. */
384 	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
385 		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
386 		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
387 		       (opt->queue_pairs * 2 + 1));
388 		return -EINVAL;
389 	}
390 
391 	return 0;
392 }
393 
394 void
395 test_inference_opt_dump(struct ml_options *opt)
396 {
397 	uint32_t i;
398 
399 	/* dump common opts */
400 	ml_test_opt_dump(opt);
401 
402 	/* dump test opts */
403 	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
404 	ml_dump("burst_size", "%u", opt->burst_size);
405 	ml_dump("queue_pairs", "%u", opt->queue_pairs);
406 	ml_dump("queue_size", "%u", opt->queue_size);
407 	ml_dump("tolerance", "%-7.3f", opt->tolerance);
408 	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));
409 
410 	if (opt->batches == 0)
411 		ml_dump("batches", "%u (default batch size)", opt->batches);
412 	else
413 		ml_dump("batches", "%u", opt->batches);
414 
415 	ml_dump_begin("filelist");
416 	for (i = 0; i < opt->nb_filelist; i++) {
417 		ml_dump_list("model", i, opt->filelist[i].model);
418 		ml_dump_list("input", i, opt->filelist[i].input);
419 		ml_dump_list("output", i, opt->filelist[i].output);
420 		if (strcmp(opt->filelist[i].reference, "\0") != 0)
421 			ml_dump_list("reference", i, opt->filelist[i].reference);
422 	}
423 	ml_dump_end;
424 }
425 
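/* Allocate the test private structure, pick single or burst enqueue/dequeue handlers and
 * allocate per-lcore op and request arrays.
 */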
426 int
427 test_inference_setup(struct ml_test *test, struct ml_options *opt)
428 {
429 	struct test_inference *t;
430 	void *test_inference;
431 	uint32_t lcore_id;
432 	int ret = 0;
433 	uint32_t i;
434 
435 	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
436 					    RTE_CACHE_LINE_SIZE, opt->socket_id);
437 	if (test_inference == NULL) {
438 		ml_err("failed to allocate memory for test_inference");
439 		ret = -ENOMEM;
440 		goto error;
441 	}
442 	test->test_priv = test_inference;
443 	t = ml_test_priv(test);
444 
445 	t->nb_used = 0;
446 	t->nb_valid = 0;
447 	t->cmn.result = ML_TEST_FAILED;
448 	t->cmn.opt = opt;
449 	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));
450 
451 	/* get device info */
452 	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
453 	if (ret < 0) {
454 		ml_err("failed to get device info");
455 		goto error;
456 	}
457 
458 	if (opt->burst_size == 1) {
459 		t->enqueue = ml_enqueue_single;
460 		t->dequeue = ml_dequeue_single;
461 	} else {
462 		t->enqueue = ml_enqueue_burst;
463 		t->dequeue = ml_dequeue_burst;
464 	}
465 
466 	/* set model initial state */
467 	for (i = 0; i < opt->nb_filelist; i++)
468 		t->model[i].state = MODEL_INITIAL;
469 
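	/* allocate per-lcore arrays of op and request pointers, sized to the burst size */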
470 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
471 		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
472 			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
473 			RTE_CACHE_LINE_SIZE, opt->socket_id);
474 		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
475 			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
476 			RTE_CACHE_LINE_SIZE, opt->socket_id);
477 		t->args[lcore_id].reqs = rte_zmalloc_socket(
478 			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
479 			RTE_CACHE_LINE_SIZE, opt->socket_id);
480 	}
481 
482 	for (i = 0; i < RTE_MAX_LCORE; i++) {
483 		t->args[i].start_cycles = 0;
484 		t->args[i].end_cycles = 0;
485 	}
486 
487 	return 0;
488 
489 error:
490 	rte_free(test_inference);
491 
492 	return ret;
493 }
494 
495 void
496 test_inference_destroy(struct ml_test *test, struct ml_options *opt)
497 {
498 	struct test_inference *t;
499 
500 	RTE_SET_USED(opt);
501 
502 	t = ml_test_priv(test);
503 	rte_free(t);
504 }
505 
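/* Configure the ML device, set up the requested number of queue pairs and start the device */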
506 int
507 ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
508 {
509 	struct rte_ml_dev_qp_conf qp_conf;
510 	struct test_inference *t;
511 	uint16_t qp_id;
512 	int ret;
513 
514 	t = ml_test_priv(test);
515 
516 	RTE_SET_USED(t);
517 
518 	ret = ml_test_device_configure(test, opt);
519 	if (ret != 0)
520 		return ret;
521 
522 	/* setup queue pairs */
526 	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
527 		qp_conf.nb_desc = opt->queue_size;
528 		qp_conf.cb = NULL;
529 
530 		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
531 		if (ret != 0) {
532 			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
533 			       opt->dev_id, qp_id);
534 			return ret;
535 		}
536 	}
537 
538 	ret = ml_test_device_start(test, opt);
539 	if (ret != 0)
540 		goto error;
541 
542 	return 0;
543 
544 error:
545 	ml_test_device_close(test, opt);
546 
547 	return ret;
548 }
549 
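/* Stop the ML device and close it; the device is also closed when stop fails */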
550 int
551 ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
552 {
553 	int ret;
554 
555 	ret = ml_test_device_stop(test, opt);
556 	if (ret != 0)
557 		goto error;
558 
559 	ret = ml_test_device_close(test, opt);
560 	if (ret != 0)
561 		return ret;
562 
563 	return 0;
564 
565 error:
566 	ml_test_device_close(test, opt);
567 
568 	return ret;
569 }
570 
571 /* Callback for IO pool create. Computes the fields of the ml_request structure and
572  * prepares the quantized input data.
573  */
574 static void
575 ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
576 {
577 	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
578 	struct ml_request *req = (struct ml_request *)obj;
579 
580 	RTE_SET_USED(mp);
581 	RTE_SET_USED(obj_idx);
582 
583 	req->input = (uint8_t *)obj +
584 		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);
585 	req->output = req->input +
586 		      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);
587 	req->niters = 0;
588 
589 	/* quantize data */
590 	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,
591 			   t->model[t->fid].input, req->input);
592 }
593 
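/* Reserve a memzone for user data, load the input and optional reference files and
 * create the quantized I/O mempool for the model with index fid.
 */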
594 int
595 ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
596 {
597 	struct test_inference *t = ml_test_priv(test);
598 	char mz_name[RTE_MEMZONE_NAMESIZE];
599 	char mp_name[RTE_MEMPOOL_NAMESIZE];
600 	const struct rte_memzone *mz;
601 	uint64_t nb_buffers;
602 	char *buffer = NULL;
603 	uint32_t buff_size;
604 	uint32_t mz_size;
605 	size_t fsize;
606 	int ret;
607 
608 	/* get input buffer size */
609 	ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
610 				       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);
611 	if (ret != 0) {
612 		ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model);
613 		return ret;
614 	}
615 
616 	/* get output buffer size */
617 	ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
618 					&t->model[fid].out_qsize, &t->model[fid].out_dsize);
619 	if (ret != 0) {
620 		ml_err("Failed to get output size, model : %s\n", opt->filelist[fid].model);
621 		return ret;
622 	}
623 
624 	/* allocate buffer for user data */
625 	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
626 	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
627 		mz_size += t->model[fid].out_dsize;
628 
629 	sprintf(mz_name, "ml_user_data_%d", fid);
630 	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
631 	if (mz == NULL) {
632 		ml_err("Memzone allocation failed for ml_user_data\n");
633 		ret = -ENOMEM;
634 		goto error;
635 	}
636 
637 	t->model[fid].input = mz->addr;
638 	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
639 	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
640 		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
641 	else
642 		t->model[fid].reference = NULL;
643 
644 	/* load input file */
645 	ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
646 	if (ret != 0)
647 		goto error;
648 
649 	if (fsize == t->model[fid].inp_dsize) {
650 		rte_memcpy(t->model[fid].input, buffer, fsize);
651 		free(buffer);
652 	} else {
653 		ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
654 		       t->model[fid].inp_dsize);
655 		ret = -EINVAL;
656 		goto error;
657 	}
658 
659 	/* load reference file */
660 	buffer = NULL;
661 	if (t->model[fid].reference != NULL) {
662 		ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
663 		if (ret != 0)
664 			goto error;
665 
666 		if (fsize == t->model[fid].out_dsize) {
667 			rte_memcpy(t->model[fid].reference, buffer, fsize);
668 			free(buffer);
669 		} else {
670 			ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
671 			       fsize, t->model[fid].out_dsize);
672 			ret = -EINVAL;
673 			goto error;
674 		}
675 	}
676 
677 	/* create mempool for quantized input and output buffers. ml_request_initialize is
678 	 * used as a callback for object creation.
679 	 */
680 	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +
681 		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +
682 		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);
683 	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);
684 
685 	t->fid = fid;
686 	sprintf(mp_name, "ml_io_pool_%d", fid);
687 	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
688 						   ml_request_initialize, test, opt->socket_id, 0);
689 	if (t->model[fid].io_pool == NULL) {
690 		ml_err("Failed to create io pool : %s\n", mp_name);
691 		ret = -ENOMEM;
692 		goto error;
693 	}
694 
695 	return 0;
696 
697 error:
698 	if (mz != NULL)
699 		rte_memzone_free(mz);
700 
701 	if (t->model[fid].io_pool != NULL) {
702 		rte_mempool_free(t->model[fid].io_pool);
703 		t->model[fid].io_pool = NULL;
704 	}
705 
706 	free(buffer);
707 
708 	return ret;
709 }
710 
711 void
712 ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
713 {
714 	char mz_name[RTE_MEMZONE_NAMESIZE];
715 	char mp_name[RTE_MEMPOOL_NAMESIZE];
716 	const struct rte_memzone *mz;
717 	struct rte_mempool *mp;
718 
719 	RTE_SET_USED(test);
720 	RTE_SET_USED(opt);
721 
722 	/* release user data memzone */
723 	sprintf(mz_name, "ml_user_data_%d", fid);
724 	mz = rte_memzone_lookup(mz_name);
725 	if (mz != NULL)
726 		rte_memzone_free(mz);
727 
728 	/* destroy io pool */
729 	sprintf(mp_name, "ml_io_pool_%d", fid);
730 	mp = rte_mempool_lookup(mp_name);
731 	rte_mempool_free(mp);
732 }
733 
734 int
735 ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
736 {
737 	struct test_inference *t = ml_test_priv(test);
738 
739 	/* create op pool */
740 	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
741 					   opt->socket_id);
742 	if (t->op_pool == NULL) {
743 		ml_err("Failed to create op pool : %s\n", "ml_test_op_pool");
744 		return -ENOMEM;
745 	}
746 
747 	return 0;
748 }
749 
750 void
751 ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
752 {
753 	struct test_inference *t = ml_test_priv(test);
754 
755 	RTE_SET_USED(opt);
756 
757 	/* release op pool */
758 	rte_mempool_free(t->op_pool);
759 }
760 
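/* Compare the model output with the reference: a CRC match when tolerance is 0, otherwise
 * an element-wise tolerance check per output.
 */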
761 static bool
762 ml_inference_validation(struct ml_test *test, struct ml_request *req)
763 {
764 	struct test_inference *t = ml_test_priv((struct ml_test *)test);
765 	struct ml_model *model;
766 	uint32_t nb_elements;
767 	uint8_t *reference;
768 	uint8_t *output;
769 	bool match;
770 	uint32_t i;
771 	uint32_t j;
772 
773 	model = &t->model[req->fid];
774 
775 	/* compare crc when tolerance is 0 */
776 	if (t->cmn.opt->tolerance == 0.0) {
777 		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
778 			 rte_hash_crc(model->reference, model->out_dsize, 0));
779 	} else {
780 		output = model->output;
781 		reference = model->reference;
782 
783 		i = 0;
784 next_output:
785 		nb_elements =
786 			model->info.output_info[i].shape.w * model->info.output_info[i].shape.x *
787 			model->info.output_info[i].shape.y * model->info.output_info[i].shape.z;
788 		j = 0;
789 next_element:
790 		match = false;
791 		switch (model->info.output_info[i].dtype) {
792 		case RTE_ML_IO_TYPE_INT8:
793 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t),
794 						 ML_TEST_READ_TYPE(reference, int8_t),
795 						 t->cmn.opt->tolerance))
796 				match = true;
797 
798 			output += sizeof(int8_t);
799 			reference += sizeof(int8_t);
800 			break;
801 		case RTE_ML_IO_TYPE_UINT8:
802 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t),
803 						 ML_TEST_READ_TYPE(reference, uint8_t),
804 						 t->cmn.opt->tolerance))
805 				match = true;
806 
807 			output += sizeof(uint8_t);
808 			reference += sizeof(uint8_t);
809 			break;
810 		case RTE_ML_IO_TYPE_INT16:
811 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t),
812 						 ML_TEST_READ_TYPE(reference, int16_t),
813 						 t->cmn.opt->tolerance))
814 				match = true;
815 
816 			output += sizeof(int16_t);
817 			reference += sizeof(int16_t);
818 			break;
819 		case RTE_ML_IO_TYPE_UINT16:
820 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t),
821 						 ML_TEST_READ_TYPE(reference, uint16_t),
822 						 t->cmn.opt->tolerance))
823 				match = true;
824 
825 			output += sizeof(uint16_t);
826 			reference += sizeof(uint16_t);
827 			break;
828 		case RTE_ML_IO_TYPE_INT32:
829 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t),
830 						 ML_TEST_READ_TYPE(reference, int32_t),
831 						 t->cmn.opt->tolerance))
832 				match = true;
833 
834 			output += sizeof(int32_t);
835 			reference += sizeof(int32_t);
836 			break;
837 		case RTE_ML_IO_TYPE_UINT32:
838 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t),
839 						 ML_TEST_READ_TYPE(reference, uint32_t),
840 						 t->cmn.opt->tolerance))
841 				match = true;
842 
843 			output += sizeof(uint32_t);
844 			reference += sizeof(uint32_t);
845 			break;
846 		case RTE_ML_IO_TYPE_FP32:
847 			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float),
848 						 ML_TEST_READ_TYPE(reference, float),
849 						 t->cmn.opt->tolerance))
850 				match = true;
851 
852 			output += sizeof(float);
853 			reference += sizeof(float);
854 			break;
855 		default: /* other types, fp8, fp16, bfloat16 */
856 			match = true;
857 		}
858 
859 		if (!match)
860 			goto done;
861 		j++;
862 		if (j < nb_elements)
863 			goto next_element;
864 
865 		i++;
866 		if (i < model->info.nb_outputs)
867 			goto next_output;
868 	}
869 done:
870 	return match;
871 }
872 
873 /* Callback for mempool object iteration. Dequantizes, validates and dumps the output data. */
874 static void
875 ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
876 {
877 	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
878 	struct ml_request *req = (struct ml_request *)obj;
879 	struct ml_model *model = &t->model[req->fid];
880 	bool error = false;
881 	char *dump_path;
882 
883 	RTE_SET_USED(mp);
884 
885 	if (req->niters == 0)
886 		return;
887 
888 	t->nb_used++;
889 	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,
890 			     req->output, model->output);
891 
892 	if (model->reference == NULL)
893 		goto dump_output_pass;
894 
895 	if (!ml_inference_validation(opaque, req))
896 		goto dump_output_fail;
897 	else
898 		goto dump_output_pass;
899 
900 dump_output_pass:
901 	if (obj_idx == 0) {
902 		/* write quantized output */
903 		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
904 			return;
905 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
906 		free(dump_path);
907 		if (error)
908 			return;
909 
910 		/* write dequantized output */
911 		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
912 			return;
913 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
914 		free(dump_path);
915 		if (error)
916 			return;
917 	}
918 	t->nb_valid++;
919 
920 	return;
921 
922 dump_output_fail:
923 	if (t->cmn.opt->debug) {
924 		/* dump quantized output buffer */
925 		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
926 			     obj_idx) == -1)
927 			return;
928 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
929 		free(dump_path);
930 		if (error)
931 			return;
932 
933 		/* dump dequantized output buffer */
934 		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output, obj_idx) ==
935 		    -1)
936 			return;
937 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
938 		free(dump_path);
939 		if (error)
940 			return;
941 	}
942 }
943 
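/* Aggregate per-lcore error counts, finish all requests of the model and update the
 * test result.
 */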
944 int
945 ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
946 {
947 	struct test_inference *t = ml_test_priv(test);
948 	uint64_t error_count = 0;
949 	uint32_t i;
950 
951 	RTE_SET_USED(opt);
952 
953 	/* check for errors */
954 	for (i = 0; i < RTE_MAX_LCORE; i++)
955 		error_count += t->error_count[i];
956 
957 	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);
958 
959 	if ((t->nb_used == t->nb_valid) && (error_count == 0))
960 		t->cmn.result = ML_TEST_SUCCESS;
961 	else
962 		t->cmn.result = ML_TEST_FAILED;
963 
964 	return t->cmn.result;
965 }
966 
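/* Split repetitions across queue pairs and launch an enqueue and a dequeue worker per
 * queue pair, e.g. with 2 queue pairs the workers are mapped as (qp 0, enqueue),
 * (qp 0, dequeue), (qp 1, enqueue), (qp 1, dequeue).
 */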
967 int
968 ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
969 			  uint16_t end_fid)
970 {
971 	struct test_inference *t = ml_test_priv(test);
972 	uint32_t lcore_id;
973 	uint32_t nb_reqs;
974 	uint32_t id = 0;
975 	uint32_t qp_id;
976 
977 	nb_reqs = opt->repetitions / opt->queue_pairs;
978 
979 	RTE_LCORE_FOREACH_WORKER(lcore_id)
980 	{
981 		if (id >= opt->queue_pairs * 2)
982 			break;
983 
984 		qp_id = id / 2;
985 		t->args[lcore_id].qp_id = qp_id;
986 		t->args[lcore_id].nb_reqs = nb_reqs;
987 		if (qp_id == 0)
988 			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;
989 
990 		if (t->args[lcore_id].nb_reqs == 0) {
991 			id++;
992 			break;
993 		}
994 
995 		t->args[lcore_id].start_fid = start_fid;
996 		t->args[lcore_id].end_fid = end_fid;
997 
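		/* even ids launch the enqueue worker, odd ids the paired dequeue worker */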
998 		if (id % 2 == 0)
999 			rte_eal_remote_launch(t->enqueue, test, lcore_id);
1000 		else
1001 			rte_eal_remote_launch(t->dequeue, test, lcore_id);
1002 
1003 		id++;
1004 	}
1005 
1006 	return 0;
1007 }
1008