xref: /dpdk/app/test-mldev/test_inference_common.c (revision da7e701151ea8b742d4c38ace3e4fefd1b4507fc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #include <errno.h>
6 #include <math.h>
7 #include <stdio.h>
8 #include <unistd.h>
9 
10 #include <rte_common.h>
11 #include <rte_cycles.h>
12 #include <rte_hash_crc.h>
13 #include <rte_launch.h>
14 #include <rte_lcore.h>
15 #include <rte_malloc.h>
16 #include <rte_memzone.h>
17 #include <rte_mldev.h>
18 
19 #include "ml_common.h"
20 #include "test_inference_common.h"
21 
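/* Helper: write 'size' bytes from 'buffer' to file 'name', setting 'err' to true
 * if the file cannot be created or the write is short.
 */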
22 #define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
23 	do { \
24 		FILE *fp = fopen(name, "w+"); \
25 		if (fp == NULL) { \
26 			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
27 			err = true; \
28 		} else { \
29 			if (fwrite(buffer, 1, size, fp) != size) { \
30 				ml_err("Error writing output, file: %s, error: %s", name, \
31 				       strerror(errno)); \
32 				err = true; \
33 			} \
34 			fclose(fp); \
35 		} \
36 	} while (0)
37 
38 /* Enqueue inference requests with burst size equal to 1 */
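/* Per repetition, one op is prepared and enqueued for every model in
 * [start_fid, end_fid]. Mempool allocation failures are busy-retried, and the
 * TSC value sampled before each successful enqueue is accumulated in
 * args->start_cycles for the test's cycle accounting.
 */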
39 static int
40 ml_enqueue_single(void *arg)
41 {
42 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
43 	struct ml_request *req = NULL;
44 	struct rte_ml_op *op = NULL;
45 	struct ml_core_args *args;
46 	uint64_t model_enq = 0;
47 	uint64_t start_cycle;
48 	uint32_t burst_enq;
49 	uint32_t lcore_id;
50 	uint64_t offset;
51 	uint64_t bufsz;
52 	uint16_t fid;
53 	uint32_t i;
54 	int ret;
55 
56 	lcore_id = rte_lcore_id();
57 	args = &t->args[lcore_id];
58 	args->start_cycles = 0;
59 	model_enq = 0;
60 
61 	if (args->nb_reqs == 0)
62 		return 0;
63 
64 next_rep:
65 	fid = args->start_fid;
66 
67 next_model:
68 	ret = rte_mempool_get(t->op_pool, (void **)&op);
69 	if (ret != 0)
70 		goto next_model;
71 
72 retry_req:
73 	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
74 	if (ret != 0)
75 		goto retry_req;
76 
77 retry_inp_segs:
78 	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->inp_buf_segs,
79 				   t->model[fid].info.nb_inputs);
80 	if (ret != 0)
81 		goto retry_inp_segs;
82 
83 retry_out_segs:
84 	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->out_buf_segs,
85 				   t->model[fid].info.nb_outputs);
86 	if (ret != 0)
87 		goto retry_out_segs;
88 
89 	op->model_id = t->model[fid].id;
90 	op->nb_batches = t->model[fid].info.min_batches;
91 	op->mempool = t->op_pool;
92 	op->input = req->inp_buf_segs;
93 	op->output = req->out_buf_segs;
94 	op->user_ptr = req;
95 
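	/* Attach quantized I/O buffers: a single segment covers the whole input and
	 * output for the packed layout, while the split layout gets one segment per
	 * I/O, each aligned to the device's align_size.
	 */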
96 	if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
97 		op->input[0]->addr = req->input;
98 		op->input[0]->iova_addr = rte_mem_virt2iova(req->input);
99 		op->input[0]->length = t->model[fid].inp_qsize;
100 		op->input[0]->next = NULL;
101 
102 		op->output[0]->addr = req->output;
103 		op->output[0]->iova_addr = rte_mem_virt2iova(req->output);
104 		op->output[0]->length = t->model[fid].out_qsize;
105 		op->output[0]->next = NULL;
106 	} else {
107 		offset = 0;
108 		for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
109 			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,
110 					       t->cmn.dev_info.align_size);
111 			op->input[i]->addr = req->input + offset;
112 			op->input[i]->iova_addr = rte_mem_virt2iova(req->input + offset);
113 			op->input[i]->length = bufsz;
114 			op->input[i]->next = NULL;
115 			offset += bufsz;
116 		}
117 
118 		offset = 0;
119 		for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
120 			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,
121 					       t->cmn.dev_info.align_size);
122 			op->output[i]->addr = req->output + offset;
123 			op->output[i]->iova_addr = rte_mem_virt2iova(req->output + offset);
124 			op->output[i]->length = bufsz;
125 			op->output[i]->next = NULL;
126 			offset += bufsz;
127 		}
128 	}
129 
130 	req->niters++;
131 	req->fid = fid;
132 
133 enqueue_req:
134 	start_cycle = rte_get_tsc_cycles();
135 	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
136 	if (burst_enq == 0)
137 		goto enqueue_req;
138 
139 	args->start_cycles += start_cycle;
140 	fid++;
141 	if (likely(fid <= args->end_fid))
142 		goto next_model;
143 
144 	model_enq++;
145 	if (likely(model_enq < args->nb_reqs))
146 		goto next_rep;
147 
148 	return 0;
149 }
150 
151 /* Dequeue inference requests with burst size equal to 1 */
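/* Polls the queue pair until nb_reqs * nb_filelist ops have been dequeued.
 * Each completed op has its status checked before its request, buffer segments
 * and op are returned to their mempools.
 */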
152 static int
153 ml_dequeue_single(void *arg)
154 {
155 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
156 	struct rte_ml_op_error error;
157 	struct rte_ml_op *op = NULL;
158 	struct ml_core_args *args;
159 	struct ml_request *req;
160 	uint64_t total_deq = 0;
161 	uint8_t nb_filelist;
162 	uint32_t burst_deq;
163 	uint64_t end_cycle;
164 	uint32_t lcore_id;
165 
166 	lcore_id = rte_lcore_id();
167 	args = &t->args[lcore_id];
168 	args->end_cycles = 0;
169 	nb_filelist = args->end_fid - args->start_fid + 1;
170 
171 	if (args->nb_reqs == 0)
172 		return 0;
173 
174 dequeue_req:
175 	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
176 	end_cycle = rte_get_tsc_cycles();
177 
178 	if (likely(burst_deq == 1)) {
179 		total_deq += burst_deq;
180 		args->end_cycles += end_cycle;
181 		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
182 			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
183 			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
184 			       error.message);
185 			t->error_count[lcore_id]++;
186 		}
187 		req = (struct ml_request *)op->user_ptr;
188 		rte_mempool_put(t->model[req->fid].io_pool, req);
189 		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->input,
190 				     t->model[req->fid].info.nb_inputs);
191 		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->output,
192 				     t->model[req->fid].info.nb_outputs);
193 		rte_mempool_put(t->op_pool, op);
194 	}
195 
196 	if (likely(total_deq < args->nb_reqs * nb_filelist))
197 		goto dequeue_req;
198 
199 	return 0;
200 }
201 
202 /* Enqueue inference requests with burst size greater than 1 */
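/* Same flow as ml_enqueue_single, but ops, requests and buffer segments are
 * allocated in bursts of up to opt->burst_size, and partially accepted
 * enqueues are retried starting from the first pending op.
 */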
203 static int
204 ml_enqueue_burst(void *arg)
205 {
206 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
207 	struct ml_core_args *args;
208 	uint64_t start_cycle;
209 	uint16_t ops_count;
210 	uint64_t model_enq;
211 	uint16_t burst_enq;
212 	uint32_t lcore_id;
213 	uint16_t pending;
214 	uint64_t offset;
215 	uint64_t bufsz;
216 	uint16_t idx;
217 	uint16_t fid;
218 	uint16_t i;
219 	uint16_t j;
220 	int ret;
221 
222 	lcore_id = rte_lcore_id();
223 	args = &t->args[lcore_id];
224 	args->start_cycles = 0;
225 	model_enq = 0;
226 
227 	if (args->nb_reqs == 0)
228 		return 0;
229 
230 next_rep:
231 	fid = args->start_fid;
232 
233 next_model:
234 	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
235 	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
236 	if (ret != 0)
237 		goto next_model;
238 
239 retry_reqs:
240 	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
241 	if (ret != 0)
242 		goto retry_reqs;
243 
244 	for (i = 0; i < ops_count; i++) {
245 retry_inp_segs:
246 		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->inp_buf_segs,
247 					   t->model[fid].info.nb_inputs);
248 		if (ret != 0)
249 			goto retry_inp_segs;
250 
251 retry_out_segs:
252 		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->out_buf_segs,
253 					   t->model[fid].info.nb_outputs);
254 		if (ret != 0)
255 			goto retry_out_segs;
256 
257 		args->enq_ops[i]->model_id = t->model[fid].id;
258 		args->enq_ops[i]->nb_batches = t->model[fid].info.min_batches;
259 		args->enq_ops[i]->mempool = t->op_pool;
260 		args->enq_ops[i]->input = args->reqs[i]->inp_buf_segs;
261 		args->enq_ops[i]->output = args->reqs[i]->out_buf_segs;
262 		args->enq_ops[i]->user_ptr = args->reqs[i];
263 
264 		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
265 			args->enq_ops[i]->input[0]->addr = args->reqs[i]->input;
266 			args->enq_ops[i]->input[0]->iova_addr =
267 				rte_mem_virt2iova(args->reqs[i]->input);
268 			args->enq_ops[i]->input[0]->length = t->model[fid].inp_qsize;
269 			args->enq_ops[i]->input[0]->next = NULL;
270 
271 			args->enq_ops[i]->output[0]->addr = args->reqs[i]->output;
272 			args->enq_ops[i]->output[0]->iova_addr =
273 				rte_mem_virt2iova(args->reqs[i]->output);
274 			args->enq_ops[i]->output[0]->length = t->model[fid].out_qsize;
275 			args->enq_ops[i]->output[0]->next = NULL;
276 		} else {
277 			offset = 0;
278 			for (j = 0; j < t->model[fid].info.nb_inputs; j++) {
279 				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[j].size,
280 						       t->cmn.dev_info.align_size);
281 
282 				args->enq_ops[i]->input[j]->addr = args->reqs[i]->input + offset;
283 				args->enq_ops[i]->input[j]->iova_addr =
284 					rte_mem_virt2iova(args->reqs[i]->input + offset);
285 				args->enq_ops[i]->input[j]->length = bufsz;
286 				args->enq_ops[i]->input[j]->next = NULL;
287 				offset += bufsz;
288 			}
289 
290 			offset = 0;
291 			for (j = 0; j < t->model[fid].info.nb_outputs; j++) {
292 				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[j].size,
293 						       t->cmn.dev_info.align_size);
294 				args->enq_ops[i]->output[j]->addr = args->reqs[i]->output + offset;
295 				args->enq_ops[i]->output[j]->iova_addr =
296 					rte_mem_virt2iova(args->reqs[i]->output + offset);
297 				args->enq_ops[i]->output[j]->length = bufsz;
298 				args->enq_ops[i]->output[j]->next = NULL;
299 				offset += bufsz;
300 			}
301 		}
302 
303 		args->reqs[i]->niters++;
304 		args->reqs[i]->fid = fid;
305 	}
306 
307 	idx = 0;
308 	pending = ops_count;
309 
310 enqueue_reqs:
311 	start_cycle = rte_get_tsc_cycles();
312 	burst_enq =
313 		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
314 	args->start_cycles += burst_enq * start_cycle;
315 	pending = pending - burst_enq;
316 
317 	if (pending > 0) {
318 		idx = idx + burst_enq;
319 		goto enqueue_reqs;
320 	}
321 
322 	fid++;
323 	if (fid <= args->end_fid)
324 		goto next_model;
325 
326 	model_enq = model_enq + ops_count;
327 	if (model_enq < args->nb_reqs)
328 		goto next_rep;
329 
330 	return 0;
331 }
332 
333 /* Dequeue inference requests with burst size greater than 1 */
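/* Dequeues up to opt->burst_size ops per poll; args->end_cycles is advanced by
 * burst_deq * the TSC value so the timestamp is weighted by the number of ops
 * completed in that poll.
 */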
334 static int
335 ml_dequeue_burst(void *arg)
336 {
337 	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
338 	struct rte_ml_op_error error;
339 	struct ml_core_args *args;
340 	struct ml_request *req;
341 	uint64_t total_deq = 0;
342 	uint16_t burst_deq = 0;
343 	uint8_t nb_filelist;
344 	uint64_t end_cycle;
345 	uint32_t lcore_id;
346 	uint32_t i;
347 
348 	lcore_id = rte_lcore_id();
349 	args = &t->args[lcore_id];
350 	args->end_cycles = 0;
351 	nb_filelist = args->end_fid - args->start_fid + 1;
352 
353 	if (args->nb_reqs == 0)
354 		return 0;
355 
356 dequeue_burst:
357 	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
358 					 t->cmn.opt->burst_size);
359 	end_cycle = rte_get_tsc_cycles();
360 
361 	if (likely(burst_deq > 0)) {
362 		total_deq += burst_deq;
363 		args->end_cycles += burst_deq * end_cycle;
364 
365 		for (i = 0; i < burst_deq; i++) {
366 			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
367 				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
368 				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
369 				       error.errcode, error.message);
370 				t->error_count[lcore_id]++;
371 			}
372 			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
373 			if (req != NULL) {
374 				rte_mempool_put(t->model[req->fid].io_pool, req);
375 				rte_mempool_put_bulk(t->buf_seg_pool,
376 						     (void **)args->deq_ops[i]->input,
377 						     t->model[req->fid].info.nb_inputs);
378 				rte_mempool_put_bulk(t->buf_seg_pool,
379 						     (void **)args->deq_ops[i]->output,
380 						     t->model[req->fid].info.nb_outputs);
381 			}
382 		}
383 		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
384 	}
385 
386 	if (total_deq < args->nb_reqs * nb_filelist)
387 		goto dequeue_burst;
388 
389 	return 0;
390 }
391 
392 bool
393 test_inference_cap_check(struct ml_options *opt)
394 {
395 	struct rte_ml_dev_info dev_info;
396 
397 	if (!ml_test_cap_check(opt))
398 		return false;
399 
400 	rte_ml_dev_info_get(opt->dev_id, &dev_info);
401 
402 	if (opt->queue_pairs > dev_info.max_queue_pairs) {
403 		ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
404 		       opt->queue_pairs, dev_info.max_queue_pairs);
405 		return false;
406 	}
407 
408 	if (opt->queue_size > dev_info.max_desc) {
409 		ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
410 		       opt->queue_size, dev_info.max_desc);
411 		return false;
412 	}
413 
414 	if (opt->nb_filelist > dev_info.max_models) {
415 		ml_err("Insufficient capabilities: filelist count exceeds device limit, count = %u > (max_models = %u)",
416 		       opt->nb_filelist, dev_info.max_models);
417 		return false;
418 	}
419 
420 	if (dev_info.max_io < ML_TEST_MAX_IO_SIZE) {
421 		ml_err("Insufficient capabilities: I/O count required = %u > (max_io = %u)",
422 		       ML_TEST_MAX_IO_SIZE, dev_info.max_io);
423 		return false;
424 	}
425 
426 	return true;
427 }
428 
429 int
430 test_inference_opt_check(struct ml_options *opt)
431 {
432 	uint32_t i;
433 	int ret;
434 
435 	/* check common opts */
436 	ret = ml_test_opt_check(opt);
437 	if (ret != 0)
438 		return ret;
439 
440 	/* check for at least one filelist */
441 	if (opt->nb_filelist == 0) {
442 		ml_err("Filelist empty, need at least one filelist to run the test\n");
443 		return -EINVAL;
444 	}
445 
446 	/* check file availability */
447 	for (i = 0; i < opt->nb_filelist; i++) {
448 		if (access(opt->filelist[i].model, F_OK) == -1) {
449 			ml_err("Model file not accessible: id = %u, file = %s", i,
450 			       opt->filelist[i].model);
451 			return -ENOENT;
452 		}
453 
454 		if (access(opt->filelist[i].input, F_OK) == -1) {
455 			ml_err("Input file not accessible: id = %u, file = %s", i,
456 			       opt->filelist[i].input);
457 			return -ENOENT;
458 		}
459 	}
460 
461 	if (opt->repetitions == 0) {
462 		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
463 		return -EINVAL;
464 	}
465 
466 	if (opt->burst_size == 0) {
467 		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
468 		return -EINVAL;
469 	}
470 
471 	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
472 		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
473 		       ML_TEST_MAX_POOL_SIZE);
474 		return -EINVAL;
475 	}
476 
477 	if (opt->queue_pairs == 0) {
478 		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
479 		return -EINVAL;
480 	}
481 
482 	if (opt->queue_size == 0) {
483 		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
484 		return -EINVAL;
485 	}
486 
487 	/* each queue pair needs one enqueue lcore and one dequeue lcore, plus the main lcore */
488 	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
489 		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
490 		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
491 		       (opt->queue_pairs * 2 + 1));
492 		return -EINVAL;
493 	}
494 
495 	return 0;
496 }
497 
498 void
499 test_inference_opt_dump(struct ml_options *opt)
500 {
501 	uint32_t i;
502 
503 	/* dump common opts */
504 	ml_test_opt_dump(opt);
505 
506 	/* dump test opts */
507 	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
508 	ml_dump("burst_size", "%u", opt->burst_size);
509 	ml_dump("queue_pairs", "%u", opt->queue_pairs);
510 	ml_dump("queue_size", "%u", opt->queue_size);
511 	ml_dump("tolerance", "%-7.3f", opt->tolerance);
512 	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));
513 
514 	ml_dump_begin("filelist");
515 	for (i = 0; i < opt->nb_filelist; i++) {
516 		ml_dump_list("model", i, opt->filelist[i].model);
517 		ml_dump_list("input", i, opt->filelist[i].input);
518 		ml_dump_list("output", i, opt->filelist[i].output);
519 		if (strcmp(opt->filelist[i].reference, "\0") != 0)
520 			ml_dump_list("reference", i, opt->filelist[i].reference);
521 	}
522 	ml_dump_end;
523 }
524 
525 int
526 test_inference_setup(struct ml_test *test, struct ml_options *opt)
527 {
528 	struct test_inference *t;
529 	void *test_inference;
530 	uint32_t lcore_id;
531 	int ret = 0;
532 	uint32_t i;
533 
534 	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
535 					    RTE_CACHE_LINE_SIZE, opt->socket_id);
536 	if (test_inference == NULL) {
537 		ml_err("failed to allocate memory for test_inference");
538 		ret = -ENOMEM;
539 		goto error;
540 	}
541 	test->test_priv = test_inference;
542 	t = ml_test_priv(test);
543 
544 	t->nb_used = 0;
545 	t->nb_valid = 0;
546 	t->cmn.result = ML_TEST_FAILED;
547 	t->cmn.opt = opt;
548 	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));
549 
550 	/* get device info */
551 	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
552 	if (ret < 0) {
553 		ml_err("failed to get device info");
554 		goto error;
555 	}
556 
557 	if (opt->burst_size == 1) {
558 		t->enqueue = ml_enqueue_single;
559 		t->dequeue = ml_dequeue_single;
560 	} else {
561 		t->enqueue = ml_enqueue_burst;
562 		t->dequeue = ml_dequeue_burst;
563 	}
564 
565 	/* set model initial state */
566 	for (i = 0; i < opt->nb_filelist; i++)
567 		t->model[i].state = MODEL_INITIAL;
568 
569 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
570 		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
571 			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
572 			RTE_CACHE_LINE_SIZE, opt->socket_id);
573 		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
574 			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
575 			RTE_CACHE_LINE_SIZE, opt->socket_id);
576 		t->args[lcore_id].reqs = rte_zmalloc_socket(
577 			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
578 			RTE_CACHE_LINE_SIZE, opt->socket_id);
579 	}
580 
581 	for (i = 0; i < RTE_MAX_LCORE; i++) {
582 		t->args[i].start_cycles = 0;
583 		t->args[i].end_cycles = 0;
584 	}
585 
586 	return 0;
587 
588 error:
589 	rte_free(test_inference);
590 
591 	return ret;
592 }
593 
594 void
595 test_inference_destroy(struct ml_test *test, struct ml_options *opt)
596 {
597 	struct test_inference *t;
598 	uint32_t lcore_id;
599 
600 	RTE_SET_USED(opt);
601 
602 	t = ml_test_priv(test);
603 
604 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
605 		rte_free(t->args[lcore_id].enq_ops);
606 		rte_free(t->args[lcore_id].deq_ops);
607 		rte_free(t->args[lcore_id].reqs);
608 	}
609 
610 	rte_free(t);
611 }
612 
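/* Configure the ML device and set up opt->queue_pairs queue pairs, each with
 * opt->queue_size descriptors, then start the device.
 */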
613 int
614 ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
615 {
616 	struct rte_ml_dev_qp_conf qp_conf;
617 	struct test_inference *t;
618 	uint16_t qp_id;
619 	int ret;
620 
621 	t = ml_test_priv(test);
622 
623 	RTE_SET_USED(t);
624 
625 	ret = ml_test_device_configure(test, opt);
626 	if (ret != 0)
627 		return ret;
628 
629 	/* setup queue pairs */
630 	qp_conf.nb_desc = opt->queue_size;
631 	qp_conf.cb = NULL;
632 
633 	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
637 		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
638 		if (ret != 0) {
639 			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
640 			       opt->dev_id, qp_id);
641 			return ret;
642 		}
643 	}
644 
645 	ret = ml_test_device_start(test, opt);
646 	if (ret != 0)
647 		goto error;
648 
649 	return 0;
650 
651 error:
652 	ml_test_device_close(test, opt);
653 
654 	return ret;
655 }
656 
657 int
658 ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
659 {
660 	int ret;
661 
662 	ret = ml_test_device_stop(test, opt);
663 	if (ret != 0)
664 		goto error;
665 
666 	ret = ml_test_device_close(test, opt);
667 	if (ret != 0)
668 		return ret;
669 
670 	return 0;
671 
672 error:
673 	ml_test_device_close(test, opt);
674 
675 	return ret;
676 }
677 
678 /* Callback for IO pool object creation. Computes the fields of the ml_request
679  * structure and prepares the quantized input data.
680  */
681 static void
682 ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
683 {
684 	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
685 	struct ml_request *req = (struct ml_request *)obj;
686 	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
687 	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
688 	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
689 	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
690 	uint64_t offset;
691 	uint64_t bufsz;
692 	uint32_t i;
693 
694 	RTE_SET_USED(mp);
695 	RTE_SET_USED(obj_idx);
696 
697 	req->input = (uint8_t *)obj +
698 		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size);
699 	req->output =
700 		req->input + RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.align_size);
701 	req->niters = 0;
702 
703 	if (t->model[t->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
704 		dbuff_seg[0].addr = t->model[t->fid].input;
705 		dbuff_seg[0].iova_addr = rte_mem_virt2iova(t->model[t->fid].input);
706 		dbuff_seg[0].length = t->model[t->fid].inp_dsize;
707 		dbuff_seg[0].next = NULL;
708 		d_segs[0] = &dbuff_seg[0];
709 
710 		qbuff_seg[0].addr = req->input;
711 		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->input);
712 		qbuff_seg[0].length = t->model[t->fid].inp_qsize;
713 		qbuff_seg[0].next = NULL;
714 		q_segs[0] = &qbuff_seg[0];
715 	} else {
716 		offset = 0;
717 		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
718 			bufsz = t->model[t->fid].info.input_info[i].nb_elements * sizeof(float);
719 			dbuff_seg[i].addr = t->model[t->fid].input + offset;
720 			dbuff_seg[i].iova_addr = rte_mem_virt2iova(t->model[t->fid].input + offset);
721 			dbuff_seg[i].length = bufsz;
722 			dbuff_seg[i].next = NULL;
723 			d_segs[i] = &dbuff_seg[i];
724 			offset += bufsz;
725 		}
726 
727 		offset = 0;
728 		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
729 			bufsz = RTE_ALIGN_CEIL(t->model[t->fid].info.input_info[i].size,
730 					       t->cmn.dev_info.align_size);
731 			qbuff_seg[i].addr = req->input + offset;
732 			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->input + offset);
733 			qbuff_seg[i].length = bufsz;
734 			qbuff_seg[i].next = NULL;
735 			q_segs[i] = &qbuff_seg[i];
736 			offset += bufsz;
737 		}
738 	}
739 
740 	/* quantize data */
741 	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, d_segs, q_segs);
742 }
743 
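/* Per-model I/O setup: reserve a memzone for the dequantized input, output and
 * optional reference data loaded from the filelist, and create a mempool of
 * ml_request objects carrying the quantized input/output buffers.
 */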
744 int
745 ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
746 {
747 	struct test_inference *t = ml_test_priv(test);
748 	char mz_name[RTE_MEMZONE_NAMESIZE];
749 	char mp_name[RTE_MEMPOOL_NAMESIZE];
750 	const struct rte_memzone *mz;
751 	uint64_t nb_buffers;
752 	char *buffer = NULL;
753 	uint32_t buff_size;
754 	uint32_t mz_size;
755 	size_t fsize;
756 	uint32_t i;
757 	int ret;
758 
759 	/* get input buffer size */
760 	t->model[fid].inp_qsize = 0;
761 	for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
762 		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
763 			t->model[fid].inp_qsize += t->model[fid].info.input_info[i].size;
764 		else
765 			t->model[fid].inp_qsize += RTE_ALIGN_CEIL(
766 				t->model[fid].info.input_info[i].size, t->cmn.dev_info.align_size);
767 	}
768 
769 	/* get output buffer size */
770 	t->model[fid].out_qsize = 0;
771 	for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
772 		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
773 			t->model[fid].out_qsize += t->model[fid].info.output_info[i].size;
774 		else
775 			t->model[fid].out_qsize += RTE_ALIGN_CEIL(
776 				t->model[fid].info.output_info[i].size, t->cmn.dev_info.align_size);
777 	}
778 
779 	t->model[fid].inp_dsize = 0;
780 	for (i = 0; i < t->model[fid].info.nb_inputs; i++)
781 		t->model[fid].inp_dsize +=
782 			t->model[fid].info.input_info[i].nb_elements * sizeof(float);
783 
784 	t->model[fid].out_dsize = 0;
785 	for (i = 0; i < t->model[fid].info.nb_outputs; i++)
786 		t->model[fid].out_dsize +=
787 			t->model[fid].info.output_info[i].nb_elements * sizeof(float);
788 
789 	/* allocate buffer for user data */
790 	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
791 	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
792 		mz_size += t->model[fid].out_dsize;
793 
794 	sprintf(mz_name, "ml_user_data_%d", fid);
795 	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
796 	if (mz == NULL) {
797 		ml_err("Memzone allocation failed for ml_user_data\n");
798 		ret = -ENOMEM;
799 		goto error;
800 	}
801 
802 	t->model[fid].input = mz->addr;
803 	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
804 	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
805 		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
806 	else
807 		t->model[fid].reference = NULL;
808 
809 	/* load input file */
810 	ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
811 	if (ret != 0)
812 		goto error;
813 
814 	if (fsize == t->model[fid].inp_dsize) {
815 		rte_memcpy(t->model[fid].input, buffer, fsize);
816 		free(buffer);
817 	} else {
818 		ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
819 		       t->model[fid].inp_dsize);
820 		ret = -EINVAL;
821 		goto error;
822 	}
823 
824 	/* load reference file */
825 	buffer = NULL;
826 	if (t->model[fid].reference != NULL) {
827 		ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
828 		if (ret != 0)
829 			goto error;
830 
831 		if (fsize == t->model[fid].out_dsize) {
832 			rte_memcpy(t->model[fid].reference, buffer, fsize);
833 			free(buffer);
834 		} else {
835 			ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
836 			       fsize, t->model[fid].out_dsize);
837 			ret = -EINVAL;
838 			goto error;
839 		}
840 	}
841 
842 	/* create mempool for quantized input and output buffers. ml_request_initialize is
843 	 * used as a callback for object creation.
844 	 */
845 	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size) +
846 		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.align_size) +
847 		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.align_size);
848 	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);
849 
850 	t->fid = fid;
851 	sprintf(mp_name, "ml_io_pool_%d", fid);
852 	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
853 						   ml_request_initialize, test, opt->socket_id, 0);
854 	if (t->model[fid].io_pool == NULL) {
855 		ml_err("Failed to create io pool : %s\n", mp_name);
856 		ret = -ENOMEM;
857 		goto error;
858 	}
859 
860 	return 0;
861 
862 error:
863 	if (mz != NULL)
864 		rte_memzone_free(mz);
865 
866 	if (t->model[fid].io_pool != NULL) {
867 		rte_mempool_free(t->model[fid].io_pool);
868 		t->model[fid].io_pool = NULL;
869 	}
870 
871 	free(buffer);
872 
873 	return ret;
874 }
875 
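/* Release the per-model user-data memzone and quantized I/O mempool created by
 * ml_inference_iomem_setup().
 */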
876 void
877 ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
878 {
879 	char mz_name[RTE_MEMZONE_NAMESIZE];
880 	char mp_name[RTE_MEMPOOL_NAMESIZE];
881 	const struct rte_memzone *mz;
882 	struct rte_mempool *mp;
883 
884 	RTE_SET_USED(test);
885 	RTE_SET_USED(opt);
886 
887 	/* release user data memzone */
888 	sprintf(mz_name, "ml_user_data_%d", fid);
889 	mz = rte_memzone_lookup(mz_name);
890 	if (mz != NULL)
891 		rte_memzone_free(mz);
892 
893 	/* destroy io pool */
894 	sprintf(mp_name, "ml_io_pool_%d", fid);
895 	mp = rte_mempool_lookup(mp_name);
896 	rte_mempool_free(mp);
897 }
898 
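/* Create the op pool and the buffer-segment pool shared by all models and lcores. */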
899 int
900 ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
901 {
902 	struct test_inference *t = ml_test_priv(test);
903 
904 	/* create op pool */
905 	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
906 					   opt->socket_id);
907 	if (t->op_pool == NULL) {
908 		ml_err("Failed to create op pool : %s\n", "ml_test_op_pool");
909 		return -ENOMEM;
910 	}
911 
912 	/* Create a pool of rte_ml_buff_seg elements. External buffers are attached
913 	 * to the buf_segs while enqueuing inference requests.
914 	 */
915 	t->buf_seg_pool = rte_mempool_create("ml_test_mbuf_pool", ML_TEST_MAX_POOL_SIZE * 2,
916 					     sizeof(struct rte_ml_buff_seg), 0, 0, NULL, NULL, NULL,
917 					     NULL, opt->socket_id, 0);
918 	if (t->buf_seg_pool == NULL) {
919 		ml_err("Failed to create buf_segs pool : %s\n", "ml_test_mbuf_pool");
920 		rte_ml_op_pool_free(t->op_pool);
921 		return -ENOMEM;
922 	}
923 
924 	return 0;
925 }
926 
927 void
928 ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
929 {
930 	struct test_inference *t = ml_test_priv(test);
931 
932 	RTE_SET_USED(opt);
933 
934 	/* release op pool */
935 	rte_mempool_free(t->op_pool);
936 
937 	/* release buf_segs pool */
938 	rte_mempool_free(t->buf_seg_pool);
939 }
940 
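/* Compare the dequantized output against the reference: CRC equality when
 * tolerance is 0, otherwise an element-wise percentage-deviation check against
 * opt->tolerance.
 */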
941 static bool
942 ml_inference_validation(struct ml_test *test, struct ml_request *req)
943 {
944 	struct test_inference *t = ml_test_priv((struct ml_test *)test);
945 	struct ml_model *model;
946 	float *reference;
947 	float *output;
948 	float deviation;
949 	bool match;
950 	uint32_t i;
951 	uint32_t j;
952 
953 	model = &t->model[req->fid];
954 
955 	/* compare crc when tolerance is 0 */
956 	if (t->cmn.opt->tolerance == 0.0) {
957 		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
958 			 rte_hash_crc(model->reference, model->out_dsize, 0));
959 	} else {
960 		output = (float *)model->output;
961 		reference = (float *)model->reference;
962 
963 		i = 0;
964 next_output:
965 		j = 0;
966 next_element:
967 		match = false;
968 		if ((*reference == 0) && (*output == 0))
969 			deviation = 0;
970 		else
971 			deviation = 100 * fabs(*output - *reference) / fabs(*reference);
972 		if (deviation <= t->cmn.opt->tolerance)
973 			match = true;
974 		else
975 			ml_err("id = %d, element = %d, output = %f, reference = %f, deviation = %f %%\n",
976 			       i, j, *output, *reference, deviation);
977 
978 		output++;
979 		reference++;
980 
981 		if (!match)
982 			goto done;
983 
984 		j++;
985 		if (j < model->info.output_info[i].nb_elements)
986 			goto next_element;
987 
988 		i++;
989 		if (i < model->info.nb_outputs)
990 			goto next_output;
991 	}
992 done:
993 	return match;
994 }
995 
996 /* Callback for mempool object iteration. Dequantizes the output data of each completed request. */
997 static void
998 ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
999 {
1000 	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
1001 	struct ml_request *req = (struct ml_request *)obj;
1002 	struct ml_model *model = &t->model[req->fid];
1003 	bool error = false;
1004 	char *dump_path;
1005 
1006 	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
1007 	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
1008 	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
1009 	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
1010 	uint64_t offset;
1011 	uint64_t bufsz;
1012 	uint32_t i;
1013 
1014 	RTE_SET_USED(mp);
1015 
1016 	if (req->niters == 0)
1017 		return;
1018 
1019 	t->nb_used++;
1020 
1021 	if (t->model[req->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
1022 		qbuff_seg[0].addr = req->output;
1023 		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->output);
1024 		qbuff_seg[0].length = t->model[req->fid].out_qsize;
1025 		qbuff_seg[0].next = NULL;
1026 		q_segs[0] = &qbuff_seg[0];
1027 
1028 		dbuff_seg[0].addr = model->output;
1029 		dbuff_seg[0].iova_addr = rte_mem_virt2iova(model->output);
1030 		dbuff_seg[0].length = t->model[req->fid].out_dsize;
1031 		dbuff_seg[0].next = NULL;
1032 		d_segs[0] = &dbuff_seg[0];
1033 	} else {
1034 		offset = 0;
1035 		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
1036 			bufsz = RTE_ALIGN_CEIL(t->model[req->fid].info.output_info[i].size,
1037 					       t->cmn.dev_info.align_size);
1038 			qbuff_seg[i].addr = req->output + offset;
1039 			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->output + offset);
1040 			qbuff_seg[i].length = bufsz;
1041 			qbuff_seg[i].next = NULL;
1042 			q_segs[i] = &qbuff_seg[i];
1043 			offset += bufsz;
1044 		}
1045 
1046 		offset = 0;
1047 		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
1048 			bufsz = t->model[req->fid].info.output_info[i].nb_elements * sizeof(float);
1049 			dbuff_seg[i].addr = model->output + offset;
1050 			dbuff_seg[i].iova_addr = rte_mem_virt2iova(model->output + offset);
1051 			dbuff_seg[i].length = bufsz;
1052 			dbuff_seg[i].next = NULL;
1053 			d_segs[i] = &dbuff_seg[i];
1054 			offset += bufsz;
1055 		}
1056 	}
1057 
1058 	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, q_segs, d_segs);
1059 
1060 	if (model->reference == NULL)
1061 		goto dump_output_pass;
1062 
1063 	if (!ml_inference_validation(opaque, req))
1064 		goto dump_output_fail;
1065 	else
1066 		goto dump_output_pass;
1067 
1068 dump_output_pass:
1069 	if (obj_idx == 0) {
1070 		/* write quantized output */
1071 		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
1072 			return;
1073 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
1074 		free(dump_path);
1075 		if (error)
1076 			return;
1077 
1078 		/* write dequantized output */
1079 		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
1080 			return;
1081 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
1082 		free(dump_path);
1083 		if (error)
1084 			return;
1085 	}
1086 	t->nb_valid++;
1087 
1088 	return;
1089 
1090 dump_output_fail:
1091 	if (t->cmn.opt->debug) {
1092 		/* dump quantized output buffer */
1093 		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
1094 			     obj_idx) == -1)
1095 			return;
1096 		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
1097 		free(dump_path);
1098 		if (error)
1099 			return;
1100 
1101 		/* dump dequantized output buffer */
1102 		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output, obj_idx) ==
1103 		    -1)
1104 			return;
1105 		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
1106 		free(dump_path);
1107 		if (error)
1108 			return;
1109 	}
1110 }
1111 
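/* Finalize results for a model: dequantize and validate all completed requests
 * in the model's I/O pool and mark the test failed if any request mismatched
 * or any lcore reported errors.
 */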
1112 int
1113 ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
1114 {
1115 	struct test_inference *t = ml_test_priv(test);
1116 	uint64_t error_count = 0;
1117 	uint32_t i;
1118 
1119 	RTE_SET_USED(opt);
1120 
1121 	/* check for errors */
1122 	for (i = 0; i < RTE_MAX_LCORE; i++)
1123 		error_count += t->error_count[i];
1124 
1125 	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);
1126 
1127 	if ((t->nb_used == t->nb_valid) && (error_count == 0))
1128 		t->cmn.result = ML_TEST_SUCCESS;
1129 	else
1130 		t->cmn.result = ML_TEST_FAILED;
1131 
1132 	return t->cmn.result;
1133 }
1134 
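/* Launch worker lcores in enqueue/dequeue pairs, one pair per queue pair.
 * Repetitions are split evenly across queue pairs, with any remainder added
 * to queue pair 0.
 */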
1135 int
1136 ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
1137 			  uint16_t end_fid)
1138 {
1139 	struct test_inference *t = ml_test_priv(test);
1140 	uint32_t lcore_id;
1141 	uint32_t nb_reqs;
1142 	uint32_t id = 0;
1143 	uint32_t qp_id;
1144 
1145 	nb_reqs = opt->repetitions / opt->queue_pairs;
1146 
1147 	RTE_LCORE_FOREACH_WORKER(lcore_id)
1148 	{
1149 		if (id >= opt->queue_pairs * 2)
1150 			break;
1151 
1152 		qp_id = id / 2;
1153 		t->args[lcore_id].qp_id = qp_id;
1154 		t->args[lcore_id].nb_reqs = nb_reqs;
1155 		if (qp_id == 0)
1156 			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;
1157 
1158 		if (t->args[lcore_id].nb_reqs == 0) {
1159 			id++;
1160 			break;
1161 		}
1162 
1163 		t->args[lcore_id].start_fid = start_fid;
1164 		t->args[lcore_id].end_fid = end_fid;
1165 
1166 		if (id % 2 == 0)
1167 			rte_eal_remote_launch(t->enqueue, test, lcore_id);
1168 		else
1169 			rte_eal_remote_launch(t->dequeue, test, lcore_id);
1170 
1171 		id++;
1172 	}
1173 
1174 	return 0;
1175 }
1176