/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
	do { \
		FILE *fp = fopen(name, "w+"); \
		if (fp == NULL) { \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
			err = true; \
		} else { \
			if (fwrite(buffer, 1, size, fp) != size) { \
				ml_err("Error writing output, file: %s, error: %s", name, \
				       strerror(errno)); \
				err = true; \
			} \
			fclose(fp); \
		} \
	} while (0)

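/*
 * Illustrative use of ML_OPEN_WRITE_GET_ERR, mirroring how it is invoked from
 * ml_request_finish() below (the buffer and size names here are hypothetical):
 *
 *	bool err = false;
 *
 *	ML_OPEN_WRITE_GET_ERR("output.bin", out_buf, out_size, err);
 *	if (err)
 *		return;
 *
 * The macro sets err to true when the file cannot be created or the write is
 * short, and closes the file itself whenever fopen() succeeds.
 */
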
/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint64_t offset;
	uint64_t bufsz;
	uint16_t fid;
	uint32_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry_req:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry_req;

retry_inp_segs:
	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->inp_buf_segs,
				   t->model[fid].info.nb_inputs);
	if (ret != 0)
		goto retry_inp_segs;

retry_out_segs:
	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->out_buf_segs,
				   t->model[fid].info.nb_outputs);
	if (ret != 0)
		goto retry_out_segs;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].info.min_batches;
	op->mempool = t->op_pool;
	op->input = req->inp_buf_segs;
	op->output = req->out_buf_segs;
	op->user_ptr = req;

	if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		op->input[0]->addr = req->input;
		op->input[0]->iova_addr = rte_mem_virt2iova(req->input);
		op->input[0]->length = t->model[fid].inp_qsize;
		op->input[0]->next = NULL;

		op->output[0]->addr = req->output;
		op->output[0]->iova_addr = rte_mem_virt2iova(req->output);
		op->output[0]->length = t->model[fid].out_qsize;
		op->output[0]->next = NULL;
	} else {
		offset = 0;
		for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,
					       t->cmn.dev_info.align_size);
			op->input[i]->addr = req->input + offset;
			op->input[i]->iova_addr = rte_mem_virt2iova(req->input + offset);
			op->input[i]->length = bufsz;
			op->input[i]->next = NULL;
			offset += bufsz;
		}

		offset = 0;
		for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,
					       t->cmn.dev_info.align_size);
			op->output[i]->addr = req->output + offset;
			op->output[i]->iova_addr = rte_mem_virt2iova(req->output + offset);
			op->output[i]->length = bufsz;
			op->output[i]->next = NULL;
			offset += bufsz;
		}
	}

	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}
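
/*
 * Timing note: the enqueue worker above adds one TSC sample per successfully
 * enqueued op to args->start_cycles, and the dequeue workers below add one
 * sample per completion to args->end_cycles; the difference of the two sums
 * gives an aggregate from which an average per-op turnaround can be derived
 * (an interpretation of how the counters are accumulated here; the reporting
 * itself lives outside this file section).
 */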

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->input,
				     t->model[req->fid].info.nb_inputs);
		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->output,
				     t->model[req->fid].info.nb_outputs);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}
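
/*
 * The burst variants below follow the same structure as the single-op paths:
 * per iteration the enqueue side prepares ops_count = RTE_MIN(burst_size,
 * remaining repetitions) ops for the current fid, then keeps calling
 * rte_ml_enqueue_burst(), advancing idx by the number actually accepted,
 * until nothing is pending.
 */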

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint64_t offset;
	uint64_t bufsz;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	uint16_t j;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry_reqs:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry_reqs;

	for (i = 0; i < ops_count; i++) {
retry_inp_segs:
		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->inp_buf_segs,
					   t->model[fid].info.nb_inputs);
		if (ret != 0)
			goto retry_inp_segs;

retry_out_segs:
		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->out_buf_segs,
					   t->model[fid].info.nb_outputs);
		if (ret != 0)
			goto retry_out_segs;

		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].info.min_batches;
		args->enq_ops[i]->mempool = t->op_pool;
		args->enq_ops[i]->input = args->reqs[i]->inp_buf_segs;
		args->enq_ops[i]->output = args->reqs[i]->out_buf_segs;
		args->enq_ops[i]->user_ptr = args->reqs[i];

		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
			args->enq_ops[i]->input[0]->addr = args->reqs[i]->input;
			args->enq_ops[i]->input[0]->iova_addr =
				rte_mem_virt2iova(args->reqs[i]->input);
			args->enq_ops[i]->input[0]->length = t->model[fid].inp_qsize;
			args->enq_ops[i]->input[0]->next = NULL;

			args->enq_ops[i]->output[0]->addr = args->reqs[i]->output;
			args->enq_ops[i]->output[0]->iova_addr =
				rte_mem_virt2iova(args->reqs[i]->output);
			args->enq_ops[i]->output[0]->length = t->model[fid].out_qsize;
			args->enq_ops[i]->output[0]->next = NULL;
		} else {
			offset = 0;
			for (j = 0; j < t->model[fid].info.nb_inputs; j++) {
				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[j].size,
						       t->cmn.dev_info.align_size);

				args->enq_ops[i]->input[j]->addr = args->reqs[i]->input + offset;
				args->enq_ops[i]->input[j]->iova_addr =
					rte_mem_virt2iova(args->reqs[i]->input + offset);
				args->enq_ops[i]->input[j]->length = bufsz;
				args->enq_ops[i]->input[j]->next = NULL;
				offset += bufsz;
			}

			offset = 0;
			for (j = 0; j < t->model[fid].info.nb_outputs; j++) {
				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[j].size,
						       t->cmn.dev_info.align_size);
				args->enq_ops[i]->output[j]->addr = args->reqs[i]->output + offset;
				args->enq_ops[i]->output[j]->iova_addr =
					rte_mem_virt2iova(args->reqs[i]->output + offset);
				args->enq_ops[i]->output[j]->length = bufsz;
				args->enq_ops[i]->output[j]->next = NULL;
				offset += bufsz;
			}
		}

		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

enqueue_reqs:
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}
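
/*
 * The dequeue counterpart polls up to burst_size completions at a time,
 * returns the request and buffer segments of each completed op to their
 * pools, and bulk-frees the ops themselves back to the op pool.
 */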

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL) {
				rte_mempool_put(t->model[req->fid].io_pool, req);
				rte_mempool_put_bulk(t->buf_seg_pool,
						     (void **)args->deq_ops[i]->input,
						     t->model[req->fid].info.nb_inputs);
				rte_mempool_put_bulk(t->buf_seg_pool,
						     (void **)args->deq_ops[i]->output,
						     t->model[req->fid].info.nb_outputs);
			}
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
		       opt->queue_size, dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: Filelist count exceeded device limit, count = %u > (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	if (dev_info.max_io < ML_TEST_MAX_IO_SIZE) {
		ml_err("Insufficient capabilities: Max I/O, count = %u > (max limit = %u)",
		       ML_TEST_MAX_IO_SIZE, dev_info.max_io);
		return false;
	}

	return true;
}

int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check for at least one filelist */
	if (opt->nb_filelist == 0) {
		ml_err("Filelist empty, need at least one filelist to run the test\n");
		return -EINVAL;
	}

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

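	/*
	 * Each queue pair is driven by one dedicated enqueue lcore and one
	 * dedicated dequeue lcore (see ml_inference_launch_cores), and one
	 * lcore is reserved for the main thread. Hence the requirement of at
	 * least (queue_pairs * 2 + 1) lcores below; e.g. two queue pairs need
	 * five lcores.
	 */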
	/* check number of available lcores. */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_inference");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	uint32_t lcore_id;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		rte_free(t->args[lcore_id].enq_ops);
		rte_free(t->args[lcore_id].deq_ops);
		rte_free(t->args[lcore_id].reqs);
	}

	rte_free(t);
}
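
/*
 * Configure the ML device, set up opt->queue_pairs queue pairs of
 * opt->queue_size descriptors each, and start the device. The device is
 * closed again if starting it fails.
 */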

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	qp_conf.nb_desc = opt->queue_size;
	qp_conf.cb = NULL;

	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

/* Callback for IO pool create. This function computes the fields of the ml_request
 * structure and prepares the quantized input data.
 */
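/* The d_segs below describe the dequantized user data loaded into
 * t->model[fid].input, the q_segs describe this request's quantized input
 * buffer, and rte_ml_io_quantize() converts the former into the latter. With
 * a split I/O layout each model input gets its own segment.
 */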
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
	uint64_t offset;
	uint64_t bufsz;
	uint32_t i;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

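	/*
	 * Object layout in the io_pool: the ml_request header is followed by
	 * the quantized input buffer and then the quantized output buffer,
	 * each aligned to dev_info.align_size (see the buff_size computation
	 * in ml_inference_iomem_setup).
	 */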
	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size);
	req->output =
		req->input + RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.align_size);
	req->niters = 0;

	if (t->model[t->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		dbuff_seg[0].addr = t->model[t->fid].input;
		dbuff_seg[0].iova_addr = rte_mem_virt2iova(t->model[t->fid].input);
		dbuff_seg[0].length = t->model[t->fid].inp_dsize;
		dbuff_seg[0].next = NULL;
		d_segs[0] = &dbuff_seg[0];

		qbuff_seg[0].addr = req->input;
		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->input);
		qbuff_seg[0].length = t->model[t->fid].inp_qsize;
		qbuff_seg[0].next = NULL;
		q_segs[0] = &qbuff_seg[0];
	} else {
		offset = 0;
		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
			bufsz = t->model[t->fid].info.input_info[i].nb_elements * sizeof(float);
			dbuff_seg[i].addr = t->model[t->fid].input + offset;
			dbuff_seg[i].iova_addr = rte_mem_virt2iova(t->model[t->fid].input + offset);
			dbuff_seg[i].length = bufsz;
			dbuff_seg[i].next = NULL;
			d_segs[i] = &dbuff_seg[i];
			offset += bufsz;
		}

		offset = 0;
		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[t->fid].info.input_info[i].size,
					       t->cmn.dev_info.align_size);
			qbuff_seg[i].addr = req->input + offset;
			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->input + offset);
			qbuff_seg[i].length = bufsz;
			qbuff_seg[i].next = NULL;
			q_segs[i] = &qbuff_seg[i];
			offset += bufsz;
		}
	}

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, d_segs, q_segs);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	char *buffer = NULL;
	uint32_t buff_size;
	uint32_t mz_size;
	size_t fsize;
	uint32_t i;
	int ret;

	/* get input buffer size */
	t->model[fid].inp_qsize = 0;
	for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
			t->model[fid].inp_qsize += t->model[fid].info.input_info[i].size;
		else
			t->model[fid].inp_qsize += RTE_ALIGN_CEIL(
				t->model[fid].info.input_info[i].size, t->cmn.dev_info.align_size);
	}

	/* get output buffer size */
	t->model[fid].out_qsize = 0;
	for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
			t->model[fid].out_qsize += t->model[fid].info.output_info[i].size;
		else
			t->model[fid].out_qsize += RTE_ALIGN_CEIL(
				t->model[fid].info.output_info[i].size, t->cmn.dev_info.align_size);
	}

	t->model[fid].inp_dsize = 0;
	for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
		if (opt->quantized_io)
			t->model[fid].inp_dsize += t->model[fid].info.input_info[i].size;
		else
			t->model[fid].inp_dsize +=
				t->model[fid].info.input_info[i].nb_elements * sizeof(float);
	}

	t->model[fid].out_dsize = 0;
	for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
		if (opt->quantized_io)
			t->model[fid].out_dsize += t->model[fid].info.output_info[i].size;
		else
			t->model[fid].out_dsize +=
				t->model[fid].info.output_info[i].nb_elements * sizeof(float);
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
	if (ret != 0)
		goto error;

	if (fsize == t->model[fid].inp_dsize) {
		rte_memcpy(t->model[fid].input, buffer, fsize);
		free(buffer);
	} else {
		ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		free(buffer);
		goto error;
	}

	/* load reference file */
	buffer = NULL;
	if (t->model[fid].reference != NULL) {
		ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
		if (ret != 0)
			goto error;

		if (fsize == t->model[fid].out_dsize) {
			rte_memcpy(t->model[fid].reference, buffer, fsize);
			free(buffer);
		} else {
			ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
			       fsize, t->model[fid].out_dsize);
			ret = -EINVAL;
			free(buffer);
			goto error;
		}
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", "ml_io_pool");
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	rte_mempool_free(mp);
}
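
/*
 * Create the pools shared by all models and queue pairs: the rte_ml_op pool
 * and the buf_seg pool from which per-op input/output segment descriptors are
 * drawn.
 */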

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_op_pool");
		return -ENOMEM;
	}

	/* create buf_segs pool with elements of type struct rte_ml_buff_seg. external buffers
	 * are attached to the buf_segs while enqueuing inference requests.
	 */
	t->buf_seg_pool = rte_mempool_create("ml_test_mbuf_pool", ML_TEST_MAX_POOL_SIZE * 2,
					     sizeof(struct rte_ml_buff_seg), 0, 0, NULL, NULL, NULL,
					     NULL, opt->socket_id, 0);
	if (t->buf_seg_pool == NULL) {
		ml_err("Failed to create buf_segs pool : %s\n", "ml_test_mbuf_pool");
		rte_ml_op_pool_free(t->op_pool);
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	rte_mempool_free(t->op_pool);

	/* release buf_segs pool */
	rte_mempool_free(t->buf_seg_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	float *reference;
	float *output;
	float deviation;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

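	/*
	 * Validation policy: with tolerance == 0 the dequantized output must
	 * match the reference buffer bit-exactly (compared via CRC); otherwise
	 * each element may deviate by up to tolerance percent of the reference
	 * value. For example, with reference = 2.0 and output = 2.01 the
	 * deviation is 100 * |2.01 - 2.0| / |2.0| = 0.5 %, which passes a
	 * tolerance of 1.0.
	 */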
	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = (float *)model->output;
		reference = (float *)model->reference;

		i = 0;
next_output:
		j = 0;
next_element:
		match = false;
		if ((*reference == 0) && (*output == 0))
			deviation = 0;
		else
			deviation = 100 * fabs(*output - *reference) / fabs(*reference);
		if (deviation <= t->cmn.opt->tolerance)
			match = true;
		else
			ml_err("id = %d, element = %d, output = %f, reference = %f, deviation = %f %%\n",
			       i, j, *output, *reference, deviation);

		output++;
		reference++;

		if (!match)
			goto done;

		j++;
		if (j < model->info.output_info[i].nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	return match;
}

/* Callback for mempool object iteration. This callback dequantizes the output data and
 * validates it against the reference output, when one is provided.
 */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	bool error = false;
	char *dump_path;

	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
	uint64_t offset;
	uint64_t bufsz;
	uint32_t i;

	RTE_SET_USED(mp);

	if (req->niters == 0)
		return;

	t->nb_used++;

	if (t->model[req->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		qbuff_seg[0].addr = req->output;
		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->output);
		qbuff_seg[0].length = t->model[req->fid].out_qsize;
		qbuff_seg[0].next = NULL;
		q_segs[0] = &qbuff_seg[0];

		dbuff_seg[0].addr = model->output;
		dbuff_seg[0].iova_addr = rte_mem_virt2iova(model->output);
		dbuff_seg[0].length = t->model[req->fid].out_dsize;
		dbuff_seg[0].next = NULL;
		d_segs[0] = &dbuff_seg[0];
	} else {
		offset = 0;
		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[req->fid].info.output_info[i].size,
					       t->cmn.dev_info.align_size);
			qbuff_seg[i].addr = req->output + offset;
			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->output + offset);
			qbuff_seg[i].length = bufsz;
			qbuff_seg[i].next = NULL;
			q_segs[i] = &qbuff_seg[i];
			offset += bufsz;
		}

		offset = 0;
		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
			bufsz = t->model[req->fid].info.output_info[i].nb_elements * sizeof(float);
			dbuff_seg[i].addr = model->output + offset;
			dbuff_seg[i].iova_addr = rte_mem_virt2iova(model->output + offset);
			dbuff_seg[i].length = bufsz;
			dbuff_seg[i].next = NULL;
			d_segs[i] = &dbuff_seg[i];
			offset += bufsz;
		}
	}

	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, q_segs, d_segs);

	if (model->reference == NULL)
		goto dump_output_pass;

	if (!ml_inference_validation(opaque, req))
		goto dump_output_fail;
	else
		goto dump_output_pass;

dump_output_pass:
	if (obj_idx == 0) {
		/* write quantized output */
		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* write dequantized output */
		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
	t->nb_valid++;

	return;

dump_output_fail:
	if (t->cmn.opt->debug) {
		/* dump quantized output buffer */
		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* dump dequantized output buffer */
		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output, obj_idx) ==
		    -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
}
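
/*
 * Collect the result for one model: walk its io_pool with ml_request_finish
 * to dequantize and validate every request that actually ran, then report
 * success only if all used requests validated and no lcore recorded an op
 * error.
 */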

int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

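	/*
	 * Worker lcores are assigned in pairs per queue pair: even ids run the
	 * enqueue routine and odd ids the dequeue routine, with qp_id = id / 2.
	 * Repetitions are split evenly across queue pairs and any remainder is
	 * given to queue pair 0.
	 */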
	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}