/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
	do { \
		FILE *fp = fopen(name, "w+"); \
		if (fp == NULL) { \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
			err = true; \
		} else { \
			if (fwrite(buffer, 1, size, fp) != size) { \
				ml_err("Error writing output, file: %s, error: %s", name, \
				       strerror(errno)); \
				err = true; \
			} \
			fclose(fp); \
		} \
	} while (0)

/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint64_t offset;
	uint64_t bufsz;
	uint16_t fid;
	uint32_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry_req:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry_req;

retry_inp_segs:
	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->inp_buf_segs,
				   t->model[fid].info.nb_inputs);
	if (ret != 0)
		goto retry_inp_segs;

retry_out_segs:
	ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->out_buf_segs,
				   t->model[fid].info.nb_outputs);
	if (ret != 0)
		goto retry_out_segs;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].info.min_batches;
	op->mempool = t->op_pool;
	op->input = req->inp_buf_segs;
	op->output = req->out_buf_segs;
	op->user_ptr = req;

	if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		op->input[0]->addr = req->input;
		op->input[0]->iova_addr = rte_mem_virt2iova(req->input);
		op->input[0]->length = t->model[fid].inp_qsize;
		op->input[0]->next = NULL;

		op->output[0]->addr = req->output;
		op->output[0]->iova_addr = rte_mem_virt2iova(req->output);
		op->output[0]->length = t->model[fid].out_qsize;
		op->output[0]->next = NULL;
	} else {
		offset = 0;
		for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,
					       t->cmn.dev_info.align_size);
			op->input[i]->addr = req->input + offset;
			op->input[i]->iova_addr = rte_mem_virt2iova(req->input + offset);
			op->input[i]->length = bufsz;
			op->input[i]->next = NULL;
			offset += bufsz;
		}

		offset = 0;
		for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,
					       t->cmn.dev_info.align_size);
			op->output[i]->addr = req->output + offset;
			op->output[i]->iova_addr = rte_mem_virt2iova(req->output + offset);
			op->output[i]->length = bufsz;
			op->output[i]->next = NULL;
			offset += bufsz;
		}
	}

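	/* Record one more enqueue on this request object and remember the model
	 * file id, so that the dequeue side can return the buffers to the right pools.
	 */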
	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->input,
				     t->model[req->fid].info.nb_inputs);
		rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->output,
				     t->model[req->fid].info.nb_outputs);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint64_t offset;
	uint64_t bufsz;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	uint16_t j;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry_reqs:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry_reqs;

	for (i = 0; i < ops_count; i++) {
retry_inp_segs:
		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->inp_buf_segs,
					   t->model[fid].info.nb_inputs);
		if (ret != 0)
			goto retry_inp_segs;

retry_out_segs:
		ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->out_buf_segs,
					   t->model[fid].info.nb_outputs);
		if (ret != 0)
			goto retry_out_segs;

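		/* Fill the op fields that are independent of the model IO layout. */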
		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].info.min_batches;
		args->enq_ops[i]->mempool = t->op_pool;
		args->enq_ops[i]->input = args->reqs[i]->inp_buf_segs;
		args->enq_ops[i]->output = args->reqs[i]->out_buf_segs;
		args->enq_ops[i]->user_ptr = args->reqs[i];

		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
			args->enq_ops[i]->input[0]->addr = args->reqs[i]->input;
			args->enq_ops[i]->input[0]->iova_addr =
				rte_mem_virt2iova(args->reqs[i]->input);
			args->enq_ops[i]->input[0]->length = t->model[fid].inp_qsize;
			args->enq_ops[i]->input[0]->next = NULL;

			args->enq_ops[i]->output[0]->addr = args->reqs[i]->output;
			args->enq_ops[i]->output[0]->iova_addr =
				rte_mem_virt2iova(args->reqs[i]->output);
			args->enq_ops[i]->output[0]->length = t->model[fid].out_qsize;
			args->enq_ops[i]->output[0]->next = NULL;
		} else {
			offset = 0;
			for (j = 0; j < t->model[fid].info.nb_inputs; j++) {
				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[j].size,
						       t->cmn.dev_info.align_size);

				args->enq_ops[i]->input[j]->addr = args->reqs[i]->input + offset;
				args->enq_ops[i]->input[j]->iova_addr =
					rte_mem_virt2iova(args->reqs[i]->input + offset);
				args->enq_ops[i]->input[j]->length = bufsz;
				args->enq_ops[i]->input[j]->next = NULL;
				offset += bufsz;
			}

			offset = 0;
			for (j = 0; j < t->model[fid].info.nb_outputs; j++) {
				bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[j].size,
						       t->cmn.dev_info.align_size);
				args->enq_ops[i]->output[j]->addr = args->reqs[i]->output + offset;
				args->enq_ops[i]->output[j]->iova_addr =
					rte_mem_virt2iova(args->reqs[i]->output + offset);
				args->enq_ops[i]->output[j]->length = bufsz;
				args->enq_ops[i]->output[j]->next = NULL;
				offset += bufsz;
			}
		}

		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

enqueue_reqs:
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
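				/* Query the driver for the extended error code and message
				 * of the failed op before counting the failure.
				 */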
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL) {
				rte_mempool_put(t->model[req->fid].io_pool, req);
				rte_mempool_put_bulk(t->buf_seg_pool,
						     (void **)args->deq_ops[i]->input,
						     t->model[req->fid].info.nb_inputs);
				rte_mempool_put_bulk(t->buf_seg_pool,
						     (void **)args->deq_ops[i]->output,
						     t->model[req->fid].info.nb_outputs);
			}
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
		       opt->queue_size, dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: Filelist count exceeded device limit, count = %u > (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	if (dev_info.max_io < ML_TEST_MAX_IO_SIZE) {
		ml_err("Insufficient capabilities: Max I/O, count = %u > (max limit = %u)",
		       ML_TEST_MAX_IO_SIZE, dev_info.max_io);
		return false;
	}

	return true;
}

int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check for at least one filelist */
	if (opt->nb_filelist == 0) {
		ml_err("Filelist empty, need at least one filelist to run the test\n");
		return -EINVAL;
	}

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

	/* Check the number of available lcores: each queue pair needs one enqueue
	 * worker and one dequeue worker, plus one main lcore.
	 */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_inference");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	uint32_t lcore_id;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		rte_free(t->args[lcore_id].enq_ops);
		rte_free(t->args[lcore_id].deq_ops);
		rte_free(t->args[lcore_id].reqs);
	}

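	/* Release the test private data allocated in test_inference_setup(). */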
	rte_free(t);
}

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	qp_conf.nb_desc = opt->queue_size;
	qp_conf.cb = NULL;

	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

/* Callback for IO pool create. Computes the fields of the ml_request structure and
 * prepares the quantized input data.
 */
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
	uint64_t offset;
	uint64_t bufsz;
	uint32_t i;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size);
	req->output =
		req->input + RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.align_size);
	req->niters = 0;

	if (t->model[t->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		dbuff_seg[0].addr = t->model[t->fid].input;
		dbuff_seg[0].iova_addr = rte_mem_virt2iova(t->model[t->fid].input);
		dbuff_seg[0].length = t->model[t->fid].inp_dsize;
		dbuff_seg[0].next = NULL;
		d_segs[0] = &dbuff_seg[0];

		qbuff_seg[0].addr = req->input;
		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->input);
		qbuff_seg[0].length = t->model[t->fid].inp_qsize;
		qbuff_seg[0].next = NULL;
		q_segs[0] = &qbuff_seg[0];
	} else {
		offset = 0;
		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
			bufsz = t->model[t->fid].info.input_info[i].nb_elements * sizeof(float);
			dbuff_seg[i].addr = t->model[t->fid].input + offset;
			dbuff_seg[i].iova_addr = rte_mem_virt2iova(t->model[t->fid].input + offset);
			dbuff_seg[i].length = bufsz;
			dbuff_seg[i].next = NULL;
			d_segs[i] = &dbuff_seg[i];
			offset += bufsz;
		}

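		/* Build the quantized input segments pointing into the request's input
		 * buffer, using the device-reported per-input size and alignment.
		 */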
		offset = 0;
		for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[t->fid].info.input_info[i].size,
					       t->cmn.dev_info.align_size);
			qbuff_seg[i].addr = req->input + offset;
			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->input + offset);
			qbuff_seg[i].length = bufsz;
			qbuff_seg[i].next = NULL;
			q_segs[i] = &qbuff_seg[i];
			offset += bufsz;
		}
	}

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, d_segs, q_segs);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	char *buffer = NULL;
	uint32_t buff_size;
	uint32_t mz_size;
	size_t fsize;
	uint32_t i;
	int ret;

	/* get input buffer size */
	t->model[fid].inp_qsize = 0;
	for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
			t->model[fid].inp_qsize += t->model[fid].info.input_info[i].size;
		else
			t->model[fid].inp_qsize += RTE_ALIGN_CEIL(
				t->model[fid].info.input_info[i].size, t->cmn.dev_info.align_size);
	}

	/* get output buffer size */
	t->model[fid].out_qsize = 0;
	for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
		if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
			t->model[fid].out_qsize += t->model[fid].info.output_info[i].size;
		else
			t->model[fid].out_qsize += RTE_ALIGN_CEIL(
				t->model[fid].info.output_info[i].size, t->cmn.dev_info.align_size);
	}

	t->model[fid].inp_dsize = 0;
	for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
		if (opt->quantized_io)
			t->model[fid].inp_dsize += t->model[fid].info.input_info[i].size;
		else
			t->model[fid].inp_dsize +=
				t->model[fid].info.input_info[i].nb_elements * sizeof(float);
	}

	t->model[fid].out_dsize = 0;
	for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
		if (opt->quantized_io)
			t->model[fid].out_dsize += t->model[fid].info.output_info[i].size;
		else
			t->model[fid].out_dsize +=
				t->model[fid].info.output_info[i].nb_elements * sizeof(float);
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
	if (ret != 0)
		goto error;

	if (fsize == t->model[fid].inp_dsize) {
		rte_memcpy(t->model[fid].input, buffer, fsize);
		free(buffer);
	} else {
		ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		free(buffer);
		goto error;
	}

	/* load reference file */
	buffer = NULL;
	if (t->model[fid].reference != NULL) {
		ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
		if (ret != 0)
			goto error;

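		/* Reference file size must match the dequantized output size; the copy
		 * is used later by ml_inference_validation().
		 */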
		if (fsize == t->model[fid].out_dsize) {
			rte_memcpy(t->model[fid].reference, buffer, fsize);
			free(buffer);
		} else {
			ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
			       fsize, t->model[fid].out_dsize);
			ret = -EINVAL;
			free(buffer);
			goto error;
		}
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", "ml_io_pool");
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	if (mz != NULL)
		rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	if (mz != NULL)
		rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	rte_mempool_free(mp);
}

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_op_pool");
		return -ENOMEM;
	}

	/* create buf_segs pool with elements of type struct rte_ml_buff_seg. External data
	 * buffers are attached to the buf_segs while enqueuing inference requests.
	 */
	t->buf_seg_pool = rte_mempool_create("ml_test_mbuf_pool", ML_TEST_MAX_POOL_SIZE * 2,
					     sizeof(struct rte_ml_buff_seg), 0, 0, NULL, NULL, NULL,
					     NULL, opt->socket_id, 0);
	if (t->buf_seg_pool == NULL) {
		ml_err("Failed to create buf_segs pool : %s\n", "ml_test_mbuf_pool");
		rte_ml_op_pool_free(t->op_pool);
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	rte_mempool_free(t->op_pool);

	/* release buf_segs pool */
	rte_mempool_free(t->buf_seg_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	float *reference;
	float *output;
	float deviation;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = (float *)model->output;
		reference = (float *)model->reference;

		i = 0;
next_output:
		j = 0;
next_element:
		match = false;
		if ((*reference == 0) && (*output == 0))
			deviation = 0;
		else
			deviation = 100 * fabs(*output - *reference) / fabs(*reference);
		if (deviation <= t->cmn.opt->tolerance)
			match = true;
		else
			ml_err("id = %d, element = %d, output = %f, reference = %f, deviation = %f %%\n",
			       i, j, *output, *reference, deviation);

		output++;
		reference++;

		if (!match)
			goto done;

		j++;
		if (j < model->info.output_info[i].nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	return match;
}

/* Callback for mempool object iteration. Dequantizes the output data, validates it
 * against the reference output when available and dumps the output buffers to files.
 */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	bool error = false;
	char *dump_path;

	struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
	struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
	uint64_t offset;
	uint64_t bufsz;
	uint32_t i;

	RTE_SET_USED(mp);

	if (req->niters == 0)
		return;

	t->nb_used++;

	if (t->model[req->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
		qbuff_seg[0].addr = req->output;
		qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->output);
		qbuff_seg[0].length = t->model[req->fid].out_qsize;
		qbuff_seg[0].next = NULL;
		q_segs[0] = &qbuff_seg[0];

		dbuff_seg[0].addr = model->output;
		dbuff_seg[0].iova_addr = rte_mem_virt2iova(model->output);
		dbuff_seg[0].length = t->model[req->fid].out_dsize;
		dbuff_seg[0].next = NULL;
		d_segs[0] = &dbuff_seg[0];
	} else {
		offset = 0;
		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
			bufsz = RTE_ALIGN_CEIL(t->model[req->fid].info.output_info[i].size,
					       t->cmn.dev_info.align_size);
			qbuff_seg[i].addr = req->output + offset;
			qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->output + offset);
			qbuff_seg[i].length = bufsz;
			qbuff_seg[i].next = NULL;
			q_segs[i] = &qbuff_seg[i];
			offset += bufsz;
		}

		offset = 0;
		for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
			bufsz = t->model[req->fid].info.output_info[i].nb_elements * sizeof(float);
			dbuff_seg[i].addr = model->output + offset;
			dbuff_seg[i].iova_addr = rte_mem_virt2iova(model->output + offset);
			dbuff_seg[i].length = bufsz;
			dbuff_seg[i].next = NULL;
			d_segs[i] = &dbuff_seg[i];
			offset += bufsz;
		}
	}

	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, q_segs, d_segs);

	if (model->reference == NULL)
		goto dump_output_pass;

	if (!ml_inference_validation(opaque, req))
		goto dump_output_fail;
	else
		goto dump_output_pass;

dump_output_pass:
	if (obj_idx == 0) {
		/* write quantized output */
		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* write dequantized output */
		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
	t->nb_valid++;

	return;

dump_output_fail:
	if (t->cmn.opt->debug) {
		/* dump quantized output buffer */
		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* dump dequantized output buffer */
		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output, obj_idx) ==
		    -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
}

int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}