/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer))

#define ML_TEST_CHECK_OUTPUT(output, reference, tolerance) \
	(((float)output - (float)reference) <= (((float)reference * tolerance) / 100.0))

#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
	do { \
		FILE *fp = fopen(name, "w+"); \
		if (fp == NULL) { \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
			err = true; \
		} else { \
			if (fwrite(buffer, 1, size, fp) != size) { \
				ml_err("Error writing output, file: %s, error: %s", name, \
				       strerror(errno)); \
				err = true; \
			} \
			fclose(fp); \
		} \
	} while (0)

/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint16_t fid;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].nb_batches;
	op->mempool = t->op_pool;

	op->input.addr = req->input;
	op->input.length = t->model[fid].inp_qsize;
	op->input.next = NULL;

	op->output.addr = req->output;
	op->output.length = t->model[fid].out_qsize;
	op->output.next = NULL;

	op->user_ptr = req;
	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry;

	for (i = 0; i < ops_count; i++) {
		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].nb_batches;
		args->enq_ops[i]->mempool = t->op_pool;

		args->enq_ops[i]->input.addr = args->reqs[i]->input;
		args->enq_ops[i]->input.length = t->model[fid].inp_qsize;
		args->enq_ops[i]->input.next = NULL;

		args->enq_ops[i]->output.addr = args->reqs[i]->output;
		args->enq_ops[i]->output.length = t->model[fid].out_qsize;
		args->enq_ops[i]->output.next = NULL;

		args->enq_ops[i]->user_ptr = args->reqs[i];
		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

enqueue_reqs:
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL)
				rte_mempool_put(t->model[req->fid].io_pool, req);
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

/* Check requested test options against device capabilities */
bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
		       opt->queue_size, dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: Filelist count exceeded device limit, count = %u > (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	return true;
}

/* Validate inference test options */
int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check for at least one filelist */
	if (opt->nb_filelist == 0) {
		ml_err("Filelist empty, need at least one filelist to run the test\n");
		return -EINVAL;
	}

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

	/* check number of available lcores */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	if (opt->batches == 0)
		ml_dump("batches", "%u (default batch size)", opt->batches);
	else
		ml_dump("batches", "%u", opt->batches);

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_model");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);
	rte_free(t);
}

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	qp_conf.nb_desc = opt->queue_size;
	qp_conf.cb = NULL;

	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

/* Callback for IO pool create. This function would compute the fields of ml_request
 * structure and prepare the quantized input data.
 */
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);
	req->output = req->input +
		      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);
	req->niters = 0;

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,
			   t->model[t->fid].input, req->input);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	char *buffer = NULL;
	uint32_t buff_size;
	uint32_t mz_size;
	size_t fsize;
	int ret;

	/* get input buffer size */
	ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
				       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);
	if (ret != 0) {
		ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* get output buffer size */
	ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
					&t->model[fid].out_qsize, &t->model[fid].out_dsize);
	if (ret != 0) {
		ml_err("Failed to get output size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
	if (ret != 0)
		goto error;

	if (fsize == t->model[fid].inp_dsize) {
		rte_memcpy(t->model[fid].input, buffer, fsize);
		free(buffer);
	} else {
		ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		goto error;
	}

	/* load reference file */
	buffer = NULL;
	if (t->model[fid].reference != NULL) {
		ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
		if (ret != 0)
			goto error;

		if (fsize == t->model[fid].out_dsize) {
			rte_memcpy(t->model[fid].reference, buffer, fsize);
			free(buffer);
		} else {
			ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
			       fsize, t->model[fid].out_dsize);
			ret = -EINVAL;
			goto error;
		}
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", "ml_io_pool");
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	if (mz != NULL)
		rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	free(buffer);

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	if (mz != NULL)
		rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	rte_mempool_free(mp);
}

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_op_pool");
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	rte_mempool_free(t->op_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	uint32_t nb_elements;
	uint8_t *reference;
	uint8_t *output;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = model->output;
		reference = model->reference;

		i = 0;
next_output:
		nb_elements =
			model->info.output_info[i].shape.w * model->info.output_info[i].shape.x *
			model->info.output_info[i].shape.y * model->info.output_info[i].shape.z;
		j = 0;
next_element:
		match = false;
		switch (model->info.output_info[i].dtype) {
		case RTE_ML_IO_TYPE_INT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t),
						 ML_TEST_READ_TYPE(reference, int8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int8_t);
			reference += sizeof(int8_t);
			break;
		case RTE_ML_IO_TYPE_UINT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t),
						 ML_TEST_READ_TYPE(reference, uint8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint8_t);
			reference += sizeof(uint8_t);
			break;
		case RTE_ML_IO_TYPE_INT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t),
						 ML_TEST_READ_TYPE(reference, int16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int16_t);
			reference += sizeof(int16_t);
			break;
		case RTE_ML_IO_TYPE_UINT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t),
						 ML_TEST_READ_TYPE(reference, uint16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint16_t);
			reference += sizeof(uint16_t);
			break;
		case RTE_ML_IO_TYPE_INT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t),
						 ML_TEST_READ_TYPE(reference, int32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int32_t);
			reference += sizeof(int32_t);
			break;
		case RTE_ML_IO_TYPE_UINT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t),
						 ML_TEST_READ_TYPE(reference, uint32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint32_t);
			reference += sizeof(uint32_t);
			break;
		case RTE_ML_IO_TYPE_FP32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float),
						 ML_TEST_READ_TYPE(reference, float),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(float);
			reference += sizeof(float);
			break;
		default: /* other types, fp8, fp16, bfloat16 */
			match = true;
		}

		if (!match)
			goto done;
		j++;
		if (j < nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	return match;
}

/* Callback for mempool object iteration. This call would dequantize output data. */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	bool error = false;
	char *dump_path;

	RTE_SET_USED(mp);

	if (req->niters == 0)
		return;

	t->nb_used++;
	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,
			     req->output, model->output);

	if (model->reference == NULL)
		goto dump_output_pass;

	if (!ml_inference_validation(opaque, req))
		goto dump_output_fail;
	else
		goto dump_output_pass;

dump_output_pass:
	if (obj_idx == 0) {
		/* write quantized output */
		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* write dequantized output */
		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
	t->nb_valid++;

	return;

dump_output_fail:
	if (t->cmn.opt->debug) {
		/* dump quantized output buffer */
		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* dump dequantized output buffer */
		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
}

/* Dequantize and validate all completed requests and compute the test result */
int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

/* Launch enqueue and dequeue workers, one lcore pair per queue-pair */
int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}