1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022 Marvell. 3 */ 4 5 #include <errno.h> 6 #include <unistd.h> 7 8 #include <rte_common.h> 9 #include <rte_cycles.h> 10 #include <rte_hash_crc.h> 11 #include <rte_launch.h> 12 #include <rte_lcore.h> 13 #include <rte_malloc.h> 14 #include <rte_memzone.h> 15 #include <rte_mldev.h> 16 17 #include "ml_common.h" 18 #include "test_inference_common.h" 19 20 #define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer)) 21 22 #define ML_TEST_CHECK_OUTPUT(output, reference, tolerance) \ 23 (((float)output - (float)reference) <= (((float)reference * tolerance) / 100.0)) 24 25 #define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \ 26 do { \ 27 FILE *fp = fopen(name, "w+"); \ 28 if (fp == NULL) { \ 29 ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \ 30 err = true; \ 31 } else { \ 32 if (fwrite(buffer, 1, size, fp) != size) { \ 33 ml_err("Error writing output, file: %s, error: %s", name, \ 34 strerror(errno)); \ 35 err = true; \ 36 } \ 37 fclose(fp); \ 38 } \ 39 } while (0) 40 41 static void 42 print_line(uint16_t len) 43 { 44 uint16_t i; 45 46 for (i = 0; i < len; i++) 47 printf("-"); 48 49 printf("\n"); 50 } 51 52 /* Enqueue inference requests with burst size equal to 1 */ 53 static int 54 ml_enqueue_single(void *arg) 55 { 56 struct test_inference *t = ml_test_priv((struct ml_test *)arg); 57 struct ml_request *req = NULL; 58 struct rte_ml_op *op = NULL; 59 struct ml_core_args *args; 60 uint64_t model_enq = 0; 61 uint64_t start_cycle; 62 uint32_t burst_enq; 63 uint32_t lcore_id; 64 uint16_t fid; 65 int ret; 66 67 lcore_id = rte_lcore_id(); 68 args = &t->args[lcore_id]; 69 args->start_cycles = 0; 70 model_enq = 0; 71 72 if (args->nb_reqs == 0) 73 return 0; 74 75 next_rep: 76 fid = args->start_fid; 77 78 next_model: 79 ret = rte_mempool_get(t->op_pool, (void **)&op); 80 if (ret != 0) 81 goto next_model; 82 83 retry: 84 ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req); 85 if (ret != 0) 86 goto retry; 87 88 op->model_id = t->model[fid].id; 89 op->nb_batches = t->model[fid].nb_batches; 90 op->mempool = t->op_pool; 91 92 op->input.addr = req->input; 93 op->input.length = t->model[fid].inp_qsize; 94 op->input.next = NULL; 95 96 op->output.addr = req->output; 97 op->output.length = t->model[fid].out_qsize; 98 op->output.next = NULL; 99 100 op->user_ptr = req; 101 req->niters++; 102 req->fid = fid; 103 104 enqueue_req: 105 start_cycle = rte_get_tsc_cycles(); 106 burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1); 107 if (burst_enq == 0) 108 goto enqueue_req; 109 110 args->start_cycles += start_cycle; 111 fid++; 112 if (likely(fid <= args->end_fid)) 113 goto next_model; 114 115 model_enq++; 116 if (likely(model_enq < args->nb_reqs)) 117 goto next_rep; 118 119 return 0; 120 } 121 122 /* Dequeue inference requests with burst size equal to 1 */ 123 static int 124 ml_dequeue_single(void *arg) 125 { 126 struct test_inference *t = ml_test_priv((struct ml_test *)arg); 127 struct rte_ml_op_error error; 128 struct rte_ml_op *op = NULL; 129 struct ml_core_args *args; 130 struct ml_request *req; 131 uint64_t total_deq = 0; 132 uint8_t nb_filelist; 133 uint32_t burst_deq; 134 uint64_t end_cycle; 135 uint32_t lcore_id; 136 137 lcore_id = rte_lcore_id(); 138 args = &t->args[lcore_id]; 139 args->end_cycles = 0; 140 nb_filelist = args->end_fid - args->start_fid + 1; 141 142 if (args->nb_reqs == 0) 143 return 0; 144 145 dequeue_req: 146 burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1); 147 end_cycle = rte_get_tsc_cycles(); 148 149 if (likely(burst_deq == 1)) { 150 total_deq += burst_deq; 151 args->end_cycles += end_cycle; 152 if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) { 153 rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error); 154 ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode, 155 error.message); 156 t->error_count[lcore_id]++; 157 } 158 req = (struct ml_request *)op->user_ptr; 159 rte_mempool_put(t->model[req->fid].io_pool, req); 160 rte_mempool_put(t->op_pool, op); 161 } 162 163 if (likely(total_deq < args->nb_reqs * nb_filelist)) 164 goto dequeue_req; 165 166 return 0; 167 } 168 169 /* Enqueue inference requests with burst size greater than 1 */ 170 static int 171 ml_enqueue_burst(void *arg) 172 { 173 struct test_inference *t = ml_test_priv((struct ml_test *)arg); 174 struct ml_core_args *args; 175 uint64_t start_cycle; 176 uint16_t ops_count; 177 uint64_t model_enq; 178 uint16_t burst_enq; 179 uint32_t lcore_id; 180 uint16_t pending; 181 uint16_t idx; 182 uint16_t fid; 183 uint16_t i; 184 int ret; 185 186 lcore_id = rte_lcore_id(); 187 args = &t->args[lcore_id]; 188 args->start_cycles = 0; 189 model_enq = 0; 190 191 if (args->nb_reqs == 0) 192 return 0; 193 194 next_rep: 195 fid = args->start_fid; 196 197 next_model: 198 ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq); 199 ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count); 200 if (ret != 0) 201 goto next_model; 202 203 retry: 204 ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count); 205 if (ret != 0) 206 goto retry; 207 208 for (i = 0; i < ops_count; i++) { 209 args->enq_ops[i]->model_id = t->model[fid].id; 210 args->enq_ops[i]->nb_batches = t->model[fid].nb_batches; 211 args->enq_ops[i]->mempool = t->op_pool; 212 213 args->enq_ops[i]->input.addr = args->reqs[i]->input; 214 args->enq_ops[i]->input.length = t->model[fid].inp_qsize; 215 args->enq_ops[i]->input.next = NULL; 216 217 args->enq_ops[i]->output.addr = args->reqs[i]->output; 218 args->enq_ops[i]->output.length = t->model[fid].out_qsize; 219 args->enq_ops[i]->output.next = NULL; 220 221 args->enq_ops[i]->user_ptr = args->reqs[i]; 222 args->reqs[i]->niters++; 223 args->reqs[i]->fid = fid; 224 } 225 226 idx = 0; 227 pending = ops_count; 228 229 enqueue_reqs: 230 start_cycle = rte_get_tsc_cycles(); 231 burst_enq = 232 rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending); 233 args->start_cycles += burst_enq * start_cycle; 234 pending = pending - burst_enq; 235 236 if (pending > 0) { 237 idx = idx + burst_enq; 238 goto enqueue_reqs; 239 } 240 241 fid++; 242 if (fid <= args->end_fid) 243 goto next_model; 244 245 model_enq = model_enq + ops_count; 246 if (model_enq < args->nb_reqs) 247 goto next_rep; 248 249 return 0; 250 } 251 252 /* Dequeue inference requests with burst size greater than 1 */ 253 static int 254 ml_dequeue_burst(void *arg) 255 { 256 struct test_inference *t = ml_test_priv((struct ml_test *)arg); 257 struct rte_ml_op_error error; 258 struct ml_core_args *args; 259 struct ml_request *req; 260 uint64_t total_deq = 0; 261 uint16_t burst_deq = 0; 262 uint8_t nb_filelist; 263 uint64_t end_cycle; 264 uint32_t lcore_id; 265 uint32_t i; 266 267 lcore_id = rte_lcore_id(); 268 args = &t->args[lcore_id]; 269 args->end_cycles = 0; 270 nb_filelist = args->end_fid - args->start_fid + 1; 271 272 if (args->nb_reqs == 0) 273 return 0; 274 275 dequeue_burst: 276 burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops, 277 t->cmn.opt->burst_size); 278 end_cycle = rte_get_tsc_cycles(); 279 280 if (likely(burst_deq > 0)) { 281 total_deq += burst_deq; 282 args->end_cycles += burst_deq * end_cycle; 283 284 for (i = 0; i < burst_deq; i++) { 285 if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) { 286 rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error); 287 ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", 288 error.errcode, error.message); 289 t->error_count[lcore_id]++; 290 } 291 req = (struct ml_request *)args->deq_ops[i]->user_ptr; 292 if (req != NULL) 293 rte_mempool_put(t->model[req->fid].io_pool, req); 294 } 295 rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq); 296 } 297 298 if (total_deq < args->nb_reqs * nb_filelist) 299 goto dequeue_burst; 300 301 return 0; 302 } 303 304 bool 305 test_inference_cap_check(struct ml_options *opt) 306 { 307 struct rte_ml_dev_info dev_info; 308 309 if (!ml_test_cap_check(opt)) 310 return false; 311 312 rte_ml_dev_info_get(opt->dev_id, &dev_info); 313 314 if (opt->queue_pairs > dev_info.max_queue_pairs) { 315 ml_err("Insufficient capabilities: queue_pairs = %u, max_queue_pairs = %u", 316 opt->queue_pairs, dev_info.max_queue_pairs); 317 return false; 318 } 319 320 if (opt->queue_size > dev_info.max_desc) { 321 ml_err("Insufficient capabilities: queue_size = %u, max_desc = %u", opt->queue_size, 322 dev_info.max_desc); 323 return false; 324 } 325 326 if (opt->nb_filelist > dev_info.max_models) { 327 ml_err("Insufficient capabilities: Filelist count exceeded device limit, count = %u (max limit = %u)", 328 opt->nb_filelist, dev_info.max_models); 329 return false; 330 } 331 332 return true; 333 } 334 335 int 336 test_inference_opt_check(struct ml_options *opt) 337 { 338 uint32_t i; 339 int ret; 340 341 /* check common opts */ 342 ret = ml_test_opt_check(opt); 343 if (ret != 0) 344 return ret; 345 346 /* check file availability */ 347 for (i = 0; i < opt->nb_filelist; i++) { 348 if (access(opt->filelist[i].model, F_OK) == -1) { 349 ml_err("Model file not accessible: id = %u, file = %s", i, 350 opt->filelist[i].model); 351 return -ENOENT; 352 } 353 354 if (access(opt->filelist[i].input, F_OK) == -1) { 355 ml_err("Input file not accessible: id = %u, file = %s", i, 356 opt->filelist[i].input); 357 return -ENOENT; 358 } 359 } 360 361 if (opt->repetitions == 0) { 362 ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions); 363 return -EINVAL; 364 } 365 366 if (opt->burst_size == 0) { 367 ml_err("Invalid option, burst_size = %u\n", opt->burst_size); 368 return -EINVAL; 369 } 370 371 if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) { 372 ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size, 373 ML_TEST_MAX_POOL_SIZE); 374 return -EINVAL; 375 } 376 377 if (opt->queue_pairs == 0) { 378 ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs); 379 return -EINVAL; 380 } 381 382 if (opt->queue_size == 0) { 383 ml_err("Invalid option, queue_size = %u\n", opt->queue_size); 384 return -EINVAL; 385 } 386 387 /* check number of available lcores. */ 388 if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) { 389 ml_err("Insufficient lcores = %u\n", rte_lcore_count()); 390 ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs, 391 (opt->queue_pairs * 2 + 1)); 392 return -EINVAL; 393 } 394 395 return 0; 396 } 397 398 void 399 test_inference_opt_dump(struct ml_options *opt) 400 { 401 uint32_t i; 402 403 /* dump common opts */ 404 ml_test_opt_dump(opt); 405 406 /* dump test opts */ 407 ml_dump("repetitions", "%" PRIu64, opt->repetitions); 408 ml_dump("burst_size", "%u", opt->burst_size); 409 ml_dump("queue_pairs", "%u", opt->queue_pairs); 410 ml_dump("queue_size", "%u", opt->queue_size); 411 ml_dump("tolerance", "%-7.3f", opt->tolerance); 412 ml_dump("stats", "%s", (opt->stats ? "true" : "false")); 413 414 if (opt->batches == 0) 415 ml_dump("batches", "%u (default)", opt->batches); 416 else 417 ml_dump("batches", "%u", opt->batches); 418 419 ml_dump_begin("filelist"); 420 for (i = 0; i < opt->nb_filelist; i++) { 421 ml_dump_list("model", i, opt->filelist[i].model); 422 ml_dump_list("input", i, opt->filelist[i].input); 423 ml_dump_list("output", i, opt->filelist[i].output); 424 if (strcmp(opt->filelist[i].reference, "\0") != 0) 425 ml_dump_list("reference", i, opt->filelist[i].reference); 426 } 427 ml_dump_end; 428 } 429 430 int 431 test_inference_setup(struct ml_test *test, struct ml_options *opt) 432 { 433 struct test_inference *t; 434 void *test_inference; 435 uint32_t lcore_id; 436 int ret = 0; 437 uint32_t i; 438 439 test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference), 440 RTE_CACHE_LINE_SIZE, opt->socket_id); 441 if (test_inference == NULL) { 442 ml_err("failed to allocate memory for test_model"); 443 ret = -ENOMEM; 444 goto error; 445 } 446 test->test_priv = test_inference; 447 t = ml_test_priv(test); 448 449 t->nb_used = 0; 450 t->nb_valid = 0; 451 t->cmn.result = ML_TEST_FAILED; 452 t->cmn.opt = opt; 453 memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t)); 454 455 /* get device info */ 456 ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info); 457 if (ret < 0) { 458 ml_err("failed to get device info"); 459 goto error; 460 } 461 462 if (opt->burst_size == 1) { 463 t->enqueue = ml_enqueue_single; 464 t->dequeue = ml_dequeue_single; 465 } else { 466 t->enqueue = ml_enqueue_burst; 467 t->dequeue = ml_dequeue_burst; 468 } 469 470 /* set model initial state */ 471 for (i = 0; i < opt->nb_filelist; i++) 472 t->model[i].state = MODEL_INITIAL; 473 474 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 475 t->args[lcore_id].enq_ops = rte_zmalloc_socket( 476 "ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *), 477 RTE_CACHE_LINE_SIZE, opt->socket_id); 478 t->args[lcore_id].deq_ops = rte_zmalloc_socket( 479 "ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *), 480 RTE_CACHE_LINE_SIZE, opt->socket_id); 481 t->args[lcore_id].reqs = rte_zmalloc_socket( 482 "ml_test_requests", opt->burst_size * sizeof(struct ml_request *), 483 RTE_CACHE_LINE_SIZE, opt->socket_id); 484 } 485 486 for (i = 0; i < RTE_MAX_LCORE; i++) { 487 t->args[i].start_cycles = 0; 488 t->args[i].end_cycles = 0; 489 } 490 491 return 0; 492 493 error: 494 if (test_inference != NULL) 495 rte_free(test_inference); 496 497 return ret; 498 } 499 500 void 501 test_inference_destroy(struct ml_test *test, struct ml_options *opt) 502 { 503 struct test_inference *t; 504 505 RTE_SET_USED(opt); 506 507 t = ml_test_priv(test); 508 if (t != NULL) 509 rte_free(t); 510 } 511 512 int 513 ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt) 514 { 515 struct rte_ml_dev_qp_conf qp_conf; 516 struct test_inference *t; 517 uint16_t qp_id; 518 int ret; 519 520 t = ml_test_priv(test); 521 522 RTE_SET_USED(t); 523 524 ret = ml_test_device_configure(test, opt); 525 if (ret != 0) 526 return ret; 527 528 /* setup queue pairs */ 529 qp_conf.nb_desc = opt->queue_size; 530 qp_conf.cb = NULL; 531 532 for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) { 533 qp_conf.nb_desc = opt->queue_size; 534 qp_conf.cb = NULL; 535 536 ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id); 537 if (ret != 0) { 538 ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n", 539 opt->dev_id, qp_id); 540 return ret; 541 } 542 } 543 544 ret = ml_test_device_start(test, opt); 545 if (ret != 0) 546 goto error; 547 548 return 0; 549 550 error: 551 ml_test_device_close(test, opt); 552 553 return ret; 554 } 555 556 int 557 ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt) 558 { 559 int ret; 560 561 ret = ml_test_device_stop(test, opt); 562 if (ret != 0) 563 goto error; 564 565 ret = ml_test_device_close(test, opt); 566 if (ret != 0) 567 return ret; 568 569 return 0; 570 571 error: 572 ml_test_device_close(test, opt); 573 574 return ret; 575 } 576 577 /* Callback for IO pool create. This function would compute the fields of ml_request 578 * structure and prepare the quantized input data. 579 */ 580 static void 581 ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx) 582 { 583 struct test_inference *t = ml_test_priv((struct ml_test *)opaque); 584 struct ml_request *req = (struct ml_request *)obj; 585 586 RTE_SET_USED(mp); 587 RTE_SET_USED(obj_idx); 588 589 req->input = (uint8_t *)obj + 590 RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size); 591 req->output = req->input + 592 RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size); 593 req->niters = 0; 594 595 /* quantize data */ 596 rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches, 597 t->model[t->fid].input, req->input); 598 } 599 600 int 601 ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid) 602 { 603 struct test_inference *t = ml_test_priv(test); 604 char mz_name[RTE_MEMZONE_NAMESIZE]; 605 char mp_name[RTE_MEMPOOL_NAMESIZE]; 606 const struct rte_memzone *mz; 607 uint64_t nb_buffers; 608 uint32_t buff_size; 609 uint32_t mz_size; 610 uint32_t fsize; 611 FILE *fp; 612 int ret; 613 614 /* get input buffer size */ 615 ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches, 616 &t->model[fid].inp_qsize, &t->model[fid].inp_dsize); 617 if (ret != 0) { 618 ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model); 619 return ret; 620 } 621 622 /* get output buffer size */ 623 ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches, 624 &t->model[fid].out_qsize, &t->model[fid].out_dsize); 625 if (ret != 0) { 626 ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model); 627 return ret; 628 } 629 630 /* allocate buffer for user data */ 631 mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize; 632 if (strcmp(opt->filelist[fid].reference, "\0") != 0) 633 mz_size += t->model[fid].out_dsize; 634 635 sprintf(mz_name, "ml_user_data_%d", fid); 636 mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0); 637 if (mz == NULL) { 638 ml_err("Memzone allocation failed for ml_user_data\n"); 639 ret = -ENOMEM; 640 goto error; 641 } 642 643 t->model[fid].input = mz->addr; 644 t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize; 645 if (strcmp(opt->filelist[fid].reference, "\0") != 0) 646 t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize; 647 else 648 t->model[fid].reference = NULL; 649 650 /* load input file */ 651 fp = fopen(opt->filelist[fid].input, "r"); 652 if (fp == NULL) { 653 ml_err("Failed to open input file : %s\n", opt->filelist[fid].input); 654 ret = -errno; 655 goto error; 656 } 657 658 fseek(fp, 0, SEEK_END); 659 fsize = ftell(fp); 660 fseek(fp, 0, SEEK_SET); 661 if (fsize != t->model[fid].inp_dsize) { 662 ml_err("Invalid input file, size = %u (expected size = %" PRIu64 ")\n", fsize, 663 t->model[fid].inp_dsize); 664 ret = -EINVAL; 665 fclose(fp); 666 goto error; 667 } 668 669 if (fread(t->model[fid].input, 1, t->model[fid].inp_dsize, fp) != t->model[fid].inp_dsize) { 670 ml_err("Failed to read input file : %s\n", opt->filelist[fid].input); 671 ret = -errno; 672 fclose(fp); 673 goto error; 674 } 675 fclose(fp); 676 677 /* load reference file */ 678 if (t->model[fid].reference != NULL) { 679 fp = fopen(opt->filelist[fid].reference, "r"); 680 if (fp == NULL) { 681 ml_err("Failed to open reference file : %s\n", 682 opt->filelist[fid].reference); 683 ret = -errno; 684 goto error; 685 } 686 687 if (fread(t->model[fid].reference, 1, t->model[fid].out_dsize, fp) != 688 t->model[fid].out_dsize) { 689 ml_err("Failed to read reference file : %s\n", 690 opt->filelist[fid].reference); 691 ret = -errno; 692 fclose(fp); 693 goto error; 694 } 695 fclose(fp); 696 } 697 698 /* create mempool for quantized input and output buffers. ml_request_initialize is 699 * used as a callback for object creation. 700 */ 701 buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) + 702 RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) + 703 RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size); 704 nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions); 705 706 t->fid = fid; 707 sprintf(mp_name, "ml_io_pool_%d", fid); 708 t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL, 709 ml_request_initialize, test, opt->socket_id, 0); 710 if (t->model[fid].io_pool == NULL) { 711 ml_err("Failed to create io pool : %s\n", "ml_io_pool"); 712 ret = -ENOMEM; 713 goto error; 714 } 715 716 return 0; 717 718 error: 719 if (mz != NULL) 720 rte_memzone_free(mz); 721 722 if (t->model[fid].io_pool != NULL) { 723 rte_mempool_free(t->model[fid].io_pool); 724 t->model[fid].io_pool = NULL; 725 } 726 727 return ret; 728 } 729 730 void 731 ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid) 732 { 733 char mz_name[RTE_MEMZONE_NAMESIZE]; 734 char mp_name[RTE_MEMPOOL_NAMESIZE]; 735 const struct rte_memzone *mz; 736 struct rte_mempool *mp; 737 738 RTE_SET_USED(test); 739 RTE_SET_USED(opt); 740 741 /* release user data memzone */ 742 sprintf(mz_name, "ml_user_data_%d", fid); 743 mz = rte_memzone_lookup(mz_name); 744 if (mz != NULL) 745 rte_memzone_free(mz); 746 747 /* destroy io pool */ 748 sprintf(mp_name, "ml_io_pool_%d", fid); 749 mp = rte_mempool_lookup(mp_name); 750 if (mp != NULL) 751 rte_mempool_free(mp); 752 } 753 754 int 755 ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt) 756 { 757 struct test_inference *t = ml_test_priv(test); 758 759 /* create op pool */ 760 t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0, 761 opt->socket_id); 762 if (t->op_pool == NULL) { 763 ml_err("Failed to create op pool : %s\n", "ml_op_pool"); 764 return -ENOMEM; 765 } 766 767 return 0; 768 } 769 770 void 771 ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt) 772 { 773 struct test_inference *t = ml_test_priv(test); 774 775 RTE_SET_USED(opt); 776 777 /* release op pool */ 778 if (t->op_pool != NULL) 779 rte_mempool_free(t->op_pool); 780 } 781 782 static bool 783 ml_inference_validation(struct ml_test *test, struct ml_request *req) 784 { 785 struct test_inference *t = ml_test_priv((struct ml_test *)test); 786 struct ml_model *model; 787 uint32_t nb_elements; 788 uint8_t *reference; 789 uint8_t *output; 790 bool match; 791 uint32_t i; 792 uint32_t j; 793 794 model = &t->model[req->fid]; 795 796 /* compare crc when tolerance is 0 */ 797 if (t->cmn.opt->tolerance == 0.0) { 798 match = (rte_hash_crc(model->output, model->out_dsize, 0) == 799 rte_hash_crc(model->reference, model->out_dsize, 0)); 800 } else { 801 output = model->output; 802 reference = model->reference; 803 804 i = 0; 805 next_output: 806 nb_elements = 807 model->info.output_info[i].shape.w * model->info.output_info[i].shape.x * 808 model->info.output_info[i].shape.y * model->info.output_info[i].shape.z; 809 j = 0; 810 next_element: 811 match = false; 812 switch (model->info.output_info[i].dtype) { 813 case RTE_ML_IO_TYPE_INT8: 814 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t), 815 ML_TEST_READ_TYPE(reference, int8_t), 816 t->cmn.opt->tolerance)) 817 match = true; 818 819 output += sizeof(int8_t); 820 reference += sizeof(int8_t); 821 break; 822 case RTE_ML_IO_TYPE_UINT8: 823 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t), 824 ML_TEST_READ_TYPE(reference, uint8_t), 825 t->cmn.opt->tolerance)) 826 match = true; 827 828 output += sizeof(float); 829 reference += sizeof(float); 830 break; 831 case RTE_ML_IO_TYPE_INT16: 832 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t), 833 ML_TEST_READ_TYPE(reference, int16_t), 834 t->cmn.opt->tolerance)) 835 match = true; 836 837 output += sizeof(int16_t); 838 reference += sizeof(int16_t); 839 break; 840 case RTE_ML_IO_TYPE_UINT16: 841 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t), 842 ML_TEST_READ_TYPE(reference, uint16_t), 843 t->cmn.opt->tolerance)) 844 match = true; 845 846 output += sizeof(uint16_t); 847 reference += sizeof(uint16_t); 848 break; 849 case RTE_ML_IO_TYPE_INT32: 850 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t), 851 ML_TEST_READ_TYPE(reference, int32_t), 852 t->cmn.opt->tolerance)) 853 match = true; 854 855 output += sizeof(int32_t); 856 reference += sizeof(int32_t); 857 break; 858 case RTE_ML_IO_TYPE_UINT32: 859 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t), 860 ML_TEST_READ_TYPE(reference, uint32_t), 861 t->cmn.opt->tolerance)) 862 match = true; 863 864 output += sizeof(uint32_t); 865 reference += sizeof(uint32_t); 866 break; 867 case RTE_ML_IO_TYPE_FP32: 868 if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float), 869 ML_TEST_READ_TYPE(reference, float), 870 t->cmn.opt->tolerance)) 871 match = true; 872 873 output += sizeof(float); 874 reference += sizeof(float); 875 break; 876 default: /* other types, fp8, fp16, bfloat16 */ 877 match = true; 878 } 879 880 if (!match) 881 goto done; 882 j++; 883 if (j < nb_elements) 884 goto next_element; 885 886 i++; 887 if (i < model->info.nb_outputs) 888 goto next_output; 889 } 890 done: 891 if (match) 892 t->nb_valid++; 893 894 return match; 895 } 896 897 /* Callback for mempool object iteration. This call would dequantize output data. */ 898 static void 899 ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx) 900 { 901 struct test_inference *t = ml_test_priv((struct ml_test *)opaque); 902 struct ml_request *req = (struct ml_request *)obj; 903 struct ml_model *model = &t->model[req->fid]; 904 char str[PATH_MAX]; 905 bool error = false; 906 907 RTE_SET_USED(mp); 908 909 if (req->niters == 0) 910 return; 911 912 t->nb_used++; 913 rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches, 914 req->output, model->output); 915 916 if (model->reference == NULL) { 917 t->nb_valid++; 918 goto dump_output_pass; 919 } 920 921 if (!ml_inference_validation(opaque, req)) 922 goto dump_output_fail; 923 else 924 goto dump_output_pass; 925 926 dump_output_pass: 927 if (obj_idx == 0) { 928 /* write quantized output */ 929 snprintf(str, PATH_MAX, "%s.q", t->cmn.opt->filelist[req->fid].output); 930 ML_OPEN_WRITE_GET_ERR(str, req->output, model->out_qsize, error); 931 if (error) 932 return; 933 934 /* write dequantized output */ 935 snprintf(str, PATH_MAX, "%s", t->cmn.opt->filelist[req->fid].output); 936 ML_OPEN_WRITE_GET_ERR(str, model->output, model->out_dsize, error); 937 if (error) 938 return; 939 } 940 941 return; 942 943 dump_output_fail: 944 if (t->cmn.opt->debug) { 945 /* dump quantized output buffer */ 946 snprintf(str, PATH_MAX, "%s.q.%d", t->cmn.opt->filelist[req->fid].output, obj_idx); 947 ML_OPEN_WRITE_GET_ERR(str, req->output, model->out_qsize, error); 948 if (error) 949 return; 950 951 /* dump dequantized output buffer */ 952 snprintf(str, PATH_MAX, "%s.%d", t->cmn.opt->filelist[req->fid].output, obj_idx); 953 ML_OPEN_WRITE_GET_ERR(str, model->output, model->out_dsize, error); 954 if (error) 955 return; 956 } 957 } 958 959 int 960 ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid) 961 { 962 struct test_inference *t = ml_test_priv(test); 963 uint64_t error_count = 0; 964 uint32_t i; 965 966 RTE_SET_USED(opt); 967 968 /* check for errors */ 969 for (i = 0; i < RTE_MAX_LCORE; i++) 970 error_count += t->error_count[i]; 971 972 rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test); 973 974 if ((t->nb_used == t->nb_valid) && (error_count == 0)) 975 t->cmn.result = ML_TEST_SUCCESS; 976 else 977 t->cmn.result = ML_TEST_FAILED; 978 979 return t->cmn.result; 980 } 981 982 int 983 ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid, 984 uint16_t end_fid) 985 { 986 struct test_inference *t = ml_test_priv(test); 987 uint32_t lcore_id; 988 uint32_t nb_reqs; 989 uint32_t id = 0; 990 uint32_t qp_id; 991 992 nb_reqs = opt->repetitions / opt->queue_pairs; 993 994 RTE_LCORE_FOREACH_WORKER(lcore_id) 995 { 996 if (id >= opt->queue_pairs * 2) 997 break; 998 999 qp_id = id / 2; 1000 t->args[lcore_id].qp_id = qp_id; 1001 t->args[lcore_id].nb_reqs = nb_reqs; 1002 if (qp_id == 0) 1003 t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs; 1004 1005 if (t->args[lcore_id].nb_reqs == 0) { 1006 id++; 1007 break; 1008 } 1009 1010 t->args[lcore_id].start_fid = start_fid; 1011 t->args[lcore_id].end_fid = end_fid; 1012 1013 if (id % 2 == 0) 1014 rte_eal_remote_launch(t->enqueue, test, lcore_id); 1015 else 1016 rte_eal_remote_launch(t->dequeue, test, lcore_id); 1017 1018 id++; 1019 } 1020 1021 return 0; 1022 } 1023 1024 int 1025 ml_inference_stats_get(struct ml_test *test, struct ml_options *opt) 1026 { 1027 struct test_inference *t = ml_test_priv(test); 1028 uint64_t total_cycles = 0; 1029 uint32_t nb_filelist; 1030 uint64_t throughput; 1031 uint64_t avg_e2e; 1032 uint32_t qp_id; 1033 uint64_t freq; 1034 int ret; 1035 int i; 1036 1037 if (!opt->stats) 1038 return 0; 1039 1040 /* get xstats size */ 1041 t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0); 1042 if (t->xstats_size >= 0) { 1043 /* allocate for xstats_map and values */ 1044 t->xstats_map = rte_malloc( 1045 "ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0); 1046 if (t->xstats_map == NULL) { 1047 ret = -ENOMEM; 1048 goto error; 1049 } 1050 1051 t->xstats_values = 1052 rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0); 1053 if (t->xstats_values == NULL) { 1054 ret = -ENOMEM; 1055 goto error; 1056 } 1057 1058 ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size); 1059 if (ret != t->xstats_size) { 1060 printf("Unable to get xstats names, ret = %d\n", ret); 1061 ret = -1; 1062 goto error; 1063 } 1064 1065 for (i = 0; i < t->xstats_size; i++) 1066 rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id, 1067 &t->xstats_values[i], 1); 1068 } 1069 1070 /* print xstats*/ 1071 printf("\n"); 1072 print_line(80); 1073 printf(" ML Device Extended Statistics\n"); 1074 print_line(80); 1075 for (i = 0; i < t->xstats_size; i++) 1076 printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]); 1077 print_line(80); 1078 1079 /* release buffers */ 1080 if (t->xstats_map) 1081 rte_free(t->xstats_map); 1082 1083 if (t->xstats_values) 1084 rte_free(t->xstats_values); 1085 1086 /* print end-to-end stats */ 1087 freq = rte_get_tsc_hz(); 1088 for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++) 1089 total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles; 1090 avg_e2e = total_cycles / opt->repetitions; 1091 1092 if (freq == 0) { 1093 avg_e2e = total_cycles / opt->repetitions; 1094 printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e); 1095 } else { 1096 avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq); 1097 printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e); 1098 } 1099 1100 /* print inference throughput */ 1101 if (strcmp(opt->test_name, "inference_ordered") == 0) 1102 nb_filelist = 1; 1103 else 1104 nb_filelist = opt->nb_filelist; 1105 1106 if (freq == 0) { 1107 throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles; 1108 printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)", 1109 throughput); 1110 } else { 1111 throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles; 1112 printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)", 1113 throughput); 1114 } 1115 1116 print_line(80); 1117 1118 return 0; 1119 1120 error: 1121 if (t->xstats_map) 1122 rte_free(t->xstats_map); 1123 1124 if (t->xstats_values) 1125 rte_free(t->xstats_values); 1126 1127 return ret; 1128 } 1129