/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mldev.h>

#include "ml_common.h"
#include "test_inference_common.h"

#define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer))

#define ML_TEST_CHECK_OUTPUT(output, reference, tolerance) \
	(((float)output - (float)reference) <= (((float)reference * tolerance) / 100.0))

#define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
	do { \
		FILE *fp = fopen(name, "w+"); \
		if (fp == NULL) { \
			ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
			err = true; \
		} else { \
			if (fwrite(buffer, 1, size, fp) != size) { \
				ml_err("Error writing output, file: %s, error: %s", name, \
				       strerror(errno)); \
				err = true; \
			} \
			fclose(fp); \
		} \
	} while (0)

static void
print_line(uint16_t len)
{
	uint16_t i;

	for (i = 0; i < len; i++)
		printf("-");

	printf("\n");
}

/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_request *req = NULL;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	uint64_t model_enq = 0;
	uint64_t start_cycle;
	uint32_t burst_enq;
	uint32_t lcore_id;
	uint16_t fid;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ret = rte_mempool_get(t->op_pool, (void **)&op);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
	if (ret != 0)
		goto retry;

	op->model_id = t->model[fid].id;
	op->nb_batches = t->model[fid].nb_batches;
	op->mempool = t->op_pool;

	op->input.addr = req->input;
	op->input.length = t->model[fid].inp_qsize;
	op->input.next = NULL;

	op->output.addr = req->output;
	op->output.length = t->model[fid].out_qsize;
	op->output.next = NULL;

	op->user_ptr = req;
	req->niters++;
	req->fid = fid;

enqueue_req:
	start_cycle = rte_get_tsc_cycles();
	burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	if (burst_enq == 0)
		goto enqueue_req;

	/* accumulate per-op enqueue timestamps; the dequeue lcore accumulates the matching
	 * dequeue timestamps in end_cycles, so end_cycles - start_cycles gives the total
	 * end-to-end latency in cycles.
	 */
	args->start_cycles += start_cycle;
	fid++;
	if (likely(fid <= args->end_fid))
		goto next_model;

	model_enq++;
	if (likely(model_enq < args->nb_reqs))
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size equal to 1 */
static int
ml_dequeue_single(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct rte_ml_op *op = NULL;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint8_t nb_filelist;
	uint32_t burst_deq;
	uint64_t end_cycle;
	uint32_t lcore_id;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_req:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq == 1)) {
		total_deq += burst_deq;
		args->end_cycles += end_cycle;
		if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
			rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
			ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
			       error.message);
			t->error_count[lcore_id]++;
		}
		req = (struct ml_request *)op->user_ptr;
		rte_mempool_put(t->model[req->fid].io_pool, req);
		rte_mempool_put(t->op_pool, op);
	}

	if (likely(total_deq < args->nb_reqs * nb_filelist))
		goto dequeue_req;

	return 0;
}

/* Enqueue inference requests with burst size greater than 1 */
static int
ml_enqueue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct ml_core_args *args;
	uint64_t start_cycle;
	uint16_t ops_count;
	uint64_t model_enq;
	uint16_t burst_enq;
	uint32_t lcore_id;
	uint16_t pending;
	uint16_t idx;
	uint16_t fid;
	uint16_t i;
	int ret;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->start_cycles = 0;
	model_enq = 0;

	if (args->nb_reqs == 0)
		return 0;

next_rep:
	fid = args->start_fid;

next_model:
	ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
	ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
	if (ret != 0)
		goto next_model;

retry:
	ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
	if (ret != 0)
		goto retry;

	for (i = 0; i < ops_count; i++) {
		args->enq_ops[i]->model_id = t->model[fid].id;
		args->enq_ops[i]->nb_batches = t->model[fid].nb_batches;
		args->enq_ops[i]->mempool = t->op_pool;

		args->enq_ops[i]->input.addr = args->reqs[i]->input;
		args->enq_ops[i]->input.length = t->model[fid].inp_qsize;
		args->enq_ops[i]->input.next = NULL;

		args->enq_ops[i]->output.addr = args->reqs[i]->output;
		args->enq_ops[i]->output.length = t->model[fid].out_qsize;
		args->enq_ops[i]->output.next = NULL;

		args->enq_ops[i]->user_ptr = args->reqs[i];
		args->reqs[i]->niters++;
		args->reqs[i]->fid = fid;
	}

	idx = 0;
	pending = ops_count;

enqueue_reqs:
	start_cycle = rte_get_tsc_cycles();
	burst_enq =
		rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
	args->start_cycles += burst_enq * start_cycle;
	pending = pending - burst_enq;

	if (pending > 0) {
		idx = idx + burst_enq;
		goto enqueue_reqs;
	}

	fid++;
	if (fid <= args->end_fid)
		goto next_model;

	model_enq = model_enq + ops_count;
	if (model_enq < args->nb_reqs)
		goto next_rep;

	return 0;
}

/* Dequeue inference requests with burst size greater than 1 */
static int
ml_dequeue_burst(void *arg)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)arg);
	struct rte_ml_op_error error;
	struct ml_core_args *args;
	struct ml_request *req;
	uint64_t total_deq = 0;
	uint16_t burst_deq = 0;
	uint8_t nb_filelist;
	uint64_t end_cycle;
	uint32_t lcore_id;
	uint32_t i;

	lcore_id = rte_lcore_id();
	args = &t->args[lcore_id];
	args->end_cycles = 0;
	nb_filelist = args->end_fid - args->start_fid + 1;

	if (args->nb_reqs == 0)
		return 0;

dequeue_burst:
	burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
					 t->cmn.opt->burst_size);
	end_cycle = rte_get_tsc_cycles();

	if (likely(burst_deq > 0)) {
		total_deq += burst_deq;
		args->end_cycles += burst_deq * end_cycle;

		for (i = 0; i < burst_deq; i++) {
			if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
				rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
				ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
				       error.errcode, error.message);
				t->error_count[lcore_id]++;
			}
			req = (struct ml_request *)args->deq_ops[i]->user_ptr;
			if (req != NULL)
				rte_mempool_put(t->model[req->fid].io_pool, req);
		}
		rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
	}

	if (total_deq < args->nb_reqs * nb_filelist)
		goto dequeue_burst;

	return 0;
}

bool
test_inference_cap_check(struct ml_options *opt)
{
	struct rte_ml_dev_info dev_info;

	if (!ml_test_cap_check(opt))
		return false;

	rte_ml_dev_info_get(opt->dev_id, &dev_info);

	if (opt->queue_pairs > dev_info.max_queue_pairs) {
		ml_err("Insufficient capabilities: queue_pairs = %u, max_queue_pairs = %u",
		       opt->queue_pairs, dev_info.max_queue_pairs);
		return false;
	}

	if (opt->queue_size > dev_info.max_desc) {
		ml_err("Insufficient capabilities: queue_size = %u, max_desc = %u", opt->queue_size,
		       dev_info.max_desc);
		return false;
	}

	if (opt->nb_filelist > dev_info.max_models) {
		ml_err("Insufficient capabilities: Filelist count exceeded device limit, count = %u (max limit = %u)",
		       opt->nb_filelist, dev_info.max_models);
		return false;
	}

	return true;
}

int
test_inference_opt_check(struct ml_options *opt)
{
	uint32_t i;
	int ret;

	/* check common opts */
	ret = ml_test_opt_check(opt);
	if (ret != 0)
		return ret;

	/* check file availability */
	for (i = 0; i < opt->nb_filelist; i++) {
		if (access(opt->filelist[i].model, F_OK) == -1) {
			ml_err("Model file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].model);
			return -ENOENT;
		}

		if (access(opt->filelist[i].input, F_OK) == -1) {
			ml_err("Input file not accessible: id = %u, file = %s", i,
			       opt->filelist[i].input);
			return -ENOENT;
		}
	}

	if (opt->repetitions == 0) {
		ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
		return -EINVAL;
	}

	if (opt->burst_size == 0) {
		ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
		return -EINVAL;
	}

	if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
		ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
		       ML_TEST_MAX_POOL_SIZE);
		return -EINVAL;
	}

	if (opt->queue_pairs == 0) {
		ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
		return -EINVAL;
	}

	if (opt->queue_size == 0) {
		ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
		return -EINVAL;
	}

	/* check number of available lcores */
	if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
		ml_err("Insufficient lcores = %u\n", rte_lcore_count());
		ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
		       (opt->queue_pairs * 2 + 1));
		return -EINVAL;
	}

	return 0;
}

void
test_inference_opt_dump(struct ml_options *opt)
{
	uint32_t i;

	/* dump common opts */
	ml_test_opt_dump(opt);

	/* dump test opts */
	ml_dump("repetitions", "%" PRIu64, opt->repetitions);
	ml_dump("burst_size", "%u", opt->burst_size);
	ml_dump("queue_pairs", "%u", opt->queue_pairs);
	ml_dump("queue_size", "%u", opt->queue_size);
	ml_dump("tolerance", "%-7.3f", opt->tolerance);
	ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

	if (opt->batches == 0)
		ml_dump("batches", "%u (default)", opt->batches);
	else
		ml_dump("batches", "%u", opt->batches);

	ml_dump_begin("filelist");
	for (i = 0; i < opt->nb_filelist; i++) {
		ml_dump_list("model", i, opt->filelist[i].model);
		ml_dump_list("input", i, opt->filelist[i].input);
		ml_dump_list("output", i, opt->filelist[i].output);
		if (strcmp(opt->filelist[i].reference, "\0") != 0)
			ml_dump_list("reference", i, opt->filelist[i].reference);
	}
	ml_dump_end;
}

int
test_inference_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;
	void *test_inference;
	uint32_t lcore_id;
	int ret = 0;
	uint32_t i;

	test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
					    RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_inference == NULL) {
		ml_err("failed to allocate memory for test_model");
		ret = -ENOMEM;
		goto error;
	}
	test->test_priv = test_inference;
	t = ml_test_priv(test);

	t->nb_used = 0;
	t->nb_valid = 0;
	t->cmn.result = ML_TEST_FAILED;
	t->cmn.opt = opt;
	memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));

	/* get device info */
	ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
	if (ret < 0) {
		ml_err("failed to get device info");
		goto error;
	}

	if (opt->burst_size == 1) {
		t->enqueue = ml_enqueue_single;
		t->dequeue = ml_dequeue_single;
	} else {
		t->enqueue = ml_enqueue_burst;
		t->dequeue = ml_dequeue_burst;
	}

	/* set model initial state */
	for (i = 0; i < opt->nb_filelist; i++)
		t->model[i].state = MODEL_INITIAL;

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		t->args[lcore_id].enq_ops = rte_zmalloc_socket(
			"ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].deq_ops = rte_zmalloc_socket(
			"ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
		t->args[lcore_id].reqs = rte_zmalloc_socket(
			"ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
			RTE_CACHE_LINE_SIZE, opt->socket_id);
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		t->args[i].start_cycles = 0;
		t->args[i].end_cycles = 0;
	}

	return 0;

error:
	if (test_inference != NULL)
		rte_free(test_inference);

	return ret;
}

void
test_inference_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t;

	RTE_SET_USED(opt);

	t = ml_test_priv(test);
	if (t != NULL)
		rte_free(t);
}

int
ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
{
	struct rte_ml_dev_qp_conf qp_conf;
	struct test_inference *t;
	uint16_t qp_id;
	int ret;

	t = ml_test_priv(test);

	RTE_SET_USED(t);

	ret = ml_test_device_configure(test, opt);
	if (ret != 0)
		return ret;

	/* setup queue pairs */
	qp_conf.nb_desc = opt->queue_size;
	qp_conf.cb = NULL;

	for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
		qp_conf.nb_desc = opt->queue_size;
		qp_conf.cb = NULL;

		ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
		if (ret != 0) {
			ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
			       opt->dev_id, qp_id);
			return ret;
		}
	}

	ret = ml_test_device_start(test, opt);
	if (ret != 0)
		goto error;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

int
ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
{
	int ret;

	ret = ml_test_device_stop(test, opt);
	if (ret != 0)
		goto error;

	ret = ml_test_device_close(test, opt);
	if (ret != 0)
		return ret;

	return 0;

error:
	ml_test_device_close(test, opt);

	return ret;
}

/* Callback for IO pool create. This function would compute the fields of ml_request
 * structure and prepare the quantized input data.
 */
static void
ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);

	req->input = (uint8_t *)obj +
		     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);
	req->output = req->input +
		      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);
	req->niters = 0;

	/* quantize data */
	rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,
			   t->model[t->fid].input, req->input);
}

int
ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t nb_buffers;
	uint32_t buff_size;
	uint32_t mz_size;
	uint32_t fsize;
	FILE *fp;
	int ret;

	/* get input buffer size */
	ret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
				       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);
	if (ret != 0) {
		ml_err("Failed to get input size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* get output buffer size */
	ret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,
					&t->model[fid].out_qsize, &t->model[fid].out_dsize);
	if (ret != 0) {
		ml_err("Failed to get output size, model : %s\n", opt->filelist[fid].model);
		return ret;
	}

	/* allocate buffer for user data */
	mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		mz_size += t->model[fid].out_dsize;

	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
	if (mz == NULL) {
		ml_err("Memzone allocation failed for ml_user_data\n");
		ret = -ENOMEM;
		goto error;
	}

	t->model[fid].input = mz->addr;
	t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
	if (strcmp(opt->filelist[fid].reference, "\0") != 0)
		t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
	else
		t->model[fid].reference = NULL;

	/* load input file */
	fp = fopen(opt->filelist[fid].input, "r");
	if (fp == NULL) {
		ml_err("Failed to open input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		goto error;
	}

	fseek(fp, 0, SEEK_END);
	fsize = ftell(fp);
	fseek(fp, 0, SEEK_SET);
	if (fsize != t->model[fid].inp_dsize) {
		ml_err("Invalid input file, size = %u (expected size = %" PRIu64 ")\n", fsize,
		       t->model[fid].inp_dsize);
		ret = -EINVAL;
		fclose(fp);
		goto error;
	}

	if (fread(t->model[fid].input, 1, t->model[fid].inp_dsize, fp) != t->model[fid].inp_dsize) {
		ml_err("Failed to read input file : %s\n", opt->filelist[fid].input);
		ret = -errno;
		fclose(fp);
		goto error;
	}
	fclose(fp);

	/* load reference file */
	if (t->model[fid].reference != NULL) {
		fp = fopen(opt->filelist[fid].reference, "r");
		if (fp == NULL) {
			ml_err("Failed to open reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			goto error;
		}

		if (fread(t->model[fid].reference, 1, t->model[fid].out_dsize, fp) !=
		    t->model[fid].out_dsize) {
			ml_err("Failed to read reference file : %s\n",
			       opt->filelist[fid].reference);
			ret = -errno;
			fclose(fp);
			goto error;
		}
		fclose(fp);
	}

	/* create mempool for quantized input and output buffers. ml_request_initialize is
	 * used as a callback for object creation.
	 */
	buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +
		    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);
	nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);

	t->fid = fid;
	sprintf(mp_name, "ml_io_pool_%d", fid);
	t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
						   ml_request_initialize, test, opt->socket_id, 0);
	if (t->model[fid].io_pool == NULL) {
		ml_err("Failed to create io pool : %s\n", "ml_io_pool");
		ret = -ENOMEM;
		goto error;
	}

	return 0;

error:
	if (mz != NULL)
		rte_memzone_free(mz);

	if (t->model[fid].io_pool != NULL) {
		rte_mempool_free(t->model[fid].io_pool);
		t->model[fid].io_pool = NULL;
	}

	return ret;
}

void
ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	char mz_name[RTE_MEMZONE_NAMESIZE];
	char mp_name[RTE_MEMPOOL_NAMESIZE];
	const struct rte_memzone *mz;
	struct rte_mempool *mp;

	RTE_SET_USED(test);
	RTE_SET_USED(opt);

	/* release user data memzone */
	sprintf(mz_name, "ml_user_data_%d", fid);
	mz = rte_memzone_lookup(mz_name);
	if (mz != NULL)
		rte_memzone_free(mz);

	/* destroy io pool */
	sprintf(mp_name, "ml_io_pool_%d", fid);
	mp = rte_mempool_lookup(mp_name);
	if (mp != NULL)
		rte_mempool_free(mp);
}

int
ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	/* create op pool */
	t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
					   opt->socket_id);
	if (t->op_pool == NULL) {
		ml_err("Failed to create op pool : %s\n", "ml_op_pool");
		return -ENOMEM;
	}

	return 0;
}

void
ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);

	RTE_SET_USED(opt);

	/* release op pool */
	if (t->op_pool != NULL)
		rte_mempool_free(t->op_pool);
}

static bool
ml_inference_validation(struct ml_test *test, struct ml_request *req)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)test);
	struct ml_model *model;
	uint32_t nb_elements;
	uint8_t *reference;
	uint8_t *output;
	bool match;
	uint32_t i;
	uint32_t j;

	model = &t->model[req->fid];

	/* compare crc when tolerance is 0 */
	if (t->cmn.opt->tolerance == 0.0) {
		match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
			 rte_hash_crc(model->reference, model->out_dsize, 0));
	} else {
		output = model->output;
		reference = model->reference;

		i = 0;
next_output:
		nb_elements =
			model->info.output_info[i].shape.w * model->info.output_info[i].shape.x *
			model->info.output_info[i].shape.y * model->info.output_info[i].shape.z;
		j = 0;
next_element:
		match = false;
		switch (model->info.output_info[i].dtype) {
		case RTE_ML_IO_TYPE_INT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t),
						 ML_TEST_READ_TYPE(reference, int8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int8_t);
			reference += sizeof(int8_t);
			break;
		case RTE_ML_IO_TYPE_UINT8:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t),
						 ML_TEST_READ_TYPE(reference, uint8_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint8_t);
			reference += sizeof(uint8_t);
			break;
		case RTE_ML_IO_TYPE_INT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t),
						 ML_TEST_READ_TYPE(reference, int16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int16_t);
			reference += sizeof(int16_t);
			break;
		case RTE_ML_IO_TYPE_UINT16:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t),
						 ML_TEST_READ_TYPE(reference, uint16_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint16_t);
			reference += sizeof(uint16_t);
			break;
		case RTE_ML_IO_TYPE_INT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t),
						 ML_TEST_READ_TYPE(reference, int32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(int32_t);
			reference += sizeof(int32_t);
			break;
		case RTE_ML_IO_TYPE_UINT32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint32_t),
						 ML_TEST_READ_TYPE(reference, uint32_t),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(uint32_t);
			reference += sizeof(uint32_t);
			break;
		case RTE_ML_IO_TYPE_FP32:
			if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, float),
						 ML_TEST_READ_TYPE(reference, float),
						 t->cmn.opt->tolerance))
				match = true;

			output += sizeof(float);
			reference += sizeof(float);
			break;
		default: /* other types, fp8, fp16, bfloat16 */
			match = true;
		}

		if (!match)
			goto done;
		j++;
		if (j < nb_elements)
			goto next_element;

		i++;
		if (i < model->info.nb_outputs)
			goto next_output;
	}
done:
	if (match)
		t->nb_valid++;

	return match;
}

/* Callback for mempool object iteration. This call would dequantize output data. */
static void
ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
{
	struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
	struct ml_request *req = (struct ml_request *)obj;
	struct ml_model *model = &t->model[req->fid];
	bool error = false;
	char *dump_path;

	RTE_SET_USED(mp);

	if (req->niters == 0)
		return;

	t->nb_used++;
	rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,
			     req->output, model->output);

	if (model->reference == NULL) {
		t->nb_valid++;
		goto dump_output_pass;
	}

	if (!ml_inference_validation(opaque, req))
		goto dump_output_fail;
	else
		goto dump_output_pass;

dump_output_pass:
	if (obj_idx == 0) {
		/* write quantized output */
		if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* write dequantized output */
		if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}

	return;

dump_output_fail:
	if (t->cmn.opt->debug) {
		/* dump quantized output buffer */
		if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
		free(dump_path);
		if (error)
			return;

		/* dump dequantized output buffer */
		if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output,
			     obj_idx) == -1)
			return;
		ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
		free(dump_path);
		if (error)
			return;
	}
}

int
ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t error_count = 0;
	uint32_t i;

	RTE_SET_USED(opt);

	/* check for errors */
	for (i = 0; i < RTE_MAX_LCORE; i++)
		error_count += t->error_count[i];

	rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);

	if ((t->nb_used == t->nb_valid) && (error_count == 0))
		t->cmn.result = ML_TEST_SUCCESS;
	else
		t->cmn.result = ML_TEST_FAILED;

	return t->cmn.result;
}

int
ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
			  uint16_t end_fid)
{
	struct test_inference *t = ml_test_priv(test);
	uint32_t lcore_id;
	uint32_t nb_reqs;
	uint32_t id = 0;
	uint32_t qp_id;

	nb_reqs = opt->repetitions / opt->queue_pairs;

	RTE_LCORE_FOREACH_WORKER(lcore_id)
	{
		if (id >= opt->queue_pairs * 2)
			break;

		qp_id = id / 2;
		t->args[lcore_id].qp_id = qp_id;
		t->args[lcore_id].nb_reqs = nb_reqs;
		if (qp_id == 0)
			t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;

		if (t->args[lcore_id].nb_reqs == 0) {
			id++;
			break;
		}

		t->args[lcore_id].start_fid = start_fid;
		t->args[lcore_id].end_fid = end_fid;

		/* each queue pair gets a pair of worker lcores: even ids run the enqueue
		 * routine, odd ids run the dequeue routine
		 */
		if (id % 2 == 0)
			rte_eal_remote_launch(t->enqueue, test, lcore_id);
		else
			rte_eal_remote_launch(t->dequeue, test, lcore_id);

		id++;
	}

	return 0;
}

int
ml_inference_stats_get(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t total_cycles = 0;
	uint32_t nb_filelist;
	uint64_t throughput;
	uint64_t avg_e2e;
	uint32_t qp_id;
	uint64_t freq;
	int ret;
	int i;

	if (!opt->stats)
		return 0;

	/* get xstats size */
	t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0);
	if (t->xstats_size >= 0) {
		/* allocate for xstats_map and values */
		t->xstats_map = rte_malloc(
			"ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0);
		if (t->xstats_map == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		t->xstats_values =
			rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0);
		if (t->xstats_values == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size);
		if (ret != t->xstats_size) {
			printf("Unable to get xstats names, ret = %d\n", ret);
			ret = -1;
			goto error;
		}

		for (i = 0; i < t->xstats_size; i++)
			rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id,
					      &t->xstats_values[i], 1);
	}

	/* print xstats */
	printf("\n");
	print_line(80);
	printf(" ML Device Extended Statistics\n");
	print_line(80);
	for (i = 0; i < t->xstats_size; i++)
		printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]);
	print_line(80);

	/* release buffers */
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	/* print end-to-end stats */
	freq = rte_get_tsc_hz();
	for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++)
		total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles;

	if (freq == 0) {
		avg_e2e = total_cycles / opt->repetitions;
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e);
	} else {
		avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq);
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e);
	}

	/* print inference throughput */
	if (strcmp(opt->test_name, "inference_ordered") == 0)
		nb_filelist = 1;
	else
		nb_filelist = opt->nb_filelist;

	if (freq == 0) {
		throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)",
		       throughput);
	} else {
		throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)",
		       throughput);
	}

	print_line(80);

	return 0;

error:
	if (t->xstats_map)
		rte_free(t->xstats_map);

	if (t->xstats_values)
		rte_free(t->xstats_values);

	return ret;
}