1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022 Marvell. 3 */ 4 5 #ifndef RTE_MLDEV_INTERNAL_H 6 #define RTE_MLDEV_INTERNAL_H 7 8 /** 9 * @file 10 * 11 * MLDEV internal header 12 * 13 * This file contains MLDEV private data structures and macros. 14 * 15 * @note 16 * These APIs are for MLDEV PMDs and library only. 17 */ 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif 22 23 #include <stdint.h> 24 25 #include <dev_driver.h> 26 #include <rte_common.h> 27 #include <rte_log.h> 28 #include <rte_mldev.h> 29 30 /* Device state */ 31 #define ML_DEV_DETACHED (0) 32 #define ML_DEV_ATTACHED (1) 33 34 struct rte_ml_dev; 35 36 /** 37 * @internal 38 * 39 * Enqueue a burst of inference requests to a queue on ML device. 40 * 41 * @param dev 42 * ML device pointer. 43 * @param qp_id 44 * Queue-pair ID. 45 * @param ops 46 * Array of ML ops to be enqueued. 47 * @param nb_ops 48 * Number of ops to enqueue. 49 * 50 * @return 51 * - Number of ops enqueued. 52 */ 53 typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops, 54 uint16_t nb_ops); 55 56 /** 57 * @internal 58 * 59 * Dequeue a burst of inference requests from a queue on ML device. 60 * 61 * @param dev 62 * ML device pointer. 63 * @param qp_id 64 * Queue-pair ID. 65 * @param ops 66 * Array of ML ops to dequeued. 67 * @param nb_ops 68 * Number of ops to dequeue. 69 * 70 * @return 71 * - Number of ops dequeued. 72 */ 73 typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops, 74 uint16_t nb_ops); 75 76 /** 77 * @internal 78 * 79 * Get error information for an Op. 80 * 81 * @param dev 82 * ML device pointer. 83 * @param op 84 * ML Op handle. 85 * @param error 86 * Pointer to error structure. 87 * 88 * @return 89 * - 0 on success. 90 * - <0, error on failure. 91 */ 92 typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev, struct rte_ml_op *op, 93 struct rte_ml_op_error *error); 94 95 /** 96 * Definitions of all functions exported by a driver through the generic structure of type 97 * *ml_dev_ops* supplied in the *rte_ml_dev* structure associated with a device. 98 */ 99 100 /** 101 * @internal 102 * 103 * Function used to get device information. 104 * 105 * @param dev 106 * ML device pointer. 107 * @param dev_info 108 * Pointer to info structure. 109 * 110 * @return 111 * - 0 on success. 112 * - < 0, error code on failure. 113 */ 114 typedef int (*mldev_info_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info); 115 116 /** 117 * @internal 118 * 119 * Function used to configure device. 120 * 121 * @param dev 122 * ML device pointer. 123 * @param config 124 * ML device configurations. 125 * 126 * @return 127 * - 0 on success 128 * - < 0, error code on failure. 129 */ 130 typedef int (*mldev_configure_t)(struct rte_ml_dev *dev, const struct rte_ml_dev_config *config); 131 132 /** 133 * @internal 134 * 135 * Function used to close a configured device. 136 * 137 * @param dev 138 * ML device pointer. 139 * 140 * @return 141 * - 0 on success. 142 * - -EAGAIN if can't close as device is busy. 143 * - < 0, error code on failure, other than busy. 144 */ 145 typedef int (*mldev_close_t)(struct rte_ml_dev *dev); 146 147 /** 148 * @internal 149 * 150 * Function used to start a configured device. 151 * 152 * @param dev 153 * ML device pointer. 154 * 155 * @return 156 * - 0 on success. 157 * - < 0, error code on failure. 158 */ 159 typedef int (*mldev_start_t)(struct rte_ml_dev *dev); 160 161 /** 162 * @internal 163 * 164 * Function used to stop a configured device. 165 * 166 * @param dev 167 * ML device pointer. 168 * 169 * @return 170 * - 0 on success. 171 * - < 0, error code on failure. 172 */ 173 typedef int (*mldev_stop_t)(struct rte_ml_dev *dev); 174 175 /** 176 * @internal 177 * 178 * Setup a queue pair for a device. 179 * 180 * @param dev 181 * ML device pointer. 182 * @param queue_pair_id 183 * Queue pair index. 184 * @param queue_pair_conf 185 * Queue pair configuration structure. 186 * @param socket_id 187 * Socket index. 188 * 189 * @return 190 * - 0 on success. 191 * - < 0, error on failure. 192 */ 193 typedef int (*mldev_queue_pair_setup_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id, 194 const struct rte_ml_dev_qp_conf *queue_pair_conf, 195 int socket_id); 196 197 /** 198 * @internal 199 * 200 * Release memory resources allocated by given queue pair. 201 * 202 * @param dev 203 * ML device pointer. 204 * @param queue_pair_id 205 * Queue pair index. 206 * 207 * @return 208 * - 0 on success. 209 * - -EAGAIN, if can't close as device is busy. 210 */ 211 typedef int (*mldev_queue_pair_release_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id); 212 213 /** 214 * @internal 215 * 216 * Function used to get device statistics. 217 * 218 * @param dev 219 * ML device pointer. 220 * @param stats 221 * Pointer to ML device stats structure to update. 222 * 223 * @return 224 * - 0 on success. 225 * - < 0, error on failure. 226 */ 227 typedef int (*mldev_stats_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats); 228 229 /** 230 * @internal 231 * 232 * Function used to reset device statistics. 233 * 234 * @param dev 235 * ML device pointer. 236 */ 237 typedef void (*mldev_stats_reset_t)(struct rte_ml_dev *dev); 238 239 /** 240 * @internal 241 * 242 * Function used to get names of extended stats. 243 * 244 * @param dev 245 * ML device pointer. 246 * @param mode 247 * Mode of stats to retrieve. 248 * @param model_id 249 * Used to specify model id in model mode. Ignored in device mode. 250 * @param xstats_map 251 * Array to insert id and names into. 252 * @param size 253 * Size of xstats_map array. 254 * 255 * @return 256 * - >= 0 and <= size on success. 257 * - > size, error. Returns the size of xstats_map array required. 258 * - < 0, error code on failure. 259 */ 260 typedef int (*mldev_xstats_names_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, 261 int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map, 262 uint32_t size); 263 264 /** 265 * @internal 266 * 267 * Function used to get a single extended stat by name. 268 * 269 * @param dev 270 * ML device pointer. 271 * @param name 272 * Name of the stat to retrieve. 273 * @param stat_id 274 * ID of the stat to be returned. 275 * @param value 276 * Value of the stat to be returned. 277 * 278 * @return 279 * - = 0 success. 280 * - < 0, error code on failure. 281 */ 282 typedef int (*mldev_xstats_by_name_get_t)(struct rte_ml_dev *dev, const char *name, 283 uint16_t *stat_id, uint64_t *value); 284 285 /** 286 * @internal 287 * 288 * Function used to retrieve extended stats of a device. 289 * 290 * @param dev 291 * ML device pointer. 292 * @param mode 293 * Mode of stats to retrieve. 294 * @param model_id 295 * Used to specify model id in model mode. Ignored in device mode. 296 * @param stat_ids 297 * Array of ID numbers of the stats to be retrieved. 298 * @param values 299 * Values of the stats requested by the ID. 300 * @param nb_ids 301 * Number of stats requested. 302 * 303 * @return 304 * - >= 0, number of entries filled into the values array. 305 * - < 0, error code on failure. 306 */ 307 typedef int (*mldev_xstats_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, 308 int32_t model_id, const uint16_t stat_ids[], uint64_t values[], 309 uint16_t nb_ids); 310 311 /** 312 * @internal 313 * 314 * Function used to reset extended stats. 315 * 316 * @param dev 317 * ML device pointer. 318 * @param mode 319 * Mode of stats to retrieve. 320 * @param model_id 321 * Used to specify model id in model mode. Ignored in device mode. 322 * @param stat_ids 323 * Array of stats IDs to be reset. 324 * @param nb_ids 325 * Number of IDs in the stat_ids array. 326 * 327 * @return 328 * - 0 on success. 329 * - < 0, error code on failure. 330 */ 331 typedef int (*mldev_xstats_reset_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, 332 int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids); 333 334 /** 335 * @internal 336 * 337 * Function used to dump ML device debug info. 338 * 339 * @param dev 340 * ML device pointer. 341 * @param fd 342 * File descriptor to dump the debug info. 343 * 344 * @return 345 * - 0 on success. 346 * - < 0, error code on failure. 347 */ 348 349 typedef int (*mldev_dump_t)(struct rte_ml_dev *dev, FILE *fd); 350 351 /** 352 * @internal 353 * 354 * Function used for selftest of ML device. 355 * 356 * @param dev 357 * ML device pointer. 358 * 359 * @return 360 * - 0 on success. 361 * - < 0, error on failure. 362 */ 363 typedef int (*mldev_selftest_t)(struct rte_ml_dev *dev); 364 365 /** 366 * @internal 367 * 368 * Function used to load an ML model. 369 * 370 * @param dev 371 * ML device pointer. 372 * @param params 373 * Model load params. 374 * @param model_id 375 * Model ID returned by the library. 376 * 377 * @return 378 * - 0 on success. 379 * - < 0, error on failure. 380 */ 381 typedef int (*mldev_model_load_t)(struct rte_ml_dev *dev, struct rte_ml_model_params *params, 382 uint16_t *model_id); 383 384 /** 385 * @internal 386 * 387 * Function used to unload an ML model. 388 * 389 * @param dev 390 * ML device pointer. 391 * @param model_id 392 * Model ID to use. 393 * 394 * @return 395 * - 0 on success. 396 * - < 0, error on failure. 397 */ 398 typedef int (*mldev_model_unload_t)(struct rte_ml_dev *dev, uint16_t model_id); 399 400 /** 401 * @internal 402 * 403 * Function used to start an ML model. 404 * 405 * @param dev 406 * ML device pointer. 407 * @param model_id 408 * Model ID to use. 409 * 410 * @return 411 * - 0 on success. 412 * - <0, error on failure. 413 */ 414 typedef int (*mldev_model_start_t)(struct rte_ml_dev *dev, uint16_t model_id); 415 416 /** 417 * @internal 418 * 419 * Function used to stop an ML model. 420 * 421 * @param dev 422 * ML device pointer. 423 * @param model_id 424 * Model ID to use. 425 * 426 * @return 427 * - 0 on success. 428 * - <0, error on failure. 429 */ 430 typedef int (*mldev_model_stop_t)(struct rte_ml_dev *dev, uint16_t model_id); 431 432 /** 433 * @internal 434 * 435 * Get info about a model. 436 * 437 * @param dev 438 * ML device pointer. 439 * @param model_id 440 * Model ID to use. 441 * @param model_info 442 * Pointer to model info structure. 443 * 444 * @return 445 * - 0 on success. 446 * - <0, error on failure. 447 */ 448 typedef int (*mldev_model_info_get_t)(struct rte_ml_dev *dev, uint16_t model_id, 449 struct rte_ml_model_info *model_info); 450 451 /** 452 * @internal 453 * 454 * Update model params. 455 * 456 * @param dev 457 * ML device pointer. 458 * @param model_id 459 * Model ID to use. 460 * @param buffer 461 * Pointer to model params. 462 * 463 * @return 464 * - 0 on success. 465 * - <0, error on failure. 466 */ 467 typedef int (*mldev_model_params_update_t)(struct rte_ml_dev *dev, uint16_t model_id, void *buffer); 468 469 /** 470 * @internal 471 * 472 * Get size of input buffers. 473 * 474 * @param dev 475 * ML device pointer. 476 * @param model_id 477 * Model ID to use. 478 * @param nb_batches 479 * Number of batches. 480 * @param input_qsize 481 * Size of quantized input. 482 * @param input_dsize 483 * Size of dequantized input. 484 * 485 * @return 486 * - 0 on success. 487 * - <0, error on failure. 488 */ 489 typedef int (*mldev_io_input_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id, 490 uint32_t nb_batches, uint64_t *input_qsize, 491 uint64_t *input_dsize); 492 493 /** 494 * @internal 495 * 496 * Get size of output buffers. 497 * 498 * @param dev 499 * ML device pointer. 500 * @param model_id 501 * Model ID to use. 502 * @param nb_batches 503 * Number of batches. 504 * @param output_qsize 505 * Size of quantized output. 506 * @param output_dsize 507 * Size of dequantized output. 508 * 509 * @return 510 * - 0 on success. 511 * - <0, error on failure. 512 */ 513 typedef int (*mldev_io_output_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id, 514 uint32_t nb_batches, uint64_t *output_qsize, 515 uint64_t *output_dsize); 516 517 /** 518 * @internal 519 * 520 * Quantize model data. 521 * 522 * @param dev 523 * ML device pointer. 524 * @param model_id 525 * Model ID to use. 526 * @param nb_batches 527 * Number of batches. 528 * @param dbuffer 529 * Pointer t de-quantized data buffer. 530 * @param qbuffer 531 * Pointer t de-quantized data buffer. 532 * 533 * @return 534 * - 0 on success. 535 * - <0, error on failure. 536 */ 537 typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches, 538 void *dbuffer, void *qbuffer); 539 540 /** 541 * @internal 542 * 543 * Quantize model data. 544 * 545 * @param dev 546 * ML device pointer. 547 * @param model_id 548 * Model ID to use. 549 * @param nb_batches 550 * Number of batches. 551 * @param qbuffer 552 * Pointer t de-quantized data buffer. 553 * @param dbuffer 554 * Pointer t de-quantized data buffer. 555 * 556 * @return 557 * - 0 on success. 558 * - <0, error on failure. 559 */ 560 typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches, 561 void *qbuffer, void *dbuffer); 562 563 /** 564 * @internal 565 * 566 * ML device operations function pointer table. 567 */ 568 struct rte_ml_dev_ops { 569 /** Get device information. */ 570 mldev_info_get_t dev_info_get; 571 572 /** Configure device. */ 573 mldev_configure_t dev_configure; 574 575 /** Close device. */ 576 mldev_close_t dev_close; 577 578 /** Start device. */ 579 mldev_start_t dev_start; 580 581 /** Stop device. */ 582 mldev_stop_t dev_stop; 583 584 /** Set up a device queue pair. */ 585 mldev_queue_pair_setup_t dev_queue_pair_setup; 586 587 /** Release a device queue pair. */ 588 mldev_queue_pair_release_t dev_queue_pair_release; 589 590 /** Get device statistics. */ 591 mldev_stats_get_t dev_stats_get; 592 593 /** Reset device statistics. */ 594 mldev_stats_reset_t dev_stats_reset; 595 596 /** Get names of extended stats. */ 597 mldev_xstats_names_get_t dev_xstats_names_get; 598 599 /** Get value of a single extended stat. */ 600 mldev_xstats_by_name_get_t dev_xstats_by_name_get; 601 602 /** Get extended stats of a device. */ 603 mldev_xstats_get_t dev_xstats_get; 604 605 /** Reset extended stats of the device. */ 606 mldev_xstats_reset_t dev_xstats_reset; 607 608 /** Dump ML device debug info. */ 609 mldev_dump_t dev_dump; 610 611 /** Dump ML device debug info. */ 612 mldev_selftest_t dev_selftest; 613 614 /** Load an ML model. */ 615 mldev_model_load_t model_load; 616 617 /** Unload an ML model. */ 618 mldev_model_unload_t model_unload; 619 620 /** Start an ML model. */ 621 mldev_model_start_t model_start; 622 623 /** Stop an ML model. */ 624 mldev_model_stop_t model_stop; 625 626 /** Get model information. */ 627 mldev_model_info_get_t model_info_get; 628 629 /** Update model params. */ 630 mldev_model_params_update_t model_params_update; 631 632 /** Get input buffer size. */ 633 mldev_io_input_size_get_t io_input_size_get; 634 635 /** Get output buffer size. */ 636 mldev_io_output_size_get_t io_output_size_get; 637 638 /** Quantize data */ 639 mldev_io_quantize_t io_quantize; 640 641 /** De-quantize data */ 642 mldev_io_dequantize_t io_dequantize; 643 }; 644 645 /** 646 * @internal 647 * 648 * The data part, with no function pointers, associated with each device. This structure is safe to 649 * place in shared memory to be common among different processes in a multi-process configuration. 650 */ 651 struct rte_ml_dev_data { 652 /** Device ID for this instance. */ 653 int16_t dev_id; 654 655 /** Socket ID where memory is allocated. */ 656 int16_t socket_id; 657 658 /** Device state: STOPPED(0) / STARTED(1) */ 659 __extension__ uint8_t dev_started : 1; 660 661 /** Number of device queue pairs. */ 662 uint16_t nb_queue_pairs; 663 664 /** Number of ML models. */ 665 uint16_t nb_models; 666 667 /** Array of pointers to queue pairs. */ 668 void **queue_pairs; 669 670 /** Array of pointers to ML models. */ 671 void **models; 672 673 /** PMD-specific private data. */ 674 void *dev_private; 675 676 /** Unique identifier name. */ 677 char name[RTE_ML_STR_MAX]; 678 }; 679 680 /** 681 * @internal 682 * 683 * The data structure associated with each ML device. 684 */ 685 struct rte_ml_dev { 686 /** Pointer to PMD enqueue function. */ 687 mldev_enqueue_t enqueue_burst; 688 689 /** Pointer to PMD dequeue function. */ 690 mldev_dequeue_t dequeue_burst; 691 692 /** Pointer to PMD Op error get function. */ 693 mldev_op_error_get_t op_error_get; 694 695 /** Pointer to device data. */ 696 struct rte_ml_dev_data *data; 697 698 /** Functions exported by PMD. */ 699 struct rte_ml_dev_ops *dev_ops; 700 701 /** Backing RTE device. */ 702 struct rte_device *device; 703 704 /** Flag indicating the device is attached. */ 705 __extension__ uint8_t attached : 1; 706 } __rte_cache_aligned; 707 708 /** 709 * @internal 710 * 711 * Global structure used for maintaining state of allocated ML devices. 712 */ 713 struct rte_ml_dev_global { 714 /** Device information array. */ 715 struct rte_ml_dev *devs; 716 717 /** Device private data array. */ 718 struct rte_ml_dev_data **data; 719 720 /** Number of devices found. */ 721 uint8_t nb_devs; 722 723 /** Maximum number of devices. */ 724 uint8_t max_devs; 725 }; 726 727 #ifdef __cplusplus 728 } 729 #endif 730 731 #endif /* RTE_MLDEV_INTERNAL_H */ 732