/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#ifndef RTE_MLDEV_H
#define RTE_MLDEV_H

/**
 * @file rte_mldev.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * ML (Machine Learning) device API.
 *
 * The ML framework is built on the following model:
 *
 *
 *     +-----------------+               rte_ml_[en|de]queue_burst()
 *     |                 |                          |
 *     |     Machine     o------+     +--------+    |
 *     |     Learning    |      |     | queue  |    |    +------+
 *     |     Inference   o------+-----o        |<===o===>|Core 0|
 *     |     Engine      |      |     | pair 0 |         +------+
 *     |                 o----+ |     +--------+
 *     |                 |    | |
 *     +-----------------+    | |     +--------+
 *              ^             | |     | queue  |         +------+
 *              |             | +-----o        |<=======>|Core 1|
 *              |             |       | pair 1 |         +------+
 *              |             |       +--------+
 *     +--------+--------+    |
 *     | +-------------+ |    |       +--------+
 *     | |   Model 0   | |    |       | queue  |         +------+
 *     | +-------------+ |    +-------o        |<=======>|Core N|
 *     | +-------------+ |            | pair N |         +------+
 *     | |   Model 1   | |            +--------+
 *     | +-------------+ |
 *     | +-------------+ |<------> rte_ml_model_load()
 *     | |   Model ..  | |-------> rte_ml_model_info_get()
 *     | +-------------+ |<------- rte_ml_model_start()
 *     | +-------------+ |<------- rte_ml_model_stop()
 *     | |   Model N   | |<------- rte_ml_model_params_update()
 *     | +-------------+ |<------- rte_ml_model_unload()
 *     +-----------------+
 *
 * ML Device: A hardware or software-based implementation of ML device API for
 * running inferences using a pre-trained ML model.
 *
 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of the
 * procedure/algorithm and the data/pattern required to make predictions on live data.
 * Once the model is created and trained outside of the DPDK scope, it can be loaded via
 * rte_ml_model_load() and then started using the rte_ml_model_start() API.
 * The rte_ml_model_params_update() API can be used to update model parameters such as weights
 * and bias without unloading the model using rte_ml_model_unload().
 *
 * ML Inference: ML inference is the process of feeding data to the model via the
 * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to retrieve the calculated
 * outputs/predictions from the started model.
 *
 * In all functions of the ML device API, the ML device is designated by an
 * integer >= 0 named the device identifier *dev_id*.
 *
 * The functions exported by the ML device API to set up a device designated by
 * its device identifier must be invoked in the following order:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_dev_start()
 *
 * A model is required to run inference operations with the user specified inputs.
 * An application needs to invoke the ML model API in the following order before queueing
 * inference jobs:
 *
 * - rte_ml_model_load()
 * - rte_ml_model_start()
 *
 * A model can be loaded on a device only after the device has been configured, and can be
 * started or stopped only after the device has been started.
 *
 * The rte_ml_model_info_get() API is provided to retrieve information related to the model.
 * The information includes the shape and type of the input and output required for the inference.
 *
 * Data quantization and dequantization is one of the main aspects of the ML domain. This involves
 * conversion of input data from a higher precision to a lower precision data type, and vice-versa
 * for the output.
 * APIs are provided to enable quantization through rte_ml_io_quantize() and
 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
 * and output buffers holding data for multiple batches.
 *
 * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to
 * get the size of quantized and de-quantized multi-batch input and output buffers.
 *
 * The user can optionally update the model parameters with rte_ml_model_params_update() after
 * invoking the rte_ml_model_stop() API on a given model ID.
 *
 * The application can invoke, in any order, the functions exported by the ML API to enqueue
 * inference jobs and dequeue inference responses.
 *
 * If the application wants to change the device configuration (i.e., call
 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
 * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
 * the application must call rte_ml_model_stop() followed by the rte_ml_model_params_update() API
 * for the given model. The application does not need to call the rte_ml_dev_stop() API for
 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
 *
 * Once the device is in the start state after invoking the rte_ml_dev_start() API and the model
 * is in the start state after invoking the rte_ml_model_start() API, the application can call
 * the rte_ml_enqueue_burst() and rte_ml_dequeue_burst() APIs on the destined device and model ID.
 *
 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
 *
 * Typical application usage of the ML API follows the programming flow below:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_model_load()
 * - rte_ml_dev_start()
 * - rte_ml_model_start()
 * - rte_ml_model_info_get()
 * - rte_ml_enqueue_burst()
 * - rte_ml_dequeue_burst()
 * - rte_ml_model_stop()
 * - rte_ml_model_unload()
 * - rte_ml_dev_stop()
 * - rte_ml_dev_close()
 *
 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
 * are lock-free functions which assume they are not invoked in parallel on different logical
 * cores on the same target object. For instance, the dequeue function of a poll mode driver
 * cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of
 * course, this function can be invoked in parallel by different logical cores on different queue
 * pairs. It is the responsibility of the user application to enforce this rule.
 */

#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Logging Macro */
extern int rte_ml_dev_logtype;

#define RTE_MLDEV_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)

#define RTE_ML_STR_MAX 128
/**< Maximum length of name string */

#define RTE_MLDEV_DEFAULT_MAX 32
/** Maximum number of devices if rte_ml_dev_init() is not called. */

/* Device operations */

/**
 * Initialize the device array before probing devices. If not called, the first device probed
 * would initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
 *
 * @param dev_max
 *   Maximum number of devices.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENOMEM if out of memory
 *   - EINVAL if 0 size
 *   - EBUSY if already initialized
 */
__rte_experimental
int
rte_ml_dev_init(size_t dev_max);

/**
 * Get the total number of ML devices that have been successfully initialised.
 *
 * @return
 *   - The total number of usable ML devices.
 */
__rte_experimental
uint16_t
rte_ml_dev_count(void);

/**
 * Check if the device is in ready state.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 if the device is not in ready state.
 *   - 1 if the device is in ready state.
 */
__rte_experimental
int
rte_ml_dev_is_valid_dev(int16_t dev_id);

/**
 * Return the NUMA socket to which a device is connected.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - The NUMA socket id to which the device is connected.
 *   - 0 if the socket could not be determined.
 *   - -EINVAL if the dev_id value is not valid.
 */
__rte_experimental
int
rte_ml_dev_socket_id(int16_t dev_id);

/** ML device information */
struct rte_ml_dev_info {
	const char *driver_name;
	/**< Driver name */
	uint16_t max_models;
	/**< Maximum number of models supported by the device.
	 * @see struct rte_ml_dev_config::nb_models
	 */
	uint16_t max_queue_pairs;
	/**< Maximum number of queue pairs supported by the device.
	 * @see struct rte_ml_dev_config::nb_queue_pairs
	 */
	uint16_t max_desc;
	/**< Maximum number of descriptors allowed per queue pair by the device.
	 * @see struct rte_ml_dev_qp_conf::nb_desc
	 */
	uint16_t max_segments;
	/**< Maximum number of scatter-gather entries supported by the device.
	 * @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
	 */
	uint16_t min_align_size;
	/**< Minimum alignment size of IO buffers used by the device. */
};

/**
 * Retrieve the information of the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param dev_info
 *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
 *
 * @return
 *   - 0: Success, driver updates the information of the ML device.
 *   - < 0: Error code returned by the driver info get function.
 */
__rte_experimental
int
rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);

/** ML device configuration structure */
struct rte_ml_dev_config {
	int socket_id;
	/**< Socket to allocate resources on. */
	uint16_t nb_models;
	/**< Number of models to be loaded on the device.
	 * This value cannot exceed the max_models which is previously provided in
	 * struct rte_ml_dev_info::max_models
	 */
	uint16_t nb_queue_pairs;
	/**< Number of queue pairs to configure on this device.
	 * This value cannot exceed the max_queue_pairs which is previously provided in
	 * struct rte_ml_dev_info::max_queue_pairs
	 */
};

/**
 * Configure an ML device.
 *
 * This function must be invoked first before any other function in the API.
 *
 * An ML device can be re-configured when in a stopped state. A device cannot be re-configured
 * after rte_ml_dev_close() is called.
 *
 * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
 * this ML device.
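 *
 * A minimal configuration sketch (illustrative only; dev_id is assumed to be a valid device
 * identifier, the values shown are examples and error handling is omitted):
 *
 * @code{.c}
 * struct rte_ml_dev_info info;
 * struct rte_ml_dev_config conf;
 *
 * rte_ml_dev_info_get(dev_id, &info);
 * conf.socket_id = rte_ml_dev_socket_id(dev_id);
 * conf.nb_models = 1;          // must not exceed info.max_models
 * conf.nb_queue_pairs = 1;     // must not exceed info.max_queue_pairs
 * rte_ml_dev_configure(dev_id, &conf);
 * @endcode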
 *
 * @param dev_id
 *   The identifier of the device to configure.
 * @param config
 *   The ML device configuration structure.
 *
 * @return
 *   - 0: Success, device configured.
 *   - < 0: Error code returned by the driver configuration function.
 */
__rte_experimental
int
rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);

/* Forward declaration */
struct rte_ml_op;

/** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);

/** ML device queue pair configuration structure. */
struct rte_ml_dev_qp_conf {
	uint32_t nb_desc;
	/**< Number of descriptors per queue pair.
	 * This value cannot exceed the max_desc which was previously provided in
	 * struct rte_ml_dev_info::max_desc
	 */
	rte_ml_dev_stop_flush_t cb;
	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
	 * Value NULL is allowed, in which case the callback will not be invoked.
	 * This function can be used to properly dispose of outstanding ML ops from all
	 * queue pairs, for example ops containing memory pointers.
	 * @see rte_ml_dev_stop()
	 */
};

/**
 * Set up a queue pair for a device. This should only be called when the device is stopped.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param queue_pair_id
 *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
 *   previously supplied to rte_ml_dev_configure().
 * @param qp_conf
 *   The pointer to the configuration data to be used for the queue pair.
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in case of NUMA.
 *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
 *   for the queue pair.
 *
 * @return
 *   - 0: Success, queue pair correctly set up.
 *   - < 0: Queue pair configuration failed.
 */
__rte_experimental
int
rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);

/**
 * Start an ML device.
 *
 * The device start step consists of setting the configured features and enabling the ML device
 * to accept inference jobs.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device started.
 *   - < 0: Error code of the driver device start function.
 */
__rte_experimental
int
rte_ml_dev_start(int16_t dev_id);

/**
 * Stop an ML device. A stopped device cannot accept inference jobs.
 * The device can be restarted with a call to rte_ml_dev_start().
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device stopped.
 *   - < 0: Error code of the driver device stop function.
 */
__rte_experimental
int
rte_ml_dev_stop(int16_t dev_id);

/**
 * Close an ML device. The device cannot be restarted!
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 on successfully closing the device.
 *   - < 0 on failure to close the device.
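 *
 * A teardown sketch for a device with one started model (illustrative only; dev_id and model_id
 * are assumed to come from earlier configure/load calls, and error handling is omitted):
 *
 * @code{.c}
 * rte_ml_model_stop(dev_id, model_id);
 * rte_ml_model_unload(dev_id, model_id);
 * rte_ml_dev_stop(dev_id);
 * rte_ml_dev_close(dev_id);
 * @endcode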
 */
__rte_experimental
int
rte_ml_dev_close(int16_t dev_id);

/** Status of ML operation */
enum rte_ml_op_status {
	RTE_ML_OP_STATUS_SUCCESS = 0,
	/**< Operation completed successfully */
	RTE_ML_OP_STATUS_NOT_PROCESSED,
	/**< Operation has not yet been processed by the device. */
	RTE_ML_OP_STATUS_ERROR,
	/**< Operation completed with error.
	 * Application can invoke rte_ml_op_error_get() to get PMD specific
	 * error code if needed.
	 */
};

/** ML operation's input and output buffer representation as scatter gather list
 */
struct rte_ml_buff_seg {
	rte_iova_t iova_addr;
	/**< IOVA address of segment buffer. */
	void *addr;
	/**< Virtual address of segment buffer. */
	uint32_t length;
	/**< Segment length. */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_ml_buff_seg *next;
	/**< Points to next segment. Value NULL represents the last segment. */
};

/**
 * ML Operation.
 *
 * This structure contains data related to performing an ML operation on the buffers using
 * the model specified through model_id.
 */
struct rte_ml_op {
	uint16_t model_id;
	/**< Model ID to be used for the operation. */
	uint16_t nb_batches;
	/**< Number of batches. Minimum value must be one.
	 * The input buffer must hold inference data for each batch contiguously.
	 */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_mempool *mempool;
	/**< Pool from which operation is allocated. */
	struct rte_ml_buff_seg input;
	/**< Input buffer to hold the inference data. */
	struct rte_ml_buff_seg output;
	/**< Output buffer to hold the inference output by the driver. */
	RTE_STD_C11
	union {
		uint64_t user_u64;
		/**< User data as uint64_t. */
		void *user_ptr;
		/**< User data as void*. */
	};
	enum rte_ml_op_status status;
	/**< Operation status. */
	uint64_t impl_opaque;
	/**< Implementation specific opaque value.
	 * An implementation may use this field to hold an
	 * implementation specific value to share between
	 * dequeue and enqueue operation.
	 * The application should not modify this field.
	 */
} __rte_cache_aligned;

/* Enqueue/Dequeue operations */

/**
 * Enqueue a burst of ML inferences for processing on an ML device.
 *
 * The rte_ml_enqueue_burst() function is invoked to place ML inference
 * operations on the queue *qp_id* of the device designated by its *dev_id*.
 *
 * The *nb_ops* parameter is the number of inferences to process which are
 * supplied in the *ops* array of *rte_ml_op* structures.
 *
 * The rte_ml_enqueue_burst() function returns the number of inferences it
 * actually enqueued for processing. A return value equal to *nb_ops* means that
 * all operations have been enqueued.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair on which inferences are to be enqueued for processing.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   *rte_ml_dev_configure*.
 * @param ops
 *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
 *   ML inferences to be processed.
 * @param nb_ops
 *   The number of operations to process.
 *
 * @return
 *   The number of inference operations actually enqueued to the ML device.
 *   The return value can be less than the value of the *nb_ops* parameter when the ML device
 *   queue is full or if invalid parameters are specified in a *rte_ml_op*.
 */
__rte_experimental
uint16_t
rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Dequeue a burst of processed ML inference operations from a queue on the ML device.
 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
 * in the *ops* array.
 *
 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
 *
 * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
 * operations, and this is likely to signify that other processed operations remain in the
 * device's output queue. An application implementing a "retrieve as many processed operations
 * as possible" policy can check this specific case and keep invoking the rte_ml_dequeue_burst()
 * function until a value less than *nb_ops* is returned.
 *
 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
 * the corresponding overhead.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair from which to retrieve processed operations.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   rte_ml_dev_configure().
 * @param ops
 *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
 *   store *nb_ops* pointers in it.
 * @param nb_ops
 *   The maximum number of inferences to dequeue.
 *
 * @return
 *   The number of operations actually dequeued, which is the number of pointers
 *   to *rte_ml_op* structures effectively supplied to the *ops* array.
 */
__rte_experimental
uint16_t
rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Verbose error structure definition.
 */
struct rte_ml_op_error {
	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
	uint64_t errcode;	      /**< Vendor specific error code. */
};

/**
 * Get PMD specific error information for an ML op.
 *
 * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
 * this API allows retrieving PMD specific error details.
 *
 * @param[in] dev_id
 *   Device identifier
 * @param[in] op
 *   Handle of ML operation
 * @param[out] error
 *   Address of structure rte_ml_op_error to be filled
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);

/* Statistics operations */

/** Device statistics. */
struct rte_ml_dev_stats {
	uint64_t enqueued_count;
	/**< Count of all operations enqueued */
	uint64_t dequeued_count;
	/**< Count of all operations dequeued */
	uint64_t enqueue_err_count;
	/**< Total error count on operations enqueued */
	uint64_t dequeue_err_count;
	/**< Total error count on operations dequeued */
};

/**
 * Retrieve the general I/O statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param stats
 *   Pointer to the structure to where statistics will be copied.
 *   On error, this location may or may not have been modified.
 * @return
 *   - 0 on success
 *   - -EINVAL: If an invalid parameter pointer is provided.
 */
__rte_experimental
int
rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);

/**
 * Reset the statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 */
__rte_experimental
void
rte_ml_dev_stats_reset(int16_t dev_id);

/**
 * Selects the component of the mldev to retrieve statistics from.
 */
enum rte_ml_dev_xstats_mode {
	RTE_ML_DEV_XSTATS_DEVICE,
	/**< Device xstats */
	RTE_ML_DEV_XSTATS_MODEL,
	/**< Model xstats */
};

/**
 * A name-key lookup element for extended statistics.
 *
 * This structure is used to map between names and ID numbers for extended ML device statistics.
 */
struct rte_ml_dev_xstats_map {
	uint16_t id;
	/**< xstat identifier */
	char name[RTE_ML_STR_MAX];
	/**< xstat name */
};

/**
 * Retrieve names of extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model number in model mode, and is ignored in device mode.
 * @param[out] xstats_map
 *   Block of memory to insert names and ids into. Must be at least size in capacity. If set to
 *   NULL, the function returns the required capacity. The id values returned can be passed to
 *   *rte_ml_dev_xstats_get* to select statistics.
 * @param size
 *   Capacity of xstats_map (number of name-id map entries).
 * @return
 *   - Positive value lower or equal to size: success. The return value is the number of entries
 *     filled in the stats table.
 *   - Positive value higher than size: error, the given statistics table is too small. The return
 *     value corresponds to the size that should be given to succeed. The entries in the table are
 *     not valid and shall not be used by the caller.
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode or model parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);

/**
 * Retrieve the value of a single stat by requesting it by name.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param name
 *   Name of the stat to retrieve.
 * @param[out] stat_id
 *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
 *   stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to
 *   scan a list of names for the stat. If the stat cannot be found, the id returned will be
 *   (unsigned)-1.
 * @param[out] value
 *   Value of the stat to be returned.
 * @return
 *   - Zero: No error.
 *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id,
			      uint64_t *value);

/**
 * Retrieve extended statistics of an ML device.
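 *
 * A sketch of the typical two-step xstats query for device-level statistics (query the required
 * capacity and names first, then the values; allocation via malloc() is shown only for brevity,
 * and error checks and cleanup are omitted):
 *
 * @code{.c}
 * int n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, NULL, 0);
 * struct rte_ml_dev_xstats_map *map = malloc(n * sizeof(*map));
 * uint16_t *ids = malloc(n * sizeof(*ids));
 * uint64_t *values = malloc(n * sizeof(*values));
 *
 * rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, n);
 * for (int i = 0; i < n; i++)
 *         ids[i] = map[i].id;
 * rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n);
 * @endcode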
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model id in model mode, and is ignored in device mode.
 * @param stat_ids
 *   ID numbers of the stats to get. The ids can be obtained from the stat position in the stat
 *   list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
 * @param[out] values
 *   Values for each stat requested by ID.
 * @param nb_ids
 *   Number of stats requested.
 * @return
 *   - Positive value: number of stat entries filled into the values array.
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model id or stat id parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);

/**
 * Reset the values of the xstats of the selected component in the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of the statistics to reset. Choose from device or model.
 * @param model_id
 *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
 * @param stat_ids
 *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
 *   reset. If non-NULL, must point to an array of at least *nb_ids* size.
 * @param nb_ids
 *   The number of ids available from the *ids* array. Ignored when ids is NULL.
 * @return
 *   - Zero: successfully reset the statistics.
 *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			const uint16_t stat_ids[], uint16_t nb_ids);

/**
 * Dump internal information about *dev_id* to the FILE* provided in *fd*.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param fd
 *   A pointer to a file for output.
 * @return
 *   - 0: on success.
 *   - < 0: on failure.
 */
__rte_experimental
int
rte_ml_dev_dump(int16_t dev_id, FILE *fd);

/**
 * Trigger the ML device self test.
 *
 * @param dev_id
 *   The identifier of the device.
 * @return
 *   - 0: Selftest successful.
 *   - -ENOTSUP: if the device doesn't support selftest.
 *   - other values < 0 on failure.
 */
__rte_experimental
int
rte_ml_dev_selftest(int16_t dev_id);

/* Model operations */

/** ML model load parameters
 *
 * Parameters required to load an ML model.
 */
struct rte_ml_model_params {
	void *addr;
	/**< Address of model buffer */
	size_t size;
	/**< Size of model buffer */
};

/**
 * Load an ML model to the device.
 *
 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] params
 *   Parameters for the model to be loaded.
 * @param[out] model_id
 *   Identifier of the model loaded.
 *
 * @return
 *   - 0: Success, Model loaded.
 *   - < 0: Failure, Error code of the model load driver function.
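 *
 * A load-and-start sketch (illustrative only; model_buffer and model_size are assumed to describe
 * a model image already read into memory by the application, and error handling is omitted):
 *
 * @code{.c}
 * struct rte_ml_model_params params = {
 *         .addr = model_buffer,   // model image prepared by the application
 *         .size = model_size,
 * };
 * uint16_t model_id;
 *
 * rte_ml_model_load(dev_id, &params, &model_id);
 * rte_ml_model_start(dev_id, model_id);
 * @endcode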
 */
__rte_experimental
int
rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);

/**
 * Unload an ML model from the device.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be unloaded.
 *
 * @return
 *   - 0: Success, Model unloaded.
 *   - < 0: Failure, Error code of the model unload driver function.
 */
__rte_experimental
int
rte_ml_model_unload(int16_t dev_id, uint16_t model_id);

/**
 * Start an ML model for the given device ID.
 *
 * Start an ML model to accept inference requests.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be started.
 *
 * @return
 *   - 0: Success, Model started.
 *   - < 0: Failure, Error code of the model start driver function.
 */
__rte_experimental
int
rte_ml_model_start(int16_t dev_id, uint16_t model_id);

/**
 * Stop an ML model for the given device ID.
 *
 * Model stop disables a started model from accepting further inference jobs.
 * All inference jobs must have been completed before model stop is attempted.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be stopped.
 *
 * @return
 *   - 0: Success, Model stopped.
 *   - < 0: Failure, Error code of the model stop driver function.
 */
__rte_experimental
int
rte_ml_model_stop(int16_t dev_id, uint16_t model_id);

/**
 * Input and output data types. ML models can operate on reduced precision
 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
 * This enum is used to represent the lower precision integer and floating point types used
 * by ML models.
 */
enum rte_ml_io_type {
	RTE_ML_IO_TYPE_UNKNOWN = 0,
	/**< Invalid or unknown type */
	RTE_ML_IO_TYPE_INT8,
	/**< 8-bit integer */
	RTE_ML_IO_TYPE_UINT8,
	/**< 8-bit unsigned integer */
	RTE_ML_IO_TYPE_INT16,
	/**< 16-bit integer */
	RTE_ML_IO_TYPE_UINT16,
	/**< 16-bit unsigned integer */
	RTE_ML_IO_TYPE_INT32,
	/**< 32-bit integer */
	RTE_ML_IO_TYPE_UINT32,
	/**< 32-bit unsigned integer */
	RTE_ML_IO_TYPE_FP8,
	/**< 8-bit floating point number */
	RTE_ML_IO_TYPE_FP16,
	/**< IEEE 754 16-bit floating point number */
	RTE_ML_IO_TYPE_FP32,
	/**< IEEE 754 32-bit floating point number */
	RTE_ML_IO_TYPE_BFLOAT16
	/**< 16-bit brain floating point number. */
};

/**
 * Input and output format. This is used to represent the encoding type of multi-dimensional
 * data used by ML models.
 */
enum rte_ml_io_format {
	RTE_ML_IO_FORMAT_NCHW = 1,
	/**< Batch size (N) x channels (C) x height (H) x width (W) */
	RTE_ML_IO_FORMAT_NHWC,
	/**< Batch size (N) x height (H) x width (W) x channels (C) */
	RTE_ML_IO_FORMAT_CHWN,
	/**< Channels (C) x height (H) x width (W) x batch size (N) */
	RTE_ML_IO_FORMAT_3D,
	/**< Format to represent 3-dimensional data */
	RTE_ML_IO_FORMAT_2D,
	/**< Format to represent matrix data */
	RTE_ML_IO_FORMAT_1D,
	/**< Format to represent vector data */
	RTE_ML_IO_FORMAT_SCALAR,
	/**< Format to represent scalar data */
};

/**
 * Input and output shape. This structure represents the encoding format and dimensions
 * of the tensor or vector.
 *
 * The data can be a 4D / 3D tensor, matrix, vector or a scalar. The number of dimensions used
 * for the data depends on the format. Unused dimensions should be set to 1.
 */
struct rte_ml_io_shape {
	enum rte_ml_io_format format;
	/**< Format of the data */
	uint32_t w;
	/**< First dimension */
	uint32_t x;
	/**< Second dimension */
	uint32_t y;
	/**< Third dimension */
	uint32_t z;
	/**< Fourth dimension */
};

/** Input and output data information structure
 *
 * Specifies the type and shape of input and output data.
 */
struct rte_ml_io_info {
	char name[RTE_ML_STR_MAX];
	/**< Name of data */
	struct rte_ml_io_shape shape;
	/**< Shape of data */
	enum rte_ml_io_type qtype;
	/**< Type of quantized data */
	enum rte_ml_io_type dtype;
	/**< Type of de-quantized data */
};

/** Model information structure */
struct rte_ml_model_info {
	char name[RTE_ML_STR_MAX];
	/**< Model name. */
	char version[RTE_ML_STR_MAX];
	/**< Model version */
	uint16_t model_id;
	/**< Model ID */
	uint16_t device_id;
	/**< Device ID */
	uint16_t batch_size;
	/**< Maximum number of batches that the model can process simultaneously */
	uint32_t nb_inputs;
	/**< Number of inputs */
	const struct rte_ml_io_info *input_info;
	/**< Input info array. Array size is equal to nb_inputs */
	uint32_t nb_outputs;
	/**< Number of outputs */
	const struct rte_ml_io_info *output_info;
	/**< Output info array. Array size is equal to nb_outputs */
	uint64_t wb_size;
	/**< Size of model weights and bias */
};

/**
 * Get ML model information.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[out] model_info
 *   Pointer to a model info structure
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);

/**
 * Update the model parameters without unloading the model.
 *
 * Update model parameters such as weights and bias without unloading the model.
 * rte_ml_model_stop() must be called before invoking this API.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] buffer
 *   Pointer to the model weights and bias buffer.
 *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);

/* IO operations */

/**
 * Get size of quantized and dequantized input buffers.
 *
 * Calculate the size of buffers required for quantized and dequantized input data.
 * This API returns the buffer sizes for the number of batches provided and takes into account
 * the alignment requirements as per the PMD. Input sizes computed by this API can
 * be used by the application to allocate buffers.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] nb_batches
 *   Number of batches of input to be processed in a single inference job
 * @param[out] input_qsize
 *   Quantized input size pointer.
 *   NULL value is allowed, in which case input_qsize is not calculated by the driver.
 * @param[out] input_dsize
 *   Dequantized input size pointer.
 *   NULL value is allowed, in which case input_dsize is not calculated by the driver.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
			 uint64_t *input_qsize, uint64_t *input_dsize);

/**
 * Get size of quantized and dequantized output buffers.
 *
 * Calculate the size of buffers required for quantized and dequantized output data.
 * This API returns the buffer sizes for the number of batches provided and takes into account
 * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
 * application to allocate buffers.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] nb_batches
 *   Number of batches of input to be processed in a single inference job
 * @param[out] output_qsize
 *   Quantized output size pointer.
 *   NULL value is allowed, in which case output_qsize is not calculated by the driver.
 * @param[out] output_dsize
 *   Dequantized output size pointer.
 *   NULL value is allowed, in which case output_dsize is not calculated by the driver.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
			  uint64_t *output_qsize, uint64_t *output_dsize);

/**
 * Quantize input data.
 *
 * Quantization converts data from a higher precision type to a lower precision type to improve
 * the throughput and efficiency of the model execution with minimal loss of accuracy.
 * Types of dequantized data and quantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] nb_batches
 *   Number of batches in the dequantized input buffer
 * @param[in] dbuffer
 *   Address of dequantized input data
 * @param[in] qbuffer
 *   Address of quantized input data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
		   void *qbuffer);

/**
 * Dequantize output data.
 *
 * Dequantization converts data from a lower precision type to a higher precision type.
 * Types of quantized data and dequantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] nb_batches
 *   Number of batches in the dequantized output buffer
 * @param[in] qbuffer
 *   Address of quantized output data
 * @param[in] dbuffer
 *   Address of dequantized output data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
		     void *dbuffer);

/* ML op pool operations */

/**
 * Create an ML operation pool
 *
 * @param name
 *   ML operations pool name
 * @param nb_elts
 *   Number of elements in pool
 * @param cache_size
 *   Number of elements to cache on an lcore, see
 *   *rte_mempool_create* for further details about cache size
 * @param user_size
 *   Size of private data to allocate for the user with each operation
 * @param socket_id
 *   Socket identifier to allocate memory on
 * @return
 *   - On success pointer to mempool
 *   - On failure NULL
 */
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
		      uint16_t user_size, int socket_id);

/**
 * Free an ML operation pool
 *
 * @param mempool
 *   A pointer to the mempool structure.
 *   If NULL, the function does nothing.
 */
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);

#ifdef __cplusplus
}
#endif

#endif /* RTE_MLDEV_H */