1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022 Marvell. 3 */ 4 5 #ifndef RTE_MLDEV_H 6 #define RTE_MLDEV_H 7 8 /** 9 * @file rte_mldev.h 10 * 11 * @warning 12 * @b EXPERIMENTAL: 13 * All functions in this file may be changed or removed without prior notice. 14 * 15 * ML (Machine Learning) device API. 16 * 17 * The ML framework is built on the following model: 18 * 19 * 20 * +-----------------+ rte_ml_[en|de]queue_burst() 21 * | | | 22 * | Machine o------+ +--------+ | 23 * | Learning | | | queue | | +------+ 24 * | Inference o------+-----o |<===o===>|Core 0| 25 * | Engine | | | pair 0 | +------+ 26 * | o----+ | +--------+ 27 * | | | | 28 * +-----------------+ | | +--------+ 29 * ^ | | | queue | +------+ 30 * | | +-----o |<=======>|Core 1| 31 * | | | pair 1 | +------+ 32 * | | +--------+ 33 * +--------+--------+ | 34 * | +-------------+ | | +--------+ 35 * | | Model 0 | | | | queue | +------+ 36 * | +-------------+ | +-------o |<=======>|Core N| 37 * | +-------------+ | | pair N | +------+ 38 * | | Model 1 | | +--------+ 39 * | +-------------+ | 40 * | +-------------+ |<------> rte_ml_model_load() 41 * | | Model .. | |-------> rte_ml_model_info_get() 42 * | +-------------+ |<------- rte_ml_model_start() 43 * | +-------------+ |<------- rte_ml_model_stop() 44 * | | Model N | |<------- rte_ml_model_params_update() 45 * | +-------------+ |<------- rte_ml_model_unload() 46 * +-----------------+ 47 * 48 * ML Device: A hardware or software-based implementation of ML device API for 49 * running inferences using a pre-trained ML model. 50 * 51 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of 52 * procedure/algorithm and data/pattern required to make predictions on live data. 53 * Once the model is created and trained outside of the DPDK scope, the model can be loaded 54 * via rte_ml_model_load() and then start it using rte_ml_model_start() API. 
55 * The rte_ml_model_params_update() can be used to update the model parameters such as weight 56 * and bias without unloading the model using rte_ml_model_unload(). 57 * 58 * ML Inference: ML inference is the process of feeding data to the model via 59 * rte_ml_enqueue_burst() API and use rte_ml_dequeue_burst() API to get the calculated 60 * outputs/predictions from the started model. 61 * 62 * In all functions of the ML device API, the ML device is designated by an 63 * integer >= 0 named as device identifier *dev_id*. 64 * 65 * The functions exported by the ML device API to setup a device designated by 66 * its device identifier must be invoked in the following order: 67 * 68 * - rte_ml_dev_configure() 69 * - rte_ml_dev_queue_pair_setup() 70 * - rte_ml_dev_start() 71 * 72 * A model is required to run the inference operations with the user specified inputs. 73 * Application needs to invoke the ML model API in the following order before queueing 74 * inference jobs. 75 * 76 * - rte_ml_model_load() 77 * - rte_ml_model_start() 78 * 79 * A model can be loaded on a device only after the device has been configured and can be 80 * started or stopped only after a device has been started. 81 * 82 * The rte_ml_model_info_get() API is provided to retrieve the information related to the model. 83 * The information would include the shape and type of input and output required for the inference. 84 * 85 * Data quantization and dequantization is one of the main aspects in ML domain. This involves 86 * conversion of input data from a higher precision to a lower precision data type and vice-versa 87 * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and 88 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input 89 * and output buffers holding data for multiple batches. 
 *
 * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used to get
 * the size of quantized and de-quantized multi-batch input and output buffers.
 *
 * User can optionally update the model parameters with rte_ml_model_params_update() after
 * invoking rte_ml_model_stop() API on a given model ID.
 *
 * The application can invoke, in any order, the functions exported by the ML API to enqueue
 * inference jobs and dequeue inference response.
 *
 * If the application wants to change the device configuration (i.e., call
 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the
 * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
 * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
 * for the given model. The application does not need to call rte_ml_dev_stop() API for
 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
 *
 * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
 * start state after invoking rte_ml_model_start() API, then the application can call
 * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
 *
 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
 *
 * Typical application utilisation of the ML API will follow the following
 * programming flow.
115 * 116 * - rte_ml_dev_configure() 117 * - rte_ml_dev_queue_pair_setup() 118 * - rte_ml_model_load() 119 * - rte_ml_dev_start() 120 * - rte_ml_model_start() 121 * - rte_ml_model_info_get() 122 * - rte_ml_enqueue_burst() 123 * - rte_ml_dequeue_burst() 124 * - rte_ml_model_stop() 125 * - rte_ml_model_unload() 126 * - rte_ml_dev_stop() 127 * - rte_ml_dev_close() 128 * 129 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD 130 * are lock-free functions which assume to not be invoked in parallel on different logical cores 131 * on the same target object. For instance, the dequeue function of a poll mode driver cannot be 132 * invoked in parallel on two logical cores to operate on same queue pair. Of course, this function 133 * can be invoked in parallel by different logical core on different queue pair. 134 * It is the responsibility of the user application to enforce this rule. 135 */ 136 137 #include <rte_common.h> 138 #include <rte_log.h> 139 #include <rte_mempool.h> 140 141 #ifdef __cplusplus 142 extern "C" { 143 #endif 144 145 /* Logging Macro */ 146 extern int rte_ml_dev_logtype; 147 148 #define RTE_MLDEV_LOG(level, fmt, args...) \ 149 rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args) 150 151 #define RTE_ML_STR_MAX 128 152 /**< Maximum length of name string */ 153 154 #define RTE_MLDEV_DEFAULT_MAX 32 155 /** Maximum number of devices if rte_ml_dev_init() is not called. */ 156 157 /* Device operations */ 158 159 /** 160 * Initialize the device array before probing devices. If not called, the first device probed would 161 * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX. 162 * 163 * @param dev_max 164 * Maximum number of devices. 
165 * 166 * @return 167 * 0 on success, -rte_errno otherwise: 168 * - ENOMEM if out of memory 169 * - EINVAL if 0 size 170 * - EBUSY if already initialized 171 */ 172 __rte_experimental 173 int 174 rte_ml_dev_init(size_t dev_max); 175 176 /** 177 * Get the total number of ML devices that have been successfully initialised. 178 * 179 * @return 180 * - The total number of usable ML devices. 181 */ 182 __rte_experimental 183 uint16_t 184 rte_ml_dev_count(void); 185 186 /** 187 * Check if the device is in ready state. 188 * 189 * @param dev_id 190 * The identifier of the device. 191 * 192 * @return 193 * - 0 if device state is not in ready state. 194 * - 1 if device state is ready state. 195 */ 196 __rte_experimental 197 int 198 rte_ml_dev_is_valid_dev(int16_t dev_id); 199 200 /** 201 * Return the NUMA socket to which a device is connected. 202 * 203 * @param dev_id 204 * The identifier of the device. 205 * 206 * @return 207 * - The NUMA socket id to which the device is connected 208 * - 0 If the socket could not be determined. 209 * - -EINVAL: if the dev_id value is not valid. 210 */ 211 __rte_experimental 212 int 213 rte_ml_dev_socket_id(int16_t dev_id); 214 215 /** ML device information */ 216 struct rte_ml_dev_info { 217 const char *driver_name; 218 /**< Driver name */ 219 uint16_t max_models; 220 /**< Maximum number of models supported by the device. 221 * @see struct rte_ml_dev_config::nb_models 222 */ 223 uint16_t max_queue_pairs; 224 /**< Maximum number of queues pairs supported by the device. 225 * @see struct rte_ml_dev_config::nb_queue_pairs 226 */ 227 uint16_t max_desc; 228 /**< Maximum allowed number of descriptors for queue pair by the device. 229 * @see struct rte_ml_dev_qp_conf::nb_desc 230 */ 231 uint16_t max_segments; 232 /**< Maximum number of scatter-gather entries supported by the device. 
233 * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next 234 */ 235 uint16_t min_align_size; 236 /**< Minimum alignment size of IO buffers used by the device. */ 237 }; 238 239 /** 240 * Retrieve the information of the device. 241 * 242 * @param dev_id 243 * The identifier of the device. 244 * @param dev_info 245 * A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device. 246 * 247 * @return 248 * - 0: Success, driver updates the information of the ML device 249 * - < 0: Error code returned by the driver info get function. 250 */ 251 __rte_experimental 252 int 253 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info); 254 255 /** ML device configuration structure */ 256 struct rte_ml_dev_config { 257 int socket_id; 258 /**< Socket to allocate resources on. */ 259 uint16_t nb_models; 260 /**< Number of models to be loaded on the device. 261 * This value cannot exceed the max_models which is previously provided in 262 * struct rte_ml_dev_info::max_models 263 */ 264 uint16_t nb_queue_pairs; 265 /**< Number of queue pairs to configure on this device. 266 * This value cannot exceed the max_models which is previously provided in 267 * struct rte_ml_dev_info::max_queue_pairs 268 */ 269 }; 270 271 /** 272 * Configure an ML device. 273 * 274 * This function must be invoked first before any other function in the API. 275 * 276 * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after 277 * rte_ml_dev_close() is called. 278 * 279 * The caller may use rte_ml_dev_info_get() to get the capability of each resources available for 280 * this ML device. 281 * 282 * @param dev_id 283 * The identifier of the device to configure. 284 * @param config 285 * The ML device configuration structure. 286 * 287 * @return 288 * - 0: Success, device configured. 289 * - < 0: Error code returned by the driver configuration function. 
290 */ 291 __rte_experimental 292 int 293 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config); 294 295 /* Forward declaration */ 296 struct rte_ml_op; 297 298 /**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */ 299 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op); 300 301 /** ML device queue pair configuration structure. */ 302 struct rte_ml_dev_qp_conf { 303 uint32_t nb_desc; 304 /**< Number of descriptors per queue pair. 305 * This value cannot exceed the max_desc which previously provided in 306 * struct rte_ml_dev_info:max_desc 307 */ 308 rte_ml_dev_stop_flush_t cb; 309 /**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op. 310 * Value NULL is allowed, in which case callback will not be invoked. 311 * This function can be used to properly dispose of outstanding ML ops from all 312 * queue pairs, for example ops containing memory pointers. 313 * @see rte_ml_dev_stop() 314 */ 315 }; 316 317 /** 318 * Set up a queue pair for a device. This should only be called when the device is stopped. 319 * 320 * @param dev_id 321 * The identifier of the device. 322 * @param queue_pair_id 323 * The index of the queue pairs to set up. The value must be in the range [0, nb_queue_pairs - 1] 324 * previously supplied to rte_ml_dev_configure(). 325 * @param qp_conf 326 * The pointer to the configuration data to be used for the queue pair. 327 * @param socket_id 328 * The *socket_id* argument is the socket identifier in case of NUMA. 329 * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated 330 * for the queue pair. 331 * 332 * @return 333 * - 0: Success, queue pair correctly set up. 334 * - < 0: Queue pair configuration failed. 
335 */ 336 __rte_experimental 337 int 338 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id, 339 const struct rte_ml_dev_qp_conf *qp_conf, int socket_id); 340 341 /** 342 * Start an ML device. 343 * 344 * The device start step consists of setting the configured features and enabling the ML device 345 * to accept inference jobs. 346 * 347 * @param dev_id 348 * The identifier of the device. 349 * 350 * @return 351 * - 0: Success, device started. 352 * - <0: Error code of the driver device start function. 353 */ 354 __rte_experimental 355 int 356 rte_ml_dev_start(int16_t dev_id); 357 358 /** 359 * Stop an ML device. A stopped device cannot accept inference jobs. 360 * The device can be restarted with a call to rte_ml_dev_start(). 361 * 362 * @param dev_id 363 * The identifier of the device. 364 * 365 * @return 366 * - 0: Success, device stopped. 367 * - <0: Error code of the driver device stop function. 368 */ 369 __rte_experimental 370 int 371 rte_ml_dev_stop(int16_t dev_id); 372 373 /** 374 * Close an ML device. The device cannot be restarted! 375 * 376 * @param dev_id 377 * The identifier of the device. 378 * 379 * @return 380 * - 0 on successfully closing device. 381 * - <0 on failure to close device. 382 */ 383 __rte_experimental 384 int 385 rte_ml_dev_close(int16_t dev_id); 386 387 /** Status of ML operation */ 388 enum rte_ml_op_status { 389 RTE_ML_OP_STATUS_SUCCESS = 0, 390 /**< Operation completed successfully */ 391 RTE_ML_OP_STATUS_NOT_PROCESSED, 392 /**< Operation has not yet been processed by the device. */ 393 RTE_ML_OP_STATUS_ERROR, 394 /**< Operation completed with error. 395 * Application can invoke rte_ml_op_error_get() to get PMD specific 396 * error code if needed. 397 */ 398 }; 399 400 /** ML operation's input and output buffer representation as scatter gather list 401 */ 402 struct rte_ml_buff_seg { 403 rte_iova_t iova_addr; 404 /**< IOVA address of segment buffer. */ 405 void *addr; 406 /**< Virtual address of segment buffer. 
*/ 407 uint32_t length; 408 /**< Segment length. */ 409 uint32_t reserved; 410 /**< Reserved for future use. */ 411 struct rte_ml_buff_seg *next; 412 /**< Points to next segment. Value NULL represents the last segment. */ 413 }; 414 415 /** 416 * ML Operation. 417 * 418 * This structure contains data related to performing an ML operation on the buffers using 419 * the model specified through model_id. 420 */ 421 struct rte_ml_op { 422 uint16_t model_id; 423 /**< Model ID to be used for the operation. */ 424 uint16_t nb_batches; 425 /**< Number of batches. Minimum value must be one. 426 * Input buffer must hold inference data for each batch as contiguous. 427 */ 428 uint32_t reserved; 429 /**< Reserved for future use. */ 430 struct rte_mempool *mempool; 431 /**< Pool from which operation is allocated. */ 432 struct rte_ml_buff_seg input; 433 /**< Input buffer to hold the inference data. */ 434 struct rte_ml_buff_seg output; 435 /**< Output buffer to hold the inference output by the driver. */ 436 union { 437 uint64_t user_u64; 438 /**< User data as uint64_t.*/ 439 void *user_ptr; 440 /**< User data as void*.*/ 441 }; 442 enum rte_ml_op_status status; 443 /**< Operation status. */ 444 uint64_t impl_opaque; 445 /**< Implementation specific opaque value. 446 * An implementation may use this field to hold 447 * implementation specific value to share between 448 * dequeue and enqueue operation. 449 * The application should not modify this field. 450 */ 451 } __rte_cache_aligned; 452 453 /* Enqueue/Dequeue operations */ 454 455 /** 456 * Enqueue a burst of ML inferences for processing on an ML device. 457 * 458 * The rte_ml_enqueue_burst() function is invoked to place ML inference 459 * operations on the queue *qp_id* of the device designated by its *dev_id*. 460 * 461 * The *nb_ops* parameter is the number of inferences to process which are 462 * supplied in the *ops* array of *rte_ml_op* structures. 
463 * 464 * The rte_ml_enqueue_burst() function returns the number of inferences it 465 * actually enqueued for processing. A return value equal to *nb_ops* means that 466 * all packets have been enqueued. 467 * 468 * @param dev_id 469 * The identifier of the device. 470 * @param qp_id 471 * The index of the queue pair which inferences are to be enqueued for processing. 472 * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to 473 * *rte_ml_dev_configure*. 474 * @param ops 475 * The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the 476 * ML inferences to be processed. 477 * @param nb_ops 478 * The number of operations to process. 479 * 480 * @return 481 * The number of inference operations actually enqueued to the ML device. 482 * The return value can be less than the value of the *nb_ops* parameter when the ML device queue 483 * is full or if invalid parameters are specified in a *rte_ml_op*. 484 */ 485 __rte_experimental 486 uint16_t 487 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops); 488 489 /** 490 * Dequeue a burst of processed ML inferences operations from a queue on the ML device. 491 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied 492 * in the *ops* array. 493 * 494 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued, 495 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array. 496 * 497 * A return value equal to *nb_ops* indicates that the queue contained at least nb_ops* operations, 498 * and this is likely to signify that other processed operations remain in the devices output queue. 499 * Application implementing a "retrieve as many processed operations as possible" policy can check 500 * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than 501 * *nb_ops* is returned. 
502 * 503 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid 504 * the corresponding overhead. 505 * 506 * @param dev_id 507 * The identifier of the device. 508 * @param qp_id 509 * The index of the queue pair from which to retrieve processed packets. 510 * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to 511 * rte_ml_dev_configure(). 512 * @param ops 513 * The address of an array of pointers to *rte_ml_op* structures that must be large enough to 514 * store *nb_ops* pointers in it. 515 * @param nb_ops 516 * The maximum number of inferences to dequeue. 517 * 518 * @return 519 * The number of operations actually dequeued, which is the number of pointers 520 * to *rte_ml_op* structures effectively supplied to the *ops* array. 521 */ 522 __rte_experimental 523 uint16_t 524 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops); 525 526 /** 527 * Verbose error structure definition. 528 */ 529 struct rte_ml_op_error { 530 char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */ 531 uint64_t errcode; /**< Vendor specific error code. */ 532 }; 533 534 /** 535 * Get PMD specific error information for an ML op. 536 * 537 * When an ML operation completed with RTE_ML_OP_STATUS_ERROR as status, 538 * This API allows to get PMD specific error details. 539 * 540 * @param[in] dev_id 541 * Device identifier 542 * @param[in] op 543 * Handle of ML operation 544 * @param[in] error 545 * Address of structure rte_ml_op_error to be filled 546 * 547 * @return 548 * - Returns 0 on success 549 * - Returns negative value on failure 550 */ 551 __rte_experimental 552 int 553 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error); 554 555 /* Statistics operations */ 556 557 /** Device statistics. 
*/ 558 struct rte_ml_dev_stats { 559 uint64_t enqueued_count; 560 /**< Count of all operations enqueued */ 561 uint64_t dequeued_count; 562 /**< Count of all operations dequeued */ 563 uint64_t enqueue_err_count; 564 /**< Total error count on operations enqueued */ 565 uint64_t dequeue_err_count; 566 /**< Total error count on operations dequeued */ 567 }; 568 569 /** 570 * Retrieve the general I/O statistics of a device. 571 * 572 * @param dev_id 573 * The identifier of the device. 574 * @param stats 575 * Pointer to structure to where statistics will be copied. 576 * On error, this location may or may not have been modified. 577 * @return 578 * - 0 on success 579 * - -EINVAL: If invalid parameter pointer is provided. 580 */ 581 __rte_experimental 582 int 583 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats); 584 585 /** 586 * Reset the statistics of a device. 587 * 588 * @param dev_id 589 * The identifier of the device. 590 */ 591 __rte_experimental 592 void 593 rte_ml_dev_stats_reset(int16_t dev_id); 594 595 /** 596 * Selects the component of the mldev to retrieve statistics from. 597 */ 598 enum rte_ml_dev_xstats_mode { 599 RTE_ML_DEV_XSTATS_DEVICE, 600 /**< Device xstats */ 601 RTE_ML_DEV_XSTATS_MODEL, 602 /**< Model xstats */ 603 }; 604 605 /** 606 * A name-key lookup element for extended statistics. 607 * 608 * This structure is used to map between names and ID numbers for extended ML device statistics. 609 */ 610 struct rte_ml_dev_xstats_map { 611 uint16_t id; 612 /**< xstat identifier */ 613 char name[RTE_ML_STR_MAX]; 614 /**< xstat name */ 615 }; 616 617 /** 618 * Retrieve names of extended statistics of an ML device. 619 * 620 * @param dev_id 621 * The identifier of the device. 622 * @param mode 623 * Mode of statistics to retrieve. Choices include the device statistics and model statistics. 624 * @param model_id 625 * Used to specify the model number in model mode, and is ignored in device mode. 
626 * @param[out] xstats_map 627 * Block of memory to insert names and ids into. Must be at least size in capacity. If set to 628 * NULL, function returns required capacity. The id values returned can be passed to 629 * *rte_ml_dev_xstats_get* to select statistics. 630 * @param size 631 * Capacity of xstats_names (number of xstats_map). 632 * @return 633 * - Positive value lower or equal to size: success. The return value is the number of entries 634 * filled in the stats table. 635 * - Positive value higher than size: error, the given statistics table is too small. The return 636 * value corresponds to the size that should be given to succeed. The entries in the table are not 637 * valid and shall not be used by the caller. 638 * - Negative value on error: 639 * -ENODEV for invalid *dev_id*. 640 * -EINVAL for invalid mode, model parameters. 641 * -ENOTSUP if the device doesn't support this function. 642 */ 643 __rte_experimental 644 int 645 rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id, 646 struct rte_ml_dev_xstats_map *xstats_map, uint32_t size); 647 648 /** 649 * Retrieve the value of a single stat by requesting it by name. 650 * 651 * @param dev_id 652 * The identifier of the device. 653 * @param name 654 * Name of stat name to retrieve. 655 * @param[out] stat_id 656 * If non-NULL, the numerical id of the stat will be returned, so that further requests for the 657 * stat can be got using rte_ml_dev_xstats_get, which will be faster as it doesn't need to scan a 658 * list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1. 659 * @param[out] value 660 * Value of the stat to be returned. 661 * @return 662 * - Zero: No error. 663 * - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported. 
664 */ 665 __rte_experimental 666 int 667 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value); 668 669 /** 670 * Retrieve extended statistics of an ML device. 671 * 672 * @param dev_id 673 * The identifier of the device. 674 * @param mode 675 * Mode of statistics to retrieve. Choices include the device statistics and model statistics. 676 * @param model_id 677 * Used to specify the model id in model mode, and is ignored in device mode. 678 * @param stat_ids 679 * ID numbers of the stats to get. The ids can be got from the stat position in the stat list from 680 * rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get(). 681 * @param[out] values 682 * Values for each stats request by ID. 683 * @param nb_ids 684 * Number of stats requested. 685 * @return 686 * - Positive value: number of stat entries filled into the values array 687 * - Negative value on error: 688 * -ENODEV for invalid *dev_id*. 689 * -EINVAL for invalid mode, model id or stat id parameters. 690 * -ENOTSUP if the device doesn't support this function. 691 */ 692 __rte_experimental 693 int 694 rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id, 695 const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids); 696 697 /** 698 * Reset the values of the xstats of the selected component in the device. 699 * 700 * @param dev_id 701 * The identifier of the device. 702 * @param mode 703 * Mode of the statistics to reset. Choose from device or model. 704 * @param model_id 705 * Model stats to reset. 0 and positive values select models, while -1 indicates all models. 706 * @param stat_ids 707 * Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be 708 * reset. If non-NULL, must point to array of at least *nb_ids* size. 709 * @param nb_ids 710 * The number of ids available from the *ids* array. Ignored when ids is NULL. 
711 * @return 712 * - Zero: successfully reset the statistics. 713 * - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported. 714 */ 715 __rte_experimental 716 int 717 rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id, 718 const uint16_t stat_ids[], uint16_t nb_ids); 719 720 /** 721 * Dump internal information about *dev_id* to the FILE* provided in *fd*. 722 * 723 * @param dev_id 724 * The identifier of the device. 725 * @param fd 726 * A pointer to a file for output. 727 * @return 728 * - 0: on success. 729 * - <0: on failure. 730 */ 731 __rte_experimental 732 int 733 rte_ml_dev_dump(int16_t dev_id, FILE *fd); 734 735 /** 736 * Trigger the ML device self test. 737 * 738 * @param dev_id 739 * The identifier of the device. 740 * @return 741 * - 0: Selftest successful. 742 * - -ENOTSUP: if the device doesn't support selftest. 743 * - other values < 0 on failure. 744 */ 745 __rte_experimental 746 int 747 rte_ml_dev_selftest(int16_t dev_id); 748 749 /* Model operations */ 750 751 /** ML model load parameters 752 * 753 * Parameters required to load an ML model. 754 */ 755 struct rte_ml_model_params { 756 void *addr; 757 /**< Address of model buffer */ 758 size_t size; 759 /**< Size of model buffer */ 760 }; 761 762 /** 763 * Load an ML model to the device. 764 * 765 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params. 766 * 767 * @param[in] dev_id 768 * The identifier of the device. 769 * @param[in] params 770 * Parameters for the model to be loaded. 771 * @param[out] model_id 772 * Identifier of the model loaded. 773 * 774 * @return 775 * - 0: Success, Model loaded. 776 * - < 0: Failure, Error code of the model load driver function. 777 */ 778 __rte_experimental 779 int 780 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id); 781 782 /** 783 * Unload an ML model from the device. 
784 * 785 * @param[in] dev_id 786 * The identifier of the device. 787 * @param[in] model_id 788 * Identifier of the model to be unloaded. 789 * 790 * @return 791 * - 0: Success, Model unloaded. 792 * - < 0: Failure, Error code of the model unload driver function. 793 */ 794 __rte_experimental 795 int 796 rte_ml_model_unload(int16_t dev_id, uint16_t model_id); 797 798 /** 799 * Start an ML model for the given device ID. 800 * 801 * Start an ML model to accept inference requests. 802 * 803 * @param[in] dev_id 804 * The identifier of the device. 805 * @param[in] model_id 806 * Identifier of the model to be started. 807 * 808 * @return 809 * - 0: Success, Model loaded. 810 * - < 0: Failure, Error code of the model start driver function. 811 */ 812 __rte_experimental 813 int 814 rte_ml_model_start(int16_t dev_id, uint16_t model_id); 815 816 /** 817 * Stop an ML model for the given device ID. 818 * 819 * Model stop would disable the ML model to be used for inference jobs. 820 * All inference jobs must have been completed before model stop is attempted. 821 822 * @param[in] dev_id 823 * The identifier of the device. 824 * @param[in] model_id 825 * Identifier of the model to be stopped. 826 * 827 * @return 828 * - 0: Success, Model unloaded. 829 * - < 0: Failure, Error code of the model stop driver function. 830 */ 831 __rte_experimental 832 int 833 rte_ml_model_stop(int16_t dev_id, uint16_t model_id); 834 835 /** 836 * Input and output data types. ML models can operate on reduced precision 837 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint. 838 * This enum is used to represent the lower precision integer and floating point types used 839 * by ML models. 
 */
enum rte_ml_io_type {
	RTE_ML_IO_TYPE_UNKNOWN = 0,
	/**< Invalid or unknown type */
	RTE_ML_IO_TYPE_INT8,
	/**< 8-bit integer */
	RTE_ML_IO_TYPE_UINT8,
	/**< 8-bit unsigned integer */
	RTE_ML_IO_TYPE_INT16,
	/**< 16-bit integer */
	RTE_ML_IO_TYPE_UINT16,
	/**< 16-bit unsigned integer */
	RTE_ML_IO_TYPE_INT32,
	/**< 32-bit integer */
	RTE_ML_IO_TYPE_UINT32,
	/**< 32-bit unsigned integer */
	RTE_ML_IO_TYPE_FP8,
	/**< 8-bit floating point number */
	RTE_ML_IO_TYPE_FP16,
	/**< IEEE 754 16-bit floating point number */
	RTE_ML_IO_TYPE_FP32,
	/**< IEEE 754 32-bit floating point number */
	RTE_ML_IO_TYPE_BFLOAT16
	/**< 16-bit brain floating point number. */
};

/**
 * Input and output format. This is used to represent the encoding type of
 * multi-dimensional data used by ML models.
 */
enum rte_ml_io_format {
	RTE_ML_IO_FORMAT_NCHW = 1,
	/**< Batch size (N) x channels (C) x height (H) x width (W) */
	RTE_ML_IO_FORMAT_NHWC,
	/**< Batch size (N) x height (H) x width (W) x channels (C) */
	RTE_ML_IO_FORMAT_CHWN,
	/**< Channels (C) x height (H) x width (W) x batch size (N) */
	RTE_ML_IO_FORMAT_3D,
	/**< Format to represent a 3 dimensional data */
	RTE_ML_IO_FORMAT_2D,
	/**< Format to represent matrix data */
	RTE_ML_IO_FORMAT_1D,
	/**< Format to represent vector data */
	RTE_ML_IO_FORMAT_SCALAR,
	/**< Format to represent scalar data */
};

/**
 * Input and output shape. This structure represents the encoding format and dimensions
 * of the tensor or vector.
 *
 * The data can be a 4D / 3D tensor, matrix, vector or a scalar. Number of dimensions used
 * for the data would depend on the format. Unused dimensions to be set to 1.
 */
struct rte_ml_io_shape {
	enum rte_ml_io_format format;
	/**< Format of the data */
	uint32_t w;
	/**< First dimension */
	uint32_t x;
	/**< Second dimension */
	uint32_t y;
	/**< Third dimension */
	uint32_t z;
	/**< Fourth dimension */
};

/** Input and output data information structure
 *
 * Specifies the type and shape of input and output data.
 */
struct rte_ml_io_info {
	char name[RTE_ML_STR_MAX];
	/**< Name of data */
	struct rte_ml_io_shape shape;
	/**< Shape of data */
	enum rte_ml_io_type qtype;
	/**< Type of quantized data */
	enum rte_ml_io_type dtype;
	/**< Type of de-quantized data */
};

/** Model information structure */
struct rte_ml_model_info {
	char name[RTE_ML_STR_MAX];
	/**< Model name. */
	char version[RTE_ML_STR_MAX];
	/**< Model version */
	uint16_t model_id;
	/**< Model ID */
	uint16_t device_id;
	/**< Device ID */
	uint16_t batch_size;
	/**< Maximum number of batches that the model can process simultaneously */
	uint32_t nb_inputs;
	/**< Number of inputs */
	const struct rte_ml_io_info *input_info;
	/**< Input info array. Array size is equal to nb_inputs */
	uint32_t nb_outputs;
	/**< Number of outputs */
	const struct rte_ml_io_info *output_info;
	/**< Output info array. Array size is equal to nb_outputs */
	uint64_t wb_size;
	/**< Size of model weights and bias */
};

/**
 * Get ML model information.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[out] model_info
 *   Pointer to a model info structure
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);

/**
 * Update the model parameters without unloading model.
 *
 * Update model parameters such as weights and bias without unloading the model.
 * rte_ml_model_stop() must be called before invoking this API.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] buffer
 *   Pointer to the model weights and bias buffer.
 *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);

/* IO operations */

/**
 * Get size of quantized and dequantized input buffers.
 *
 * Calculate the size of buffers required for quantized and dequantized input data.
 * This API would return the buffer sizes for the number of batches provided and would
 * consider the alignment requirements as per the PMD. Input sizes computed by this API can
 * be used by the application to allocate buffers.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] nb_batches
 *   Number of batches of input to be processed in a single inference job
 * @param[out] input_qsize
 *   Quantized input size pointer.
 *   NULL value is allowed, in which case input_qsize is not calculated by the driver.
 * @param[out] input_dsize
 *   Dequantized input size pointer.
 *   NULL value is allowed, in which case input_dsize is not calculated by the driver.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
			 uint64_t *input_qsize, uint64_t *input_dsize);

/**
 * Get size of quantized and dequantized output buffers.
 *
 * Calculate the size of buffers required for quantized and dequantized output data.
 * This API would return the buffer sizes for the number of batches provided and would consider
 * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
 * application to allocate buffers.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] nb_batches
 *   Number of batches of input to be processed in a single inference job
 * @param[out] output_qsize
 *   Quantized output size pointer.
 *   NULL value is allowed, in which case output_qsize is not calculated by the driver.
 * @param[out] output_dsize
 *   Dequantized output size pointer.
 *   NULL value is allowed, in which case output_dsize is not calculated by the driver.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
			  uint64_t *output_qsize, uint64_t *output_dsize);

/**
 * Quantize input data.
 *
 * Quantization converts data from a higher precision types to a lower precision types to improve
 * the throughput and efficiency of the model execution with minimal loss of accuracy.
 * Types of dequantized data and quantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] nb_batches
 *   Number of batches in the dequantized input buffer
 * @param[in] dbuffer
 *   Address of dequantized input data
 * @param[out] qbuffer
 *   Address of quantized input data.
 *   The driver writes the quantized result to this buffer.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
		   void *qbuffer);

/**
 * Dequantize output data.
 *
 * Dequantization converts data from a lower precision type to a higher precision type.
 * Types of quantized data and dequantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] nb_batches
 *   Number of batches in the dequantized output buffer
 * @param[in] qbuffer
 *   Address of quantized output data
 * @param[out] dbuffer
 *   Address of dequantized output data.
 *   The driver writes the dequantized result to this buffer.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
		     void *dbuffer);

/* ML op pool operations */

/**
 * Create an ML operation pool
 *
 * @param name
 *   ML operations pool name
 * @param nb_elts
 *   Number of elements in pool
 * @param cache_size
 *   Number of elements to cache on lcore, see
 *   *rte_mempool_create* for further details about cache size
 * @param user_size
 *   Size of private data to allocate for user with each operation
 * @param socket_id
 *   Socket identifier to allocate memory on
 * @return
 *   - On success pointer to mempool
 *   - On failure NULL
 */
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
		      uint16_t user_size, int socket_id);

/**
 * Free an ML operation pool
 *
 * @param mempool
 *   A pointer to the mempool structure.
 *   If NULL then, the function does nothing.
 */
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);

#ifdef __cplusplus
}
#endif

#endif /* RTE_MLDEV_H */