/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#ifndef RTE_MLDEV_H
#define RTE_MLDEV_H

/**
 * @file rte_mldev.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * ML (Machine Learning) device API.
 *
 * The ML framework is built on the following model:
 *
 *
 *     +-----------------+               rte_ml_[en|de]queue_burst()
 *     |                 |                          |
 *     |     Machine     o------+     +--------+    |
 *     |     Learning    |      |     | queue  |    |    +------+
 *     |     Inference   o------+-----o        |<===o===>|Core 0|
 *     |     Engine      |      |     | pair 0 |         +------+
 *     |                 o----+ |     +--------+
 *     |                 |    | |
 *     +-----------------+    | |     +--------+
 *              ^             | |     | queue  |         +------+
 *              |             | +-----o        |<=======>|Core 1|
 *              |             |       | pair 1 |         +------+
 *              |             |       +--------+
 *     +--------+--------+    |
 *     | +-------------+ |    |       +--------+
 *     | |   Model 0   | |    |       | queue  |         +------+
 *     | +-------------+ |    +-------o        |<=======>|Core N|
 *     | +-------------+ |            | pair N |         +------+
 *     | |   Model 1   | |            +--------+
 *     | +-------------+ |
 *     | +-------------+ |<------> rte_ml_model_load()
 *     | |   Model ..  | |-------> rte_ml_model_info_get()
 *     | +-------------+ |<------- rte_ml_model_start()
 *     | +-------------+ |<------- rte_ml_model_stop()
 *     | |   Model N   | |<------- rte_ml_model_params_update()
 *     | +-------------+ |<------- rte_ml_model_unload()
 *     +-----------------+
 *
 * ML Device: A hardware or software-based implementation of ML device API for
 * running inferences using a pre-trained ML model.
 *
 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
 * procedure/algorithm and data/pattern required to make predictions on live data.
 * Once the model is created and trained outside of the DPDK scope, the model can be loaded
 * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
 * The rte_ml_model_params_update() can be used to update the model parameters such as weights
 * and bias without unloading the model using rte_ml_model_unload().
 *
 * ML Inference: ML inference is the process of feeding data to the model via
 * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
 * outputs/predictions from the started model.
 *
 * In all functions of the ML device API, the ML device is designated by an
 * integer >= 0 named as device identifier *dev_id*.
 *
 * The functions exported by the ML device API to setup a device designated by
 * its device identifier must be invoked in the following order:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_dev_start()
 *
 * A model is required to run the inference operations with the user specified inputs.
 * Application needs to invoke the ML model API in the following order before queueing
 * inference jobs.
 *
 * - rte_ml_model_load()
 * - rte_ml_model_start()
 *
 * A model can be loaded on a device only after the device has been configured and can be
 * started or stopped only after a device has been started.
 *
 * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
 * The information would include the shape and type of input and output required for the inference.
 *
 * Data quantization and dequantization is one of the main aspects in ML domain. This involves
 * conversion of input data from a higher precision to a lower precision data type and vice-versa
 * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
 * and output buffers holding data for multiple batches.
 *
 * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used to get
 * the size of quantized and de-quantized multi-batch input and output buffers.
 *
 * User can optionally update the model parameters with rte_ml_model_params_update() after
 * invoking rte_ml_model_stop() API on a given model ID.
 *
 * The application can invoke, in any order, the functions exported by the ML API to enqueue
 * inference jobs and dequeue inference responses.
 *
 * If the application wants to change the device configuration (i.e., call
 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
 * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
 * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
 * for the given model. The application does not need to call rte_ml_dev_stop() API for
 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
 *
 * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
 * the start state after invoking rte_ml_model_start() API, then the application can call
 * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
 *
 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
 *
 * Typical application utilisation of the ML API will follow the following
 * programming flow.
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_model_load()
 * - rte_ml_dev_start()
 * - rte_ml_model_start()
 * - rte_ml_model_info_get()
 * - rte_ml_enqueue_burst()
 * - rte_ml_dequeue_burst()
 * - rte_ml_model_stop()
 * - rte_ml_model_unload()
 * - rte_ml_dev_stop()
 * - rte_ml_dev_close()
 *
 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
 * are lock-free functions which assume to not be invoked in parallel on different logical cores
 * on the same target object. For instance, the dequeue function of a poll mode driver cannot be
 * invoked in parallel on two logical cores to operate on the same queue pair. Of course, this
 * function can be invoked in parallel by different logical cores on different queue pairs.
 * It is the responsibility of the user application to enforce this rule.
 */

#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Logging Macro */
extern int rte_ml_dev_logtype;
#define RTE_LOGTYPE_MLDEV rte_ml_dev_logtype

#define RTE_MLDEV_LOG(level, fmt, args...) \
	RTE_LOG_LINE(level, MLDEV, "%s(): " fmt, __func__, ##args)

#define RTE_ML_STR_MAX 128
/**< Maximum length of name string */

#define RTE_MLDEV_DEFAULT_MAX 32
/** Maximum number of devices if rte_ml_dev_init() is not called. */

/* Device operations */

/**
 * Initialize the device array before probing devices. If not called, the first device probed would
 * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
 *
 * @param dev_max
 *   Maximum number of devices.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENOMEM if out of memory
 *   - EINVAL if *dev_max* is 0
 *   - EBUSY if already initialized
 */
__rte_experimental
int
rte_ml_dev_init(size_t dev_max);

/**
 * Get the total number of ML devices that have been successfully initialised.
 *
 * @return
 *   - The total number of usable ML devices.
 */
__rte_experimental
uint16_t
rte_ml_dev_count(void);

/**
 * Check if the device is in ready state.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 if the device is not in ready state.
 *   - 1 if the device is in ready state.
 */
__rte_experimental
int
rte_ml_dev_is_valid_dev(int16_t dev_id);

/**
 * Return the NUMA socket to which a device is connected.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - The NUMA socket id to which the device is connected.
 *   - 0 if the socket could not be determined.
 *   - -EINVAL: if the dev_id value is not valid.
 */
__rte_experimental
int
rte_ml_dev_socket_id(int16_t dev_id);

/** ML device information */
struct rte_ml_dev_info {
	const char *driver_name;
	/**< Driver name */
	uint16_t max_models;
	/**< Maximum number of models supported by the device.
	 * @see struct rte_ml_dev_config::nb_models
	 */
	uint16_t max_queue_pairs;
	/**< Maximum number of queue pairs supported by the device.
	 * @see struct rte_ml_dev_config::nb_queue_pairs
	 */
	uint16_t max_desc;
	/**< Maximum allowed number of descriptors for queue pair by the device.
	 * @see struct rte_ml_dev_qp_conf::nb_desc
	 */
	uint16_t max_io;
	/**< Maximum number of inputs/outputs supported per model. */
	uint16_t max_segments;
	/**< Maximum number of scatter-gather entries supported by the device.
	 * @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
	 */
	uint16_t align_size;
	/**< Alignment size of IO buffers used by the device. */
};

/**
 * Retrieve the information of the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param dev_info
 *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
 *
 * @return
 *   - 0: Success, driver updates the information of the ML device
 *   - < 0: Error code returned by the driver info get function.
 */
__rte_experimental
int
rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
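
/**
 * Example: a minimal device-discovery sketch (illustrative only, not part of the API).
 * It assumes the EAL has already been initialised and simply walks the probed ML
 * devices, printing the driver name and the capability limits reported by each one.
 *
 * @code
 * uint16_t count = rte_ml_dev_count();
 *
 * for (int16_t dev_id = 0; dev_id < (int16_t)count; dev_id++) {
 *	struct rte_ml_dev_info info;
 *
 *	if (!rte_ml_dev_is_valid_dev(dev_id))
 *		continue;
 *	if (rte_ml_dev_info_get(dev_id, &info) < 0)
 *		continue;
 *	printf("dev %d: %s, max_models %u, max_queue_pairs %u\n",
 *	       dev_id, info.driver_name, info.max_models, info.max_queue_pairs);
 * }
 * @endcode
 */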

/** ML device configuration structure */
struct rte_ml_dev_config {
	int socket_id;
	/**< Socket to allocate resources on. */
	uint16_t nb_models;
	/**< Number of models to be loaded on the device.
	 * This value cannot exceed the max_models previously provided in
	 * struct rte_ml_dev_info::max_models
	 */
	uint16_t nb_queue_pairs;
	/**< Number of queue pairs to configure on this device.
	 * This value cannot exceed the max_queue_pairs previously provided in
	 * struct rte_ml_dev_info::max_queue_pairs
	 */
};

/**
 * Configure an ML device.
 *
 * This function must be invoked first before any other function in the API.
 *
 * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after
 * rte_ml_dev_close() is called.
 *
 * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
 * this ML device.
 *
 * @param dev_id
 *   The identifier of the device to configure.
 * @param config
 *   The ML device configuration structure.
 *
 * @return
 *   - 0: Success, device configured.
 *   - < 0: Error code returned by the driver configuration function.
 */
__rte_experimental
int
rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);

/* Forward declaration */
struct rte_ml_op;

/**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);

/** ML device queue pair configuration structure. */
struct rte_ml_dev_qp_conf {
	uint32_t nb_desc;
	/**< Number of descriptors per queue pair.
	 * This value cannot exceed the max_desc previously provided in
	 * struct rte_ml_dev_info::max_desc
	 */
	rte_ml_dev_stop_flush_t cb;
	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
	 * Value NULL is allowed, in which case callback will not be invoked.
	 * This function can be used to properly dispose of outstanding ML ops from all
	 * queue pairs, for example ops containing memory pointers.
	 * @see rte_ml_dev_stop()
	 */
};

/**
 * Set up a queue pair for a device. This should only be called when the device is stopped.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param queue_pair_id
 *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
 *   previously supplied to rte_ml_dev_configure().
 * @param qp_conf
 *   The pointer to the configuration data to be used for the queue pair.
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in case of NUMA.
 *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
 *   for the queue pair.
 *
 * @return
 *   - 0: Success, queue pair correctly set up.
 *   - < 0: Queue pair configuration failed.
 */
__rte_experimental
int
rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);

/**
 * Start an ML device.
 *
 * The device start step consists of setting the configured features and enabling the ML device
 * to accept inference jobs.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device started.
 *   - <0: Error code of the driver device start function.
 */
__rte_experimental
int
rte_ml_dev_start(int16_t dev_id);

/**
 * Stop an ML device. A stopped device cannot accept inference jobs.
 * The device can be restarted with a call to rte_ml_dev_start().
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device stopped.
 *   - <0: Error code of the driver device stop function.
 */
__rte_experimental
int
rte_ml_dev_stop(int16_t dev_id);

/**
 * Close an ML device. The device cannot be restarted!
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 on successfully closing device.
 *   - <0 on failure to close device.
 */
__rte_experimental
int
rte_ml_dev_close(int16_t dev_id);
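
/**
 * Example: a minimal device set-up sketch (illustrative only, not part of the API).
 * Values such as the number of queue pairs, the descriptor count and "dev_id" are
 * assumptions made for the example; a real application would derive them from
 * rte_ml_dev_info_get().
 *
 * @code
 * struct rte_ml_dev_config conf = {
 *	.socket_id = rte_ml_dev_socket_id(dev_id),
 *	.nb_models = 1,
 *	.nb_queue_pairs = 1,
 * };
 * struct rte_ml_dev_qp_conf qp_conf = {
 *	.nb_desc = 128,
 *	.cb = NULL,	// no flush callback
 * };
 *
 * if (rte_ml_dev_configure(dev_id, &conf) < 0)
 *	rte_exit(EXIT_FAILURE, "device configure failed\n");
 * if (rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY) < 0)
 *	rte_exit(EXIT_FAILURE, "queue pair setup failed\n");
 * if (rte_ml_dev_start(dev_id) < 0)
 *	rte_exit(EXIT_FAILURE, "device start failed\n");
 * @endcode
 */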

/** Status of ML operation */
enum rte_ml_op_status {
	RTE_ML_OP_STATUS_SUCCESS = 0,
	/**< Operation completed successfully */
	RTE_ML_OP_STATUS_NOT_PROCESSED,
	/**< Operation has not yet been processed by the device. */
	RTE_ML_OP_STATUS_ERROR,
	/**< Operation completed with error.
	 * Application can invoke rte_ml_op_error_get() to get PMD specific
	 * error code if needed.
	 */
};

/** ML operation's input and output buffer representation as a scatter-gather list */
struct rte_ml_buff_seg {
	rte_iova_t iova_addr;
	/**< IOVA address of segment buffer. */
	void *addr;
	/**< Virtual address of segment buffer. */
	uint32_t length;
	/**< Segment length. */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_ml_buff_seg *next;
	/**< Points to next segment. Value NULL represents the last segment. */
};

/**
 * ML Operation.
 *
 * This structure contains data related to performing an ML operation on the buffers using
 * the model specified through model_id.
 */
struct rte_ml_op {
	uint16_t model_id;
	/**< Model ID to be used for the operation. */
	uint16_t nb_batches;
	/**< Number of batches. Minimum value must be one.
	 * Input buffer must hold inference data for each batch as contiguous.
	 */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_mempool *mempool;
	/**< Pool from which operation is allocated. */
	struct rte_ml_buff_seg **input;
	/**< Array of buffer segments to hold the inference input data.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
	 * the array is 1.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
	 * the array is rte_ml_model_info::nb_inputs.
	 *
	 * @see struct rte_ml_model_info::io_layout
	 */
	struct rte_ml_buff_seg **output;
	/**< Array of buffer segments to hold the inference output data.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
	 * the array is 1.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
	 * the array is rte_ml_model_info::nb_outputs.
	 *
	 * @see struct rte_ml_model_info::io_layout
	 */
	union {
		uint64_t user_u64;
		/**< User data as uint64_t. */
		void *user_ptr;
		/**< User data as void*. */
	};
	enum rte_ml_op_status status;
	/**< Operation status. */
	uint64_t impl_opaque;
	/**< Implementation specific opaque value.
	 * An implementation may use this field to hold
	 * implementation specific value to share between
	 * dequeue and enqueue operation.
	 * The application should not modify this field.
	 */
} __rte_cache_aligned;
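
/**
 * Example: a minimal sketch of allocating and filling an ML op (illustrative only,
 * not part of the API). It assumes a model using the packed I/O layout, so a single
 * input and a single output segment are used; the buffer addresses, the lengths and
 * "model_id" are placeholders for the example.
 *
 * @code
 * struct rte_mempool *op_pool;
 * struct rte_ml_op *op;
 * struct rte_ml_buff_seg in_seg, out_seg;
 * struct rte_ml_buff_seg *in = &in_seg, *out = &out_seg;
 *
 * op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, SOCKET_ID_ANY);
 * if (op_pool == NULL || rte_mempool_get(op_pool, (void **)&op) < 0)
 *	return -1;
 *
 * in_seg.addr = input_buf;			// quantized input data
 * in_seg.iova_addr = rte_mem_virt2iova(input_buf);
 * in_seg.length = input_len;
 * in_seg.next = NULL;
 * out_seg.addr = output_buf;			// buffer for quantized output
 * out_seg.iova_addr = rte_mem_virt2iova(output_buf);
 * out_seg.length = output_len;
 * out_seg.next = NULL;
 *
 * op->model_id = model_id;
 * op->nb_batches = 1;
 * op->mempool = op_pool;
 * op->input = &in;
 * op->output = &out;
 * @endcode
 */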

/* Enqueue/Dequeue operations */

/**
 * Enqueue a burst of ML inferences for processing on an ML device.
 *
 * The rte_ml_enqueue_burst() function is invoked to place ML inference
 * operations on the queue *qp_id* of the device designated by its *dev_id*.
 *
 * The *nb_ops* parameter is the number of inferences to process which are
 * supplied in the *ops* array of *rte_ml_op* structures.
 *
 * The rte_ml_enqueue_burst() function returns the number of inferences it
 * actually enqueued for processing. A return value equal to *nb_ops* means that
 * all operations have been enqueued.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair on which inferences are to be enqueued for processing.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   *rte_ml_dev_configure*.
 * @param ops
 *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
 *   ML inferences to be processed.
 * @param nb_ops
 *   The number of operations to process.
 *
 * @return
 *   The number of inference operations actually enqueued to the ML device.
 *   The return value can be less than the value of the *nb_ops* parameter when the ML device
 *   queue is full or if invalid parameters are specified in a *rte_ml_op*.
 */
__rte_experimental
uint16_t
rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Dequeue a burst of processed ML inference operations from a queue on the ML device.
 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
 * in the *ops* array.
 *
 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
 *
 * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
 * operations, and this is likely to signify that other processed operations remain in the
 * device's output queue. An application implementing a "retrieve as many processed operations
 * as possible" policy can check this specific case and keep invoking the rte_ml_dequeue_burst()
 * function until a value less than *nb_ops* is returned.
 *
 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
 * the corresponding overhead.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair from which to retrieve processed operations.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   rte_ml_dev_configure().
 * @param ops
 *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
 *   store *nb_ops* pointers in it.
 * @param nb_ops
 *   The maximum number of inferences to dequeue.
 *
 * @return
 *   The number of operations actually dequeued, which is the number of pointers
 *   to *rte_ml_op* structures effectively supplied to the *ops* array.
 */
__rte_experimental
uint16_t
rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
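
/**
 * Example: a minimal enqueue/poll sketch (illustrative only, not part of the API).
 * It submits one prepared op on queue pair 0 and busy-polls until it is returned;
 * "op" and "dev_id" are assumed to have been set up as in the earlier examples.
 *
 * @code
 * struct rte_ml_op *deq_op;
 *
 * while (rte_ml_enqueue_burst(dev_id, 0, &op, 1) != 1)
 *	;	// queue full, retry
 * while (rte_ml_dequeue_burst(dev_id, 0, &deq_op, 1) != 1)
 *	;	// wait for completion
 * if (deq_op->status != RTE_ML_OP_STATUS_SUCCESS)
 *	printf("inference failed\n");
 * @endcode
 */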

/**
 * Verbose error structure definition.
 */
struct rte_ml_op_error {
	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
	uint64_t errcode;	      /**< Vendor specific error code. */
};

/**
 * Get PMD specific error information for an ML op.
 *
 * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
 * this API allows the application to get the PMD specific error details.
 *
 * @param[in] dev_id
 *   Device identifier
 * @param[in] op
 *   Handle of ML operation
 * @param[in] error
 *   Address of structure rte_ml_op_error to be filled
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
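
/**
 * Example: a minimal error-reporting sketch (illustrative only, not part of the API),
 * run on an op that was dequeued with RTE_ML_OP_STATUS_ERROR; "dev_id" and "op" are
 * assumed from the earlier examples.
 *
 * @code
 * struct rte_ml_op_error err;
 *
 * if (op->status == RTE_ML_OP_STATUS_ERROR &&
 *     rte_ml_op_error_get(dev_id, op, &err) == 0)
 *	printf("op failed: %s (0x%" PRIx64 ")\n", err.message, err.errcode);
 * @endcode
 */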

/* Statistics operations */

/** Device statistics. */
struct rte_ml_dev_stats {
	uint64_t enqueued_count;
	/**< Count of all operations enqueued */
	uint64_t dequeued_count;
	/**< Count of all operations dequeued */
	uint64_t enqueue_err_count;
	/**< Total error count on operations enqueued */
	uint64_t dequeue_err_count;
	/**< Total error count on operations dequeued */
};

/**
 * Retrieve the general I/O statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param stats
 *   Pointer to structure to where statistics will be copied.
 *   On error, this location may or may not have been modified.
 * @return
 *   - 0 on success
 *   - -EINVAL: If invalid parameter pointer is provided.
 */
__rte_experimental
int
rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);

/**
 * Reset the statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 */
__rte_experimental
void
rte_ml_dev_stats_reset(int16_t dev_id);

/**
 * Selects the component of the mldev to retrieve statistics from.
 */
enum rte_ml_dev_xstats_mode {
	RTE_ML_DEV_XSTATS_DEVICE,
	/**< Device xstats */
	RTE_ML_DEV_XSTATS_MODEL,
	/**< Model xstats */
};

/**
 * A name-key lookup element for extended statistics.
 *
 * This structure is used to map between names and ID numbers for extended ML device statistics.
 */
struct rte_ml_dev_xstats_map {
	uint16_t id;
	/**< xstat identifier */
	char name[RTE_ML_STR_MAX];
	/**< xstat name */
};

/**
 * Retrieve names of extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model number in model mode, and is ignored in device mode.
 * @param[out] xstats_map
 *   Block of memory to insert names and ids into. Must be at least size in capacity. If set to
 *   NULL, function returns required capacity. The id values returned can be passed to
 *   *rte_ml_dev_xstats_get* to select statistics.
 * @param size
 *   Capacity of xstats_map (number of entries).
 * @return
 *   - Positive value lower or equal to size: success. The return value is the number of entries
 *     filled in the stats table.
 *   - Positive value higher than size: error, the given statistics table is too small. The return
 *     value corresponds to the size that should be given to succeed. The entries in the table are
 *     not valid and shall not be used by the caller.
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);

/**
 * Retrieve the value of a single stat by requesting it by name.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param name
 *   Name of the stat to retrieve.
 * @param[out] stat_id
 *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
 *   stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to
 *   scan a list of names for the stat. If the stat cannot be found, the id returned will be
 *   (unsigned)-1.
 * @param[out] value
 *   Value of the stat to be returned.
 * @return
 *   - Zero: No error.
 *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);

/**
 * Retrieve extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model id in model mode, and is ignored in device mode.
 * @param stat_ids
 *   ID numbers of the stats to get. The ids can be obtained from the stat position in the stat
 *   list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
 * @param[out] values
 *   Values for each stat requested by ID.
 * @param nb_ids
 *   Number of stats requested.
 * @return
 *   - Positive value: number of stat entries filled into the values array
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model id or stat id parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);

/**
 * Reset the values of the xstats of the selected component in the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of the statistics to reset. Choose from device or model.
 * @param model_id
 *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
 * @param stat_ids
 *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
 *   reset. If non-NULL, must point to an array of at least *nb_ids* size.
 * @param nb_ids
 *   The number of ids available from the *ids* array. Ignored when ids is NULL.
 * @return
 *   - Zero: successfully reset the statistics.
 *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			const uint16_t stat_ids[], uint16_t nb_ids);
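
/**
 * Example: a minimal sketch dumping all device-level xstats (illustrative only,
 * not part of the API). The fixed array size of 64 is an assumption for the example;
 * a real application would first call rte_ml_dev_xstats_names_get() with a NULL map
 * to query the required capacity.
 *
 * @code
 * struct rte_ml_dev_xstats_map map[64];
 * uint64_t values[64];
 * uint16_t ids[64];
 * int n;
 *
 * n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, 64);
 * if (n > 0 && n <= 64) {
 *	for (int i = 0; i < n; i++)
 *		ids[i] = map[i].id;
 *	if (rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n) == n)
 *		for (int i = 0; i < n; i++)
 *			printf("%s: %" PRIu64 "\n", map[i].name, values[i]);
 * }
 * @endcode
 */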

/**
 * Dump internal information about *dev_id* to the FILE* provided in *fd*.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param fd
 *   A pointer to a file for output.
 * @return
 *   - 0: on success.
 *   - <0: on failure.
 */
__rte_experimental
int
rte_ml_dev_dump(int16_t dev_id, FILE *fd);

/**
 * Trigger the ML device self test.
 *
 * @param dev_id
 *   The identifier of the device.
 * @return
 *   - 0: Selftest successful.
 *   - -ENOTSUP: if the device doesn't support selftest.
 *   - other values < 0 on failure.
 */
__rte_experimental
int
rte_ml_dev_selftest(int16_t dev_id);

/* Model operations */

/** ML model load parameters
 *
 * Parameters required to load an ML model.
 */
struct rte_ml_model_params {
	void *addr;
	/**< Address of model buffer */
	size_t size;
	/**< Size of model buffer */
};

/**
 * Load an ML model to the device.
 *
 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] params
 *   Parameters for the model to be loaded.
 * @param[out] model_id
 *   Identifier of the model loaded.
 *
 * @return
 *   - 0: Success, Model loaded.
 *   - < 0: Failure, Error code of the model load driver function.
 */
__rte_experimental
int
rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);

/**
 * Unload an ML model from the device.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be unloaded.
 *
 * @return
 *   - 0: Success, Model unloaded.
 *   - < 0: Failure, Error code of the model unload driver function.
 */
__rte_experimental
int
rte_ml_model_unload(int16_t dev_id, uint16_t model_id);

/**
 * Start an ML model for the given device ID.
 *
 * Start an ML model to accept inference requests.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be started.
 *
 * @return
 *   - 0: Success, Model started.
 *   - < 0: Failure, Error code of the model start driver function.
 */
__rte_experimental
int
rte_ml_model_start(int16_t dev_id, uint16_t model_id);

/**
 * Stop an ML model for the given device ID.
 *
 * Model stop would disable the ML model to be used for inference jobs.
 * All inference jobs must have been completed before model stop is attempted.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be stopped.
 *
 * @return
 *   - 0: Success, Model stopped.
 *   - < 0: Failure, Error code of the model stop driver function.
 */
__rte_experimental
int
rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
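
/**
 * Example: a minimal model lifecycle sketch (illustrative only, not part of the API).
 * The model binary is assumed to have been read into "model_buf" of "model_len" bytes;
 * error handling is reduced to early returns.
 *
 * @code
 * struct rte_ml_model_params params = {
 *	.addr = model_buf,
 *	.size = model_len,
 * };
 * uint16_t model_id;
 *
 * if (rte_ml_model_load(dev_id, &params, &model_id) < 0)
 *	return -1;
 * if (rte_ml_model_start(dev_id, model_id) < 0) {
 *	rte_ml_model_unload(dev_id, model_id);
 *	return -1;
 * }
 *
 * // ... enqueue and dequeue inference ops ...
 *
 * rte_ml_model_stop(dev_id, model_id);
 * rte_ml_model_unload(dev_id, model_id);
 * @endcode
 */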

/**
 * Input and output data types. ML models can operate on reduced precision
 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
 * This enum is used to represent the lower precision integer and floating point types used
 * by ML models.
 */
enum rte_ml_io_type {
	RTE_ML_IO_TYPE_UNKNOWN = 0,
	/**< Invalid or unknown type */
	RTE_ML_IO_TYPE_INT8,
	/**< 8-bit integer */
	RTE_ML_IO_TYPE_UINT8,
	/**< 8-bit unsigned integer */
	RTE_ML_IO_TYPE_INT16,
	/**< 16-bit integer */
	RTE_ML_IO_TYPE_UINT16,
	/**< 16-bit unsigned integer */
	RTE_ML_IO_TYPE_INT32,
	/**< 32-bit integer */
	RTE_ML_IO_TYPE_UINT32,
	/**< 32-bit unsigned integer */
	RTE_ML_IO_TYPE_INT64,
	/**< 64-bit integer */
	RTE_ML_IO_TYPE_UINT64,
	/**< 64-bit unsigned integer */
	RTE_ML_IO_TYPE_FP8,
	/**< 8-bit floating point number */
	RTE_ML_IO_TYPE_FP16,
	/**< IEEE 754 16-bit floating point number */
	RTE_ML_IO_TYPE_FP32,
	/**< IEEE 754 32-bit floating point number */
	RTE_ML_IO_TYPE_BFLOAT16
	/**< 16-bit brain floating point number. */
};

/** ML I/O buffer layout */
enum rte_ml_io_layout {
	RTE_ML_IO_LAYOUT_PACKED,
	/**< All inputs for the model should be packed in a single buffer with
	 * no padding between individual inputs. The buffer is expected to
	 * be aligned to rte_ml_dev_info::align_size.
	 *
	 * When I/O segmentation is supported by the device, the packed
	 * data can be split into multiple segments. In this case, each
	 * segment is expected to be aligned to rte_ml_dev_info::align_size
	 *
	 * Same applies to output.
	 *
	 * @see struct rte_ml_dev_info::max_segments
	 */
	RTE_ML_IO_LAYOUT_SPLIT
	/**< Each input for the model should be stored as a separate buffer
	 * and each input should be aligned to rte_ml_dev_info::align_size.
	 *
	 * When I/O segmentation is supported, each input can be split into
	 * multiple segments. In this case, each segment is expected to be
	 * aligned to rte_ml_dev_info::align_size
	 *
	 * Same applies to output.
	 *
	 * @see struct rte_ml_dev_info::max_segments
	 */
};

/**
 * Input and output data information structure
 *
 * Specifies the type and shape of input and output data.
 */
struct rte_ml_io_info {
	char name[RTE_ML_STR_MAX];
	/**< Name of data */
	uint32_t nb_dims;
	/**< Number of dimensions in shape */
	uint32_t *shape;
	/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
	enum rte_ml_io_type type;
	/**< Type of data
	 * @see enum rte_ml_io_type
	 */
	uint64_t nb_elements;
	/**< Number of elements in tensor */
	uint64_t size;
	/**< Size of tensor in bytes */
};

/** Model information structure */
struct rte_ml_model_info {
	char name[RTE_ML_STR_MAX];
	/**< Model name. */
	char version[RTE_ML_STR_MAX];
	/**< Model version */
	uint16_t model_id;
	/**< Model ID */
	uint16_t device_id;
	/**< Device ID */
	enum rte_ml_io_layout io_layout;
	/**< I/O buffer layout for the model */
	uint16_t min_batches;
	/**< Minimum number of batches that the model can process
	 * in one inference request
	 */
	uint16_t max_batches;
	/**< Maximum number of batches that the model can process
	 * in one inference request
	 */
	uint32_t nb_inputs;
	/**< Number of inputs */
	const struct rte_ml_io_info *input_info;
	/**< Input info array. Array size is equal to nb_inputs */
	uint32_t nb_outputs;
	/**< Number of outputs */
	const struct rte_ml_io_info *output_info;
	/**< Output info array. Array size is equal to nb_outputs */
	uint64_t wb_size;
	/**< Size of model weights and bias */
};

/**
 * Get ML model information.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[out] model_info
 *   Pointer to a model info structure
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
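
/**
 * Example: a minimal sketch inspecting a loaded model's I/O requirements
 * (illustrative only, not part of the API); "dev_id" and "model_id" are assumed
 * from the earlier examples.
 *
 * @code
 * struct rte_ml_model_info info;
 *
 * if (rte_ml_model_info_get(dev_id, model_id, &info) == 0) {
 *	printf("model %s: layout %s, %u input(s), %u output(s)\n", info.name,
 *	       info.io_layout == RTE_ML_IO_LAYOUT_PACKED ? "packed" : "split",
 *	       info.nb_inputs, info.nb_outputs);
 *	for (uint32_t i = 0; i < info.nb_inputs; i++)
 *		printf("  input %s: %" PRIu64 " bytes\n",
 *		       info.input_info[i].name, info.input_info[i].size);
 * }
 * @endcode
 */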

/**
 * Update the model parameters without unloading model.
 *
 * Update model parameters such as weights and bias without unloading the model.
 * rte_ml_model_stop() must be called before invoking this API.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] buffer
 *   Pointer to the model weights and bias buffer.
 *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);

/* IO operations */

/**
 * Quantize input data.
 *
 * Quantization converts data from a higher precision type to a lower precision type to improve
 * the throughput and efficiency of the model execution with minimal loss of accuracy.
 * Types of dequantized data and quantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] dbuffer
 *   Address of dequantized input data
 * @param[in] qbuffer
 *   Address of quantized input data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
		   struct rte_ml_buff_seg **qbuffer);

/**
 * Dequantize output data.
 *
 * Dequantization converts data from a lower precision type to a higher precision type.
 * Types of quantized and dequantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] qbuffer
 *   Address of quantized output data
 * @param[in] dbuffer
 *   Address of dequantized output data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
		     struct rte_ml_buff_seg **dbuffer);
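
/**
 * Example: a minimal quantize/dequantize sketch (illustrative only, not part of the
 * API). Single segments are used for a packed-layout model; the buffer contents and
 * segment set-up are assumptions for the example.
 *
 * @code
 * struct rte_ml_buff_seg dq_in_seg, q_in_seg, q_out_seg, dq_out_seg;	// filled by the app
 * struct rte_ml_buff_seg *dq_in = &dq_in_seg, *q_in = &q_in_seg;
 * struct rte_ml_buff_seg *q_out = &q_out_seg, *dq_out = &dq_out_seg;
 *
 * // convert float32 input to the model's quantized input type
 * if (rte_ml_io_quantize(dev_id, model_id, &dq_in, &q_in) < 0)
 *	return -1;
 * // ... run inference with q_in / q_out via rte_ml_enqueue_burst() ...
 * // convert the quantized output back to float32
 * if (rte_ml_io_dequantize(dev_id, model_id, &q_out, &dq_out) < 0)
 *	return -1;
 * @endcode
 */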

/* ML op pool operations */

/**
 * Create an ML operation pool
 *
 * @param name
 *   ML operations pool name
 * @param nb_elts
 *   Number of elements in pool
 * @param cache_size
 *   Number of elements to cache on lcore, see
 *   *rte_mempool_create* for further details about cache size
 * @param user_size
 *   Size of private data to allocate for user with each operation
 * @param socket_id
 *   Socket identifier to allocate memory on
 * @return
 *   - On success pointer to mempool
 *   - On failure NULL
 */
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
		      uint16_t user_size, int socket_id);

/**
 * Free an ML operation pool
 *
 * @param mempool
 *   A pointer to the mempool structure.
 *   If NULL then, the function does nothing.
 */
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);

#ifdef __cplusplus
}
#endif

#endif /* RTE_MLDEV_H */