/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#ifndef RTE_MLDEV_H
#define RTE_MLDEV_H

/**
 * @file rte_mldev.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * ML (Machine Learning) device API.
 *
 * The ML framework is built on the following model:
 *
 *
 *     +-----------------+               rte_ml_[en|de]queue_burst()
 *     |                 |                          |
 *     |     Machine     o------+     +--------+    |
 *     |     Learning    |      |     | queue  |    |    +------+
 *     |     Inference   o------+-----o        |<===o===>|Core 0|
 *     |     Engine      |      |     | pair 0 |         +------+
 *     |                 o----+ |     +--------+
 *     |                 |    | |
 *     +-----------------+    | |     +--------+
 *              ^             | |     | queue  |         +------+
 *              |             | +-----o        |<=======>|Core 1|
 *              |             |       | pair 1 |         +------+
 *              |             |       +--------+
 *     +--------+--------+    |
 *     | +-------------+ |    |       +--------+
 *     | |   Model 0   | |    |       | queue  |         +------+
 *     | +-------------+ |    +-------o        |<=======>|Core N|
 *     | +-------------+ |            | pair N |         +------+
 *     | |   Model 1   | |            +--------+
 *     | +-------------+ |
 *     | +-------------+ |<------> rte_ml_model_load()
 *     | |   Model ..  | |-------> rte_ml_model_info_get()
 *     | +-------------+ |<------- rte_ml_model_start()
 *     | +-------------+ |<------- rte_ml_model_stop()
 *     | |   Model N   | |<------- rte_ml_model_params_update()
 *     | +-------------+ |<------- rte_ml_model_unload()
 *     +-----------------+
 *
 * ML Device: A hardware or software-based implementation of ML device API for
 * running inferences using a pre-trained ML model.
 *
 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
 * procedure/algorithm and data/pattern required to make predictions on live data.
 * Once the model is created and trained outside of the DPDK scope, the model can be loaded
 * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
 * The rte_ml_model_params_update() can be used to update the model parameters such as weights
 * and bias without unloading the model using rte_ml_model_unload().
 *
 * ML Inference: ML inference is the process of feeding data to the model via the
 * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
 * outputs/predictions from the started model.
 *
 * In all functions of the ML device API, the ML device is designated by an
 * integer >= 0 named as device identifier *dev_id*.
 *
 * The functions exported by the ML device API to setup a device designated by
 * its device identifier must be invoked in the following order:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_dev_start()
 *
 * A model is required to run the inference operations with the user specified inputs.
 * Application needs to invoke the ML model API in the following order before queueing
 * inference jobs.
 *
 * - rte_ml_model_load()
 * - rte_ml_model_start()
 *
 * A model can be loaded on a device only after the device has been configured and can be
 * started or stopped only after a device has been started.
 *
 * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
 * The information includes the shape and type of input and output required for the inference.
 *
 * Data quantization and dequantization is one of the main aspects of the ML domain. This involves
 * conversion of input data from a higher precision to a lower precision data type and vice-versa
 * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
 * and output buffers holding data for multiple batches.
 *
 * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used to get
 * the size of quantized and de-quantized multi-batch input and output buffers.
 *
 * User can optionally update the model parameters with rte_ml_model_params_update() after
 * invoking rte_ml_model_stop() API on a given model ID.
 *
 * The application can invoke, in any order, the functions exported by the ML API to enqueue
 * inference jobs and dequeue inference responses.
 *
 * If the application wants to change the device configuration (i.e., call
 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
 * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
 * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
 * for the given model. The application does not need to call rte_ml_dev_stop() API for
 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
 *
 * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
 * start state after invoking rte_ml_model_start() API, then the application can call
 * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
 *
 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
 *
 * Typical application utilisation of the ML API will follow the following
 * programming flow.
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_model_load()
 * - rte_ml_dev_start()
 * - rte_ml_model_start()
 * - rte_ml_model_info_get()
 * - rte_ml_enqueue_burst()
 * - rte_ml_dequeue_burst()
 * - rte_ml_model_stop()
 * - rte_ml_model_unload()
 * - rte_ml_dev_stop()
 * - rte_ml_dev_close()
 *
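 * Below is a minimal illustrative sketch of this flow (not a complete program;
 * error handling is omitted, and the device identifier, descriptor count and
 * model buffer shown are placeholder assumptions):
 *
 * @code{.c}
 * int16_t dev_id = 0;
 * uint16_t model_id;
 * struct rte_ml_dev_config conf = {
 *         .socket_id = rte_ml_dev_socket_id(dev_id),
 *         .nb_models = 1,
 *         .nb_queue_pairs = 1,
 * };
 * struct rte_ml_dev_qp_conf qp_conf = { .nb_desc = 128, .cb = NULL };
 * struct rte_ml_model_params params = { .addr = model_buf, .size = model_len };
 *
 * rte_ml_dev_configure(dev_id, &conf);
 * rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY);
 * rte_ml_model_load(dev_id, &params, &model_id);
 * rte_ml_dev_start(dev_id);
 * rte_ml_model_start(dev_id, model_id);
 * // ... rte_ml_enqueue_burst() / rte_ml_dequeue_burst() ...
 * rte_ml_model_stop(dev_id, model_id);
 * rte_ml_model_unload(dev_id, model_id);
 * rte_ml_dev_stop(dev_id);
 * rte_ml_dev_close(dev_id);
 * @endcode
 *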
 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
 * are lock-free functions which are assumed not to be invoked in parallel on different logical
 * cores on the same target object. For instance, the dequeue function of a poll mode driver
 * cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of
 * course, this function can be invoked in parallel by different logical cores on different queue
 * pairs. It is the responsibility of the user application to enforce this rule.
 */

#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Logging Macro */
extern int rte_ml_dev_logtype;

#define RTE_MLDEV_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)

#define RTE_ML_STR_MAX 128
/**< Maximum length of name string */

#define RTE_MLDEV_DEFAULT_MAX 32
/** Maximum number of devices if rte_ml_dev_init() is not called. */

/* Device operations */

/**
 * Initialize the device array before probing devices. If not called, the first device probed
 * would initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
 *
 * @param dev_max
 *   Maximum number of devices.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENOMEM if out of memory
 *   - EINVAL if 0 size
 *   - EBUSY if already initialized
 */
__rte_experimental
int
rte_ml_dev_init(size_t dev_max);

/**
 * Get the total number of ML devices that have been successfully initialised.
 *
 * @return
 *   - The total number of usable ML devices.
 */
__rte_experimental
uint16_t
rte_ml_dev_count(void);

/**
 * Check if the device is in ready state.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 if the device is not in ready state.
 *   - 1 if the device is in ready state.
 */
__rte_experimental
int
rte_ml_dev_is_valid_dev(int16_t dev_id);

/**
 * Return the NUMA socket to which a device is connected.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - The NUMA socket id to which the device is connected.
 *   - 0 if the socket could not be determined.
 *   - -EINVAL: if the dev_id value is not valid.
 */
__rte_experimental
int
rte_ml_dev_socket_id(int16_t dev_id);

/** ML device information */
struct rte_ml_dev_info {
	const char *driver_name;
	/**< Driver name */
	uint16_t max_models;
	/**< Maximum number of models supported by the device.
	 * @see struct rte_ml_dev_config::nb_models
	 */
	uint16_t max_queue_pairs;
	/**< Maximum number of queue pairs supported by the device.
	 * @see struct rte_ml_dev_config::nb_queue_pairs
	 */
	uint16_t max_desc;
	/**< Maximum number of descriptors allowed per queue pair by the device.
	 * @see struct rte_ml_dev_qp_conf::nb_desc
	 */
	uint16_t max_io;
	/**< Maximum number of inputs/outputs supported per model. */
	uint16_t max_segments;
	/**< Maximum number of scatter-gather entries supported by the device.
	 * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next
	 */
	uint16_t align_size;
	/**< Alignment size of IO buffers used by the device. */
};

/**
 * Retrieve the information of the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param dev_info
 *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
 *
 * @return
 *   - 0: Success, driver updates the information of the ML device
 *   - < 0: Error code returned by the driver info get function.
 */
__rte_experimental
int
rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
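
/*
 * Illustrative sketch: enumerating devices and querying their capabilities
 * (not part of the original documentation; assumes at least one probed device).
 *
 *	uint16_t count = rte_ml_dev_count();
 *	for (int16_t dev_id = 0; dev_id < (int16_t)count; dev_id++) {
 *		struct rte_ml_dev_info info;
 *
 *		if (!rte_ml_dev_is_valid_dev(dev_id))
 *			continue;
 *		if (rte_ml_dev_info_get(dev_id, &info) == 0)
 *			printf("%s: max_models=%u max_qps=%u socket=%d\n",
 *			       info.driver_name, info.max_models,
 *			       info.max_queue_pairs, rte_ml_dev_socket_id(dev_id));
 *	}
 */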

/** ML device configuration structure */
struct rte_ml_dev_config {
	int socket_id;
	/**< Socket to allocate resources on. */
	uint16_t nb_models;
	/**< Number of models to be loaded on the device.
	 * This value cannot exceed the max_models which is previously provided in
	 * struct rte_ml_dev_info::max_models
	 */
	uint16_t nb_queue_pairs;
	/**< Number of queue pairs to configure on this device.
	 * This value cannot exceed the max_queue_pairs which is previously provided in
	 * struct rte_ml_dev_info::max_queue_pairs
	 */
};

/**
 * Configure an ML device.
 *
 * This function must be invoked first before any other function in the API.
 *
 * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after
 * rte_ml_dev_close() is called.
 *
 * The caller may use rte_ml_dev_info_get() to get the capability of the resources available for
 * this ML device.
 *
 * @param dev_id
 *   The identifier of the device to configure.
 * @param config
 *   The ML device configuration structure.
 *
 * @return
 *   - 0: Success, device configured.
 *   - < 0: Error code returned by the driver configuration function.
 */
__rte_experimental
int
rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
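
/*
 * Illustrative sketch: configuring a device within the limits reported by
 * rte_ml_dev_info_get() (not part of the original documentation; the chosen
 * model and queue pair counts are placeholder assumptions).
 *
 *	struct rte_ml_dev_info info;
 *	struct rte_ml_dev_config conf;
 *
 *	rte_ml_dev_info_get(dev_id, &info);
 *	conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *	conf.nb_models = RTE_MIN((uint16_t)4, info.max_models);
 *	conf.nb_queue_pairs = RTE_MIN((uint16_t)2, info.max_queue_pairs);
 *	if (rte_ml_dev_configure(dev_id, &conf) < 0)
 *		RTE_MLDEV_LOG(ERR, "Device %d configure failed", dev_id);
 */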

/* Forward declaration */
struct rte_ml_op;

/** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);

/** ML device queue pair configuration structure. */
struct rte_ml_dev_qp_conf {
	uint32_t nb_desc;
	/**< Number of descriptors per queue pair.
	 * This value cannot exceed the max_desc which is previously provided in
	 * struct rte_ml_dev_info::max_desc
	 */
	rte_ml_dev_stop_flush_t cb;
	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
	 * Value NULL is allowed, in which case callback will not be invoked.
	 * This function can be used to properly dispose of outstanding ML ops from all
	 * queue pairs, for example ops containing memory pointers.
	 * @see rte_ml_dev_stop()
	 */
};

/**
 * Set up a queue pair for a device. This should only be called when the device is stopped.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param queue_pair_id
 *   The index of the queue pair to set up. The value must be in the range
 *   [0, nb_queue_pairs - 1] previously supplied to rte_ml_dev_configure().
 * @param qp_conf
 *   The pointer to the configuration data to be used for the queue pair.
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in case of NUMA.
 *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
 *   for the queue pair.
 *
 * @return
 *   - 0: Success, queue pair correctly set up.
 *   - < 0: Queue pair configuration failed.
 */
__rte_experimental
int
rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
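
/*
 * Illustrative sketch: setting up a queue pair with a stop-flush callback that
 * returns any in-flight ops to their mempool (not part of the original
 * documentation; the descriptor count is a placeholder assumption).
 *
 *	static void
 *	op_flush_cb(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op)
 *	{
 *		RTE_SET_USED(dev_id);
 *		RTE_SET_USED(qp_id);
 *		rte_mempool_put(op->mempool, op);
 *	}
 *
 *	struct rte_ml_dev_qp_conf qp_conf = { .nb_desc = 128, .cb = op_flush_cb };
 *
 *	rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY);
 */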

/**
 * Start an ML device.
 *
 * The device start step consists of setting the configured features and enabling the ML device
 * to accept inference jobs.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device started.
 *   - <0: Error code of the driver device start function.
 */
__rte_experimental
int
rte_ml_dev_start(int16_t dev_id);

/**
 * Stop an ML device. A stopped device cannot accept inference jobs.
 * The device can be restarted with a call to rte_ml_dev_start().
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device stopped.
 *   - <0: Error code of the driver device stop function.
 */
__rte_experimental
int
rte_ml_dev_stop(int16_t dev_id);

/**
 * Close an ML device. The device cannot be restarted!
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 on successfully closing device.
 *   - <0 on failure to close device.
 */
__rte_experimental
int
rte_ml_dev_close(int16_t dev_id);

/** Status of ML operation */
enum rte_ml_op_status {
	RTE_ML_OP_STATUS_SUCCESS = 0,
	/**< Operation completed successfully */
	RTE_ML_OP_STATUS_NOT_PROCESSED,
	/**< Operation has not yet been processed by the device. */
	RTE_ML_OP_STATUS_ERROR,
	/**< Operation completed with error.
	 * Application can invoke rte_ml_op_error_get() to get PMD specific
	 * error code if needed.
	 */
};

/** ML operation's input and output buffer representation as scatter gather list */
struct rte_ml_buff_seg {
	rte_iova_t iova_addr;
	/**< IOVA address of segment buffer. */
	void *addr;
	/**< Virtual address of segment buffer. */
	uint32_t length;
	/**< Segment length. */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_ml_buff_seg *next;
	/**< Points to next segment. Value NULL represents the last segment. */
};

/**
 * ML Operation.
 *
 * This structure contains data related to performing an ML operation on the buffers using
 * the model specified through model_id.
 */
struct rte_ml_op {
	uint16_t model_id;
	/**< Model ID to be used for the operation. */
	uint16_t nb_batches;
	/**< Number of batches. Minimum value must be one.
	 * Input buffer must hold inference data for each batch as contiguous.
	 */
	uint32_t reserved;
	/**< Reserved for future use. */
	struct rte_mempool *mempool;
	/**< Pool from which operation is allocated. */
	struct rte_ml_buff_seg **input;
	/**< Array of buffer segments to hold the inference input data.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
	 * the array is 1.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
	 * the array is rte_ml_model_info::nb_inputs.
	 *
	 * @see struct rte_ml_dev_info::io_layout
	 */
	struct rte_ml_buff_seg **output;
	/**< Array of buffer segments to hold the inference output data.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
	 * the array is 1.
	 *
	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
	 * the array is rte_ml_model_info::nb_outputs.
	 *
	 * @see struct rte_ml_dev_info::io_layout
	 */
	union {
		uint64_t user_u64;
		/**< User data as uint64_t.*/
		void *user_ptr;
		/**< User data as void*.*/
	};
	enum rte_ml_op_status status;
	/**< Operation status. */
	uint64_t impl_opaque;
	/**< Implementation specific opaque value.
	 * An implementation may use this field to hold
	 * implementation specific value to share between
	 * dequeue and enqueue operation.
	 * The application should not modify this field.
	 */
} __rte_cache_aligned;
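
/*
 * Illustrative sketch: creating an op pool and preparing an inference op for a
 * model using the packed I/O layout (not part of the original documentation;
 * pool sizing and the quantized input/output buffers qin_buf/qout_buf and their
 * lengths are placeholder assumptions).
 *
 *	struct rte_mempool *op_pool;
 *	struct rte_ml_op *op;
 *	struct rte_ml_buff_seg in_seg, out_seg;
 *	struct rte_ml_buff_seg *in = &in_seg, *out = &out_seg;
 *
 *	op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, SOCKET_ID_ANY);
 *	rte_mempool_get(op_pool, (void **)&op);
 *
 *	in_seg.addr = qin_buf;
 *	in_seg.length = qin_len;
 *	in_seg.next = NULL;
 *	out_seg.addr = qout_buf;
 *	out_seg.length = qout_len;
 *	out_seg.next = NULL;
 *
 *	op->model_id = model_id;
 *	op->nb_batches = 1;
 *	op->mempool = op_pool;
 *	op->input = &in;
 *	op->output = &out;
 */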

/* Enqueue/Dequeue operations */

/**
 * Enqueue a burst of ML inferences for processing on an ML device.
 *
 * The rte_ml_enqueue_burst() function is invoked to place ML inference
 * operations on the queue *qp_id* of the device designated by its *dev_id*.
 *
 * The *nb_ops* parameter is the number of inferences to process which are
 * supplied in the *ops* array of *rte_ml_op* structures.
 *
 * The rte_ml_enqueue_burst() function returns the number of inferences it
 * actually enqueued for processing. A return value equal to *nb_ops* means that
 * all operations have been enqueued.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair on which inferences are to be enqueued for processing.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   *rte_ml_dev_configure*.
 * @param ops
 *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
 *   ML inferences to be processed.
 * @param nb_ops
 *   The number of operations to process.
 *
 * @return
 *   The number of inference operations actually enqueued to the ML device.
 *   The return value can be less than the value of the *nb_ops* parameter when the ML device
 *   queue is full or if invalid parameters are specified in a *rte_ml_op*.
 */
__rte_experimental
uint16_t
rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Dequeue a burst of processed ML inference operations from a queue on the ML device.
 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
 * in the *ops* array.
 *
 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
 *
 * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
 * operations, and this is likely to signify that other processed operations remain in the
 * device's output queue. An application implementing a "retrieve as many processed operations
 * as possible" policy can check this specific case and keep invoking the rte_ml_dequeue_burst()
 * function until a value less than *nb_ops* is returned.
 *
 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
 * the corresponding overhead.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair from which to retrieve processed operations.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   rte_ml_dev_configure().
 * @param ops
 *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
 *   store *nb_ops* pointers in it.
 * @param nb_ops
 *   The maximum number of inferences to dequeue.
 *
 * @return
 *   The number of operations actually dequeued, which is the number of pointers
 *   to *rte_ml_op* structures effectively supplied to the *ops* array.
 */
__rte_experimental
uint16_t
rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Verbose error structure definition.
 */
struct rte_ml_op_error {
	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
	uint64_t errcode;             /**< Vendor specific error code. */
};

/**
 * Get PMD specific error information for an ML op.
 *
 * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
 * this API allows retrieving PMD specific error details.
 *
 * @param[in] dev_id
 *   Device identifier
 * @param[in] op
 *   Handle of ML operation
 * @param[out] error
 *   Address of structure rte_ml_op_error to be filled
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
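
/*
 * Illustrative sketch: a simple enqueue/dequeue polling loop with per-op error
 * handling (not part of the original documentation; burst size, queue pair 0 and
 * the prepared ops[] array are placeholder assumptions).
 *
 *	uint16_t nb_enq = rte_ml_enqueue_burst(dev_id, 0, ops, nb_ops);
 *	uint16_t nb_deq = 0;
 *
 *	while (nb_deq < nb_enq) {
 *		struct rte_ml_op *done[32];
 *		uint16_t n = rte_ml_dequeue_burst(dev_id, 0, done, RTE_DIM(done));
 *
 *		for (uint16_t i = 0; i < n; i++) {
 *			if (done[i]->status == RTE_ML_OP_STATUS_ERROR) {
 *				struct rte_ml_op_error err;
 *
 *				rte_ml_op_error_get(dev_id, done[i], &err);
 *				RTE_MLDEV_LOG(ERR, "op failed: %s", err.message);
 *			}
 *			rte_mempool_put(done[i]->mempool, done[i]);
 *		}
 *		nb_deq += n;
 *	}
 */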

/* Statistics operations */

/** Device statistics. */
struct rte_ml_dev_stats {
	uint64_t enqueued_count;
	/**< Count of all operations enqueued */
	uint64_t dequeued_count;
	/**< Count of all operations dequeued */
	uint64_t enqueue_err_count;
	/**< Total error count on operations enqueued */
	uint64_t dequeue_err_count;
	/**< Total error count on operations dequeued */
};

/**
 * Retrieve the general I/O statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param stats
 *   Pointer to structure to where statistics will be copied.
 *   On error, this location may or may not have been modified.
 * @return
 *   - 0 on success
 *   - -EINVAL: If invalid parameter pointer is provided.
 */
__rte_experimental
int
rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);

/**
 * Reset the statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 */
__rte_experimental
void
rte_ml_dev_stats_reset(int16_t dev_id);

/**
 * Selects the component of the mldev to retrieve statistics from.
 */
enum rte_ml_dev_xstats_mode {
	RTE_ML_DEV_XSTATS_DEVICE,
	/**< Device xstats */
	RTE_ML_DEV_XSTATS_MODEL,
	/**< Model xstats */
};

/**
 * A name-key lookup element for extended statistics.
 *
 * This structure is used to map between names and ID numbers for extended ML device statistics.
 */
struct rte_ml_dev_xstats_map {
	uint16_t id;
	/**< xstat identifier */
	char name[RTE_ML_STR_MAX];
	/**< xstat name */
};

/**
 * Retrieve names of extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model number in model mode, and is ignored in device mode.
 * @param[out] xstats_map
 *   Block of memory to insert names and ids into. Must have capacity for at least *size*
 *   entries. If set to NULL, the function returns the required capacity. The id values returned
 *   can be passed to *rte_ml_dev_xstats_get* to select statistics.
 * @param size
 *   Capacity of xstats_map (number of entries).
 * @return
 *   - Positive value lower or equal to size: success. The return value is the number of entries
 *     filled in the stats table.
 *   - Positive value higher than size: error, the given statistics table is too small. The
 *     return value corresponds to the size that should be given to succeed. The entries in the
 *     table are not valid and shall not be used by the caller.
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);

/**
 * Retrieve the value of a single stat by requesting it by name.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param name
 *   Name of the stat to retrieve.
 * @param[out] stat_id
 *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
 *   stat can be made using rte_ml_dev_xstats_get, which will be faster as it doesn't need to
 *   scan a list of names for the stat. If the stat cannot be found, the id returned will be
 *   (unsigned)-1.
 * @param[out] value
 *   Value of the stat to be returned.
 * @return
 *   - Zero: No error.
 *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id,
			      uint64_t *value);

/**
 * Retrieve extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model id in model mode, and is ignored in device mode.
 * @param stat_ids
 *   ID numbers of the stats to get. The ids can be got from the stat position in the stat list
 *   from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
 * @param[out] values
 *   Values for each stat requested by ID.
 * @param nb_ids
 *   Number of stats requested.
 * @return
 *   - Positive value: number of stat entries filled into the values array
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model id or stat id parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
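
/*
 * Illustrative sketch: printing all device-level xstats by name and value
 * (not part of the original documentation; the fixed-size arrays are a
 * placeholder assumption).
 *
 *	struct rte_ml_dev_xstats_map map[64];
 *	uint16_t ids[64];
 *	uint64_t values[64];
 *	int n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1,
 *					    map, RTE_DIM(map));
 *
 *	if (n > 0 && n <= (int)RTE_DIM(map)) {
 *		for (int i = 0; i < n; i++)
 *			ids[i] = map[i].id;
 *		rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n);
 *		for (int i = 0; i < n; i++)
 *			printf("%s: %" PRIu64 "\n", map[i].name, values[i]);
 *	}
 */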

/**
 * Reset the values of the xstats of the selected component in the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of the statistics to reset. Choose from device or model.
 * @param model_id
 *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
 * @param stat_ids
 *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will
 *   be reset. If non-NULL, must point to an array of at least *nb_ids* size.
 * @param nb_ids
 *   The number of ids available from the *stat_ids* array. Ignored when *stat_ids* is NULL.
 * @return
 *   - Zero: successfully reset the statistics.
 *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
			const uint16_t stat_ids[], uint16_t nb_ids);

/**
 * Dump internal information about *dev_id* to the FILE* provided in *fd*.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param fd
 *   A pointer to a file for output.
 * @return
 *   - 0: on success.
 *   - <0: on failure.
 */
__rte_experimental
int
rte_ml_dev_dump(int16_t dev_id, FILE *fd);

/**
 * Trigger the ML device self test.
 *
 * @param dev_id
 *   The identifier of the device.
 * @return
 *   - 0: Selftest successful.
 *   - -ENOTSUP: if the device doesn't support selftest.
 *   - other values < 0 on failure.
 */
__rte_experimental
int
rte_ml_dev_selftest(int16_t dev_id);

/* Model operations */

/** ML model load parameters
 *
 * Parameters required to load an ML model.
 */
struct rte_ml_model_params {
	void *addr;
	/**< Address of model buffer */
	size_t size;
	/**< Size of model buffer */
};

/**
 * Load an ML model to the device.
 *
 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] params
 *   Parameters for the model to be loaded.
 * @param[out] model_id
 *   Identifier of the model loaded.
 *
 * @return
 *   - 0: Success, Model loaded.
 *   - < 0: Failure, Error code of the model load driver function.
 */
__rte_experimental
int
rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);

/**
 * Unload an ML model from the device.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be unloaded.
 *
 * @return
 *   - 0: Success, Model unloaded.
 *   - < 0: Failure, Error code of the model unload driver function.
 */
__rte_experimental
int
rte_ml_model_unload(int16_t dev_id, uint16_t model_id);

/**
 * Start an ML model for the given device ID.
 *
 * Start an ML model to accept inference requests.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be started.
 *
 * @return
 *   - 0: Success, Model started.
 *   - < 0: Failure, Error code of the model start driver function.
 */
__rte_experimental
int
rte_ml_model_start(int16_t dev_id, uint16_t model_id);

/**
 * Stop an ML model for the given device ID.
 *
 * Model stop disables the ML model from being used for inference jobs.
 * All inference jobs must have been completed before model stop is attempted.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be stopped.
 *
 * @return
 *   - 0: Success, Model stopped.
 *   - < 0: Failure, Error code of the model stop driver function.
 */
__rte_experimental
int
rte_ml_model_stop(int16_t dev_id, uint16_t model_id);

/**
 * Input and output data types. ML models can operate on reduced precision
 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
 * This enum is used to represent the lower precision integer and floating point types used
 * by ML models.
 */
enum rte_ml_io_type {
	RTE_ML_IO_TYPE_UNKNOWN = 0,
	/**< Invalid or unknown type */
	RTE_ML_IO_TYPE_INT8,
	/**< 8-bit integer */
	RTE_ML_IO_TYPE_UINT8,
	/**< 8-bit unsigned integer */
	RTE_ML_IO_TYPE_INT16,
	/**< 16-bit integer */
	RTE_ML_IO_TYPE_UINT16,
	/**< 16-bit unsigned integer */
	RTE_ML_IO_TYPE_INT32,
	/**< 32-bit integer */
	RTE_ML_IO_TYPE_UINT32,
	/**< 32-bit unsigned integer */
	RTE_ML_IO_TYPE_FP8,
	/**< 8-bit floating point number */
	RTE_ML_IO_TYPE_FP16,
	/**< IEEE 754 16-bit floating point number */
	RTE_ML_IO_TYPE_FP32,
	/**< IEEE 754 32-bit floating point number */
	RTE_ML_IO_TYPE_BFLOAT16
	/**< 16-bit brain floating point number. */
};

/** ML I/O buffer layout */
enum rte_ml_io_layout {
	RTE_ML_IO_LAYOUT_PACKED,
	/**< All inputs for the model should be packed in a single buffer with
	 * no padding between individual inputs. The buffer is expected to
	 * be aligned to rte_ml_dev_info::align_size.
	 *
	 * When I/O segmentation is supported by the device, the packed
	 * data can be split into multiple segments. In this case, each
	 * segment is expected to be aligned to rte_ml_dev_info::align_size
	 *
	 * Same applies to output.
	 *
	 * @see struct rte_ml_dev_info::max_segments
	 */
	RTE_ML_IO_LAYOUT_SPLIT
	/**< Each input for the model should be stored as separate buffers
	 * and each input should be aligned to rte_ml_dev_info::align_size.
	 *
	 * When I/O segmentation is supported, each input can be split into
	 * multiple segments. In this case, each segment is expected to be
	 * aligned to rte_ml_dev_info::align_size
	 *
	 * Same applies to output.
	 *
	 * @see struct rte_ml_dev_info::max_segments
	 */
};

/**
 * Input and output data information structure
 *
 * Specifies the type and shape of input and output data.
 */
struct rte_ml_io_info {
	char name[RTE_ML_STR_MAX];
	/**< Name of data */
	uint32_t nb_dims;
	/**< Number of dimensions in shape */
	uint32_t *shape;
	/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
	enum rte_ml_io_type type;
	/**< Type of data
	 * @see enum rte_ml_io_type
	 */
	uint64_t nb_elements;
	/**< Number of elements in tensor */
	uint64_t size;
	/**< Size of tensor in bytes */
};

/** Model information structure */
struct rte_ml_model_info {
	char name[RTE_ML_STR_MAX];
	/**< Model name. */
	char version[RTE_ML_STR_MAX];
	/**< Model version */
	uint16_t model_id;
	/**< Model ID */
	uint16_t device_id;
	/**< Device ID */
	enum rte_ml_io_layout io_layout;
	/**< I/O buffer layout for the model */
	uint16_t min_batches;
	/**< Minimum number of batches that the model can process
	 * in one inference request
	 */
	uint16_t max_batches;
	/**< Maximum number of batches that the model can process
	 * in one inference request
	 */
	uint32_t nb_inputs;
	/**< Number of inputs */
	const struct rte_ml_io_info *input_info;
	/**< Input info array. Array size is equal to nb_inputs */
	uint32_t nb_outputs;
	/**< Number of outputs */
	const struct rte_ml_io_info *output_info;
	/**< Output info array. Array size is equal to nb_outputs */
	uint64_t wb_size;
	/**< Size of model weights and bias */
};

/**
 * Get ML model information.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[out] model_info
 *   Pointer to a model info structure
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
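
/*
 * Illustrative sketch: inspecting the I/O requirements of a loaded model
 * (not part of the original documentation).
 *
 *	struct rte_ml_model_info info;
 *
 *	if (rte_ml_model_info_get(dev_id, model_id, &info) == 0) {
 *		for (uint32_t i = 0; i < info.nb_inputs; i++) {
 *			const struct rte_ml_io_info *in = &info.input_info[i];
 *
 *			printf("input %s: dims=%u elements=%" PRIu64 " size=%" PRIu64 "\n",
 *			       in->name, in->nb_dims, in->nb_elements, in->size);
 *		}
 *	}
 */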

/**
 * Update the model parameters without unloading the model.
 *
 * Update model parameters such as weights and bias without unloading the model.
 * rte_ml_model_stop() must be called before invoking this API.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] buffer
 *   Pointer to the model weights and bias buffer.
 *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
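
/*
 * Illustrative sketch: updating model weights in place (not part of the
 * original documentation; new_wb_buf is a placeholder buffer of
 * rte_ml_model_info::wb_size bytes).
 *
 *	rte_ml_model_stop(dev_id, model_id);
 *	rte_ml_model_params_update(dev_id, model_id, new_wb_buf);
 *	rte_ml_model_start(dev_id, model_id);
 */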

/* IO operations */

/**
 * Quantize input data.
 *
 * Quantization converts data from a higher precision type to a lower precision type to improve
 * the throughput and efficiency of the model execution with minimal loss of accuracy.
 * Types of dequantized data and quantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] dbuffer
 *   Address of dequantized input data
 * @param[in] qbuffer
 *   Address of quantized input data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
		   struct rte_ml_buff_seg **qbuffer);

/**
 * Dequantize output data.
 *
 * Dequantization converts data from a lower precision type to a higher precision type.
 * Types of quantized and dequantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] qbuffer
 *   Address of quantized output data
 * @param[in] dbuffer
 *   Address of dequantized output data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
		     struct rte_ml_buff_seg **dbuffer);
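
/*
 * Illustrative sketch: quantizing a float32 input buffer into the model's
 * quantized input format for a packed-layout model, and dequantizing the output
 * after the inference completes (not part of the original documentation;
 * f32_in/q_in and the output buffers q_out/f32_out are placeholder buffers
 * sized per the model's I/O info).
 *
 *	struct rte_ml_buff_seg dseg = { .addr = f32_in, .length = f32_len };
 *	struct rte_ml_buff_seg qseg = { .addr = q_in, .length = q_len };
 *	struct rte_ml_buff_seg *dbuf = &dseg, *qbuf = &qseg;
 *
 *	rte_ml_io_quantize(dev_id, model_id, &dbuf, &qbuf);
 *
 *	// ... enqueue the op with qbuf as input, dequeue the completed op ...
 *
 *	struct rte_ml_buff_seg oqseg = { .addr = q_out, .length = q_out_len };
 *	struct rte_ml_buff_seg odseg = { .addr = f32_out, .length = f32_out_len };
 *	struct rte_ml_buff_seg *oqbuf = &oqseg, *odbuf = &odseg;
 *
 *	rte_ml_io_dequantize(dev_id, model_id, &oqbuf, &odbuf);
 */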

/* ML op pool operations */

/**
 * Create an ML operation pool
 *
 * @param name
 *   ML operations pool name
 * @param nb_elts
 *   Number of elements in pool
 * @param cache_size
 *   Number of elements to cache on lcore, see
 *   *rte_mempool_create* for further details about cache size
 * @param user_size
 *   Size of private data to allocate for user with each operation
 * @param socket_id
 *   Socket identifier to allocate memory on
 * @return
 *   - On success pointer to mempool
 *   - On failure NULL
 */
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
		      uint16_t user_size, int socket_id);

/**
 * Free an ML operation pool
 *
 * @param mempool
 *   A pointer to the mempool structure.
 *   If NULL then, the function does nothing.
 */
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);

#ifdef __cplusplus
}
#endif

#endif /* RTE_MLDEV_H */