1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022 Marvell. 3 */ 4 5 #ifndef RTE_MLDEV_H 6 #define RTE_MLDEV_H 7 8 /** 9 * @file rte_mldev.h 10 * 11 * @warning 12 * @b EXPERIMENTAL: 13 * All functions in this file may be changed or removed without prior notice. 14 * 15 * ML (Machine Learning) device API. 16 * 17 * The ML framework is built on the following model: 18 * 19 * 20 * +-----------------+ rte_ml_[en|de]queue_burst() 21 * | | | 22 * | Machine o------+ +--------+ | 23 * | Learning | | | queue | | +------+ 24 * | Inference o------+-----o |<===o===>|Core 0| 25 * | Engine | | | pair 0 | +------+ 26 * | o----+ | +--------+ 27 * | | | | 28 * +-----------------+ | | +--------+ 29 * ^ | | | queue | +------+ 30 * | | +-----o |<=======>|Core 1| 31 * | | | pair 1 | +------+ 32 * | | +--------+ 33 * +--------+--------+ | 34 * | +-------------+ | | +--------+ 35 * | | Model 0 | | | | queue | +------+ 36 * | +-------------+ | +-------o |<=======>|Core N| 37 * | +-------------+ | | pair N | +------+ 38 * | | Model 1 | | +--------+ 39 * | +-------------+ | 40 * | +-------------+ |<------> rte_ml_model_load() 41 * | | Model .. | |-------> rte_ml_model_info_get() 42 * | +-------------+ |<------- rte_ml_model_start() 43 * | +-------------+ |<------- rte_ml_model_stop() 44 * | | Model N | |<------- rte_ml_model_params_update() 45 * | +-------------+ |<------- rte_ml_model_unload() 46 * +-----------------+ 47 * 48 * ML Device: A hardware or software-based implementation of ML device API for 49 * running inferences using a pre-trained ML model. 50 * 51 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of 52 * procedure/algorithm and data/pattern required to make predictions on live data. 53 * Once the model is created and trained outside of the DPDK scope, the model can be loaded 54 * via rte_ml_model_load() and then start it using rte_ml_model_start() API. 55 * The rte_ml_model_params_update() can be used to update the model parameters such as weight 56 * and bias without unloading the model using rte_ml_model_unload(). 57 * 58 * ML Inference: ML inference is the process of feeding data to the model via 59 * rte_ml_enqueue_burst() API and use rte_ml_dequeue_burst() API to get the calculated 60 * outputs/predictions from the started model. 61 * 62 * In all functions of the ML device API, the ML device is designated by an 63 * integer >= 0 named as device identifier *dev_id*. 64 * 65 * The functions exported by the ML device API to setup a device designated by 66 * its device identifier must be invoked in the following order: 67 * 68 * - rte_ml_dev_configure() 69 * - rte_ml_dev_queue_pair_setup() 70 * - rte_ml_dev_start() 71 * 72 * A model is required to run the inference operations with the user specified inputs. 73 * Application needs to invoke the ML model API in the following order before queueing 74 * inference jobs. 75 * 76 * - rte_ml_model_load() 77 * - rte_ml_model_start() 78 * 79 * A model can be loaded on a device only after the device has been configured and can be 80 * started or stopped only after a device has been started. 81 * 82 * The rte_ml_model_info_get() API is provided to retrieve the information related to the model. 83 * The information would include the shape and type of input and output required for the inference. 84 * 85 * Data quantization and dequantization is one of the main aspects in ML domain. This involves 86 * conversion of input data from a higher precision to a lower precision data type and vice-versa 87 * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and 88 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input 89 * and output buffers holding data for multiple batches. 90 * 91 * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can used to get the 92 * size of quantized and de-quantized multi-batch input and output buffers. 93 * 94 * User can optionally update the model parameters with rte_ml_model_params_update() after 95 * invoking rte_ml_model_stop() API on a given model ID. 96 * 97 * The application can invoke, in any order, the functions exported by the ML API to enqueue 98 * inference jobs and dequeue inference response. 99 * 100 * If the application wants to change the device configuration (i.e., call 101 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the 102 * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then 103 * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API 104 * for the given model. The application does not need to call rte_ml_dev_stop() API for 105 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc. 106 * 107 * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in 108 * start state after invoking rte_ml_model_start() API, then the application can call 109 * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID. 110 * 111 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function. 112 * 113 * Typical application utilisation of the ML API will follow the following 114 * programming flow. 115 * 116 * - rte_ml_dev_configure() 117 * - rte_ml_dev_queue_pair_setup() 118 * - rte_ml_model_load() 119 * - rte_ml_dev_start() 120 * - rte_ml_model_start() 121 * - rte_ml_model_info_get() 122 * - rte_ml_enqueue_burst() 123 * - rte_ml_dequeue_burst() 124 * - rte_ml_model_stop() 125 * - rte_ml_model_unload() 126 * - rte_ml_dev_stop() 127 * - rte_ml_dev_close() 128 * 129 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD 130 * are lock-free functions which assume to not be invoked in parallel on different logical cores 131 * on the same target object. For instance, the dequeue function of a poll mode driver cannot be 132 * invoked in parallel on two logical cores to operate on same queue pair. Of course, this function 133 * can be invoked in parallel by different logical core on different queue pair. 134 * It is the responsibility of the user application to enforce this rule. 135 */ 136 137 #include <rte_common.h> 138 #include <rte_log.h> 139 #include <rte_mempool.h> 140 141 #ifdef __cplusplus 142 extern "C" { 143 #endif 144 145 /* Logging Macro */ 146 extern int rte_ml_dev_logtype; 147 148 #define RTE_MLDEV_LOG(level, fmt, args...) \ 149 rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args) 150 151 #define RTE_ML_STR_MAX 128 152 /**< Maximum length of name string */ 153 154 #define RTE_MLDEV_DEFAULT_MAX 32 155 /** Maximum number of devices if rte_ml_dev_init() is not called. */ 156 157 /* Device operations */ 158 159 /** 160 * Initialize the device array before probing devices. If not called, the first device probed would 161 * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX. 162 * 163 * @param dev_max 164 * Maximum number of devices. 165 * 166 * @return 167 * 0 on success, -rte_errno otherwise: 168 * - ENOMEM if out of memory 169 * - EINVAL if 0 size 170 * - EBUSY if already initialized 171 */ 172 __rte_experimental 173 int 174 rte_ml_dev_init(size_t dev_max); 175 176 /** 177 * Get the total number of ML devices that have been successfully initialised. 178 * 179 * @return 180 * - The total number of usable ML devices. 181 */ 182 __rte_experimental 183 uint16_t 184 rte_ml_dev_count(void); 185 186 /** 187 * Check if the device is in ready state. 188 * 189 * @param dev_id 190 * The identifier of the device. 191 * 192 * @return 193 * - 0 if device state is not in ready state. 194 * - 1 if device state is ready state. 195 */ 196 __rte_experimental 197 int 198 rte_ml_dev_is_valid_dev(int16_t dev_id); 199 200 /** 201 * Return the NUMA socket to which a device is connected. 202 * 203 * @param dev_id 204 * The identifier of the device. 205 * 206 * @return 207 * - The NUMA socket id to which the device is connected 208 * - 0 If the socket could not be determined. 209 * - -EINVAL: if the dev_id value is not valid. 210 */ 211 __rte_experimental 212 int 213 rte_ml_dev_socket_id(int16_t dev_id); 214 215 /** ML device information */ 216 struct rte_ml_dev_info { 217 const char *driver_name; 218 /**< Driver name */ 219 uint16_t max_models; 220 /**< Maximum number of models supported by the device. 221 * @see struct rte_ml_dev_config::nb_models 222 */ 223 uint16_t max_queue_pairs; 224 /**< Maximum number of queues pairs supported by the device. 225 * @see struct rte_ml_dev_config::nb_queue_pairs 226 */ 227 uint16_t max_desc; 228 /**< Maximum allowed number of descriptors for queue pair by the device. 229 * @see struct rte_ml_dev_qp_conf::nb_desc 230 */ 231 uint16_t max_segments; 232 /**< Maximum number of scatter-gather entries supported by the device. 233 * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next 234 */ 235 uint16_t min_align_size; 236 /**< Minimum alignment size of IO buffers used by the device. */ 237 }; 238 239 /** 240 * Retrieve the information of the device. 241 * 242 * @param dev_id 243 * The identifier of the device. 244 * @param dev_info 245 * A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device. 246 * 247 * @return 248 * - 0: Success, driver updates the information of the ML device 249 * - < 0: Error code returned by the driver info get function. 250 */ 251 __rte_experimental 252 int 253 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info); 254 255 /** ML device configuration structure */ 256 struct rte_ml_dev_config { 257 int socket_id; 258 /**< Socket to allocate resources on. */ 259 uint16_t nb_models; 260 /**< Number of models to be loaded on the device. 261 * This value cannot exceed the max_models which is previously provided in 262 * struct rte_ml_dev_info::max_models 263 */ 264 uint16_t nb_queue_pairs; 265 /**< Number of queue pairs to configure on this device. 266 * This value cannot exceed the max_models which is previously provided in 267 * struct rte_ml_dev_info::max_queue_pairs 268 */ 269 }; 270 271 /** 272 * Configure an ML device. 273 * 274 * This function must be invoked first before any other function in the API. 275 * 276 * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after 277 * rte_ml_dev_close() is called. 278 * 279 * The caller may use rte_ml_dev_info_get() to get the capability of each resources available for 280 * this ML device. 281 * 282 * @param dev_id 283 * The identifier of the device to configure. 284 * @param config 285 * The ML device configuration structure. 286 * 287 * @return 288 * - 0: Success, device configured. 289 * - < 0: Error code returned by the driver configuration function. 290 */ 291 __rte_experimental 292 int 293 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config); 294 295 /* Forward declaration */ 296 struct rte_ml_op; 297 298 /**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */ 299 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op); 300 301 /** ML device queue pair configuration structure. */ 302 struct rte_ml_dev_qp_conf { 303 uint32_t nb_desc; 304 /**< Number of descriptors per queue pair. 305 * This value cannot exceed the max_desc which previously provided in 306 * struct rte_ml_dev_info:max_desc 307 */ 308 rte_ml_dev_stop_flush_t cb; 309 /**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op. 310 * Value NULL is allowed, in which case callback will not be invoked. 311 * This function can be used to properly dispose of outstanding ML ops from all 312 * queue pairs, for example ops containing memory pointers. 313 * @see rte_ml_dev_stop() 314 */ 315 }; 316 317 /** 318 * Set up a queue pair for a device. This should only be called when the device is stopped. 319 * 320 * @param dev_id 321 * The identifier of the device. 322 * @param queue_pair_id 323 * The index of the queue pairs to set up. The value must be in the range [0, nb_queue_pairs - 1] 324 * previously supplied to rte_ml_dev_configure(). 325 * @param qp_conf 326 * The pointer to the configuration data to be used for the queue pair. 327 * @param socket_id 328 * The *socket_id* argument is the socket identifier in case of NUMA. 329 * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated 330 * for the queue pair. 331 * 332 * @return 333 * - 0: Success, queue pair correctly set up. 334 * - < 0: Queue pair configuration failed. 335 */ 336 __rte_experimental 337 int 338 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id, 339 const struct rte_ml_dev_qp_conf *qp_conf, int socket_id); 340 341 /** 342 * Start an ML device. 343 * 344 * The device start step consists of setting the configured features and enabling the ML device 345 * to accept inference jobs. 346 * 347 * @param dev_id 348 * The identifier of the device. 349 * 350 * @return 351 * - 0: Success, device started. 352 * - <0: Error code of the driver device start function. 353 */ 354 __rte_experimental 355 int 356 rte_ml_dev_start(int16_t dev_id); 357 358 /** 359 * Stop an ML device. A stopped device cannot accept inference jobs. 360 * The device can be restarted with a call to rte_ml_dev_start(). 361 * 362 * @param dev_id 363 * The identifier of the device. 364 * 365 * @return 366 * - 0: Success, device stopped. 367 * - <0: Error code of the driver device stop function. 368 */ 369 __rte_experimental 370 int 371 rte_ml_dev_stop(int16_t dev_id); 372 373 /** 374 * Close an ML device. The device cannot be restarted! 375 * 376 * @param dev_id 377 * The identifier of the device. 378 * 379 * @return 380 * - 0 on successfully closing device. 381 * - <0 on failure to close device. 382 */ 383 __rte_experimental 384 int 385 rte_ml_dev_close(int16_t dev_id); 386 387 /** Status of ML operation */ 388 enum rte_ml_op_status { 389 RTE_ML_OP_STATUS_SUCCESS = 0, 390 /**< Operation completed successfully */ 391 RTE_ML_OP_STATUS_NOT_PROCESSED, 392 /**< Operation has not yet been processed by the device. */ 393 RTE_ML_OP_STATUS_ERROR, 394 /**< Operation completed with error. 395 * Application can invoke rte_ml_op_error_get() to get PMD specific 396 * error code if needed. 397 */ 398 }; 399 400 /** ML operation's input and output buffer representation as scatter gather list 401 */ 402 struct rte_ml_buff_seg { 403 rte_iova_t iova_addr; 404 /**< IOVA address of segment buffer. */ 405 void *addr; 406 /**< Virtual address of segment buffer. */ 407 uint32_t length; 408 /**< Segment length. */ 409 uint32_t reserved; 410 /**< Reserved for future use. */ 411 struct rte_ml_buff_seg *next; 412 /**< Points to next segment. Value NULL represents the last segment. */ 413 }; 414 415 /** 416 * ML Operation. 417 * 418 * This structure contains data related to performing an ML operation on the buffers using 419 * the model specified through model_id. 420 */ 421 struct rte_ml_op { 422 uint16_t model_id; 423 /**< Model ID to be used for the operation. */ 424 uint16_t nb_batches; 425 /**< Number of batches. Minimum value must be one. 426 * Input buffer must hold inference data for each batch as contiguous. 427 */ 428 uint32_t reserved; 429 /**< Reserved for future use. */ 430 struct rte_mempool *mempool; 431 /**< Pool from which operation is allocated. */ 432 struct rte_ml_buff_seg input; 433 /**< Input buffer to hold the inference data. */ 434 struct rte_ml_buff_seg output; 435 /**< Output buffer to hold the inference output by the driver. */ 436 RTE_STD_C11 437 union { 438 uint64_t user_u64; 439 /**< User data as uint64_t.*/ 440 void *user_ptr; 441 /**< User data as void*.*/ 442 }; 443 enum rte_ml_op_status status; 444 /**< Operation status. */ 445 uint64_t impl_opaque; 446 /**< Implementation specific opaque value. 447 * An implementation may use this field to hold 448 * implementation specific value to share between 449 * dequeue and enqueue operation. 450 * The application should not modify this field. 451 */ 452 } __rte_cache_aligned; 453 454 /* Enqueue/Dequeue operations */ 455 456 /** 457 * Enqueue a burst of ML inferences for processing on an ML device. 458 * 459 * The rte_ml_enqueue_burst() function is invoked to place ML inference 460 * operations on the queue *qp_id* of the device designated by its *dev_id*. 461 * 462 * The *nb_ops* parameter is the number of inferences to process which are 463 * supplied in the *ops* array of *rte_ml_op* structures. 464 * 465 * The rte_ml_enqueue_burst() function returns the number of inferences it 466 * actually enqueued for processing. A return value equal to *nb_ops* means that 467 * all packets have been enqueued. 468 * 469 * @param dev_id 470 * The identifier of the device. 471 * @param qp_id 472 * The index of the queue pair which inferences are to be enqueued for processing. 473 * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to 474 * *rte_ml_dev_configure*. 475 * @param ops 476 * The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the 477 * ML inferences to be processed. 478 * @param nb_ops 479 * The number of operations to process. 480 * 481 * @return 482 * The number of inference operations actually enqueued to the ML device. 483 * The return value can be less than the value of the *nb_ops* parameter when the ML device queue 484 * is full or if invalid parameters are specified in a *rte_ml_op*. 485 */ 486 __rte_experimental 487 uint16_t 488 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops); 489 490 /** 491 * Dequeue a burst of processed ML inferences operations from a queue on the ML device. 492 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied 493 * in the *ops* array. 494 * 495 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued, 496 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array. 497 * 498 * A return value equal to *nb_ops* indicates that the queue contained at least nb_ops* operations, 499 * and this is likely to signify that other processed operations remain in the devices output queue. 500 * Application implementing a "retrieve as many processed operations as possible" policy can check 501 * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than 502 * *nb_ops* is returned. 503 * 504 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid 505 * the corresponding overhead. 506 * 507 * @param dev_id 508 * The identifier of the device. 509 * @param qp_id 510 * The index of the queue pair from which to retrieve processed packets. 511 * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to 512 * rte_ml_dev_configure(). 513 * @param ops 514 * The address of an array of pointers to *rte_ml_op* structures that must be large enough to 515 * store *nb_ops* pointers in it. 516 * @param nb_ops 517 * The maximum number of inferences to dequeue. 518 * 519 * @return 520 * The number of operations actually dequeued, which is the number of pointers 521 * to *rte_ml_op* structures effectively supplied to the *ops* array. 522 */ 523 __rte_experimental 524 uint16_t 525 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops); 526 527 /** 528 * Verbose error structure definition. 529 */ 530 struct rte_ml_op_error { 531 char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */ 532 uint64_t errcode; /**< Vendor specific error code. */ 533 }; 534 535 /** 536 * Get PMD specific error information for an ML op. 537 * 538 * When an ML operation completed with RTE_ML_OP_STATUS_ERROR as status, 539 * This API allows to get PMD specific error details. 540 * 541 * @param[in] dev_id 542 * Device identifier 543 * @param[in] op 544 * Handle of ML operation 545 * @param[in] error 546 * Address of structure rte_ml_op_error to be filled 547 * 548 * @return 549 * - Returns 0 on success 550 * - Returns negative value on failure 551 */ 552 __rte_experimental 553 int 554 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error); 555 556 /* Statistics operations */ 557 558 /** Device statistics. */ 559 struct rte_ml_dev_stats { 560 uint64_t enqueued_count; 561 /**< Count of all operations enqueued */ 562 uint64_t dequeued_count; 563 /**< Count of all operations dequeued */ 564 uint64_t enqueue_err_count; 565 /**< Total error count on operations enqueued */ 566 uint64_t dequeue_err_count; 567 /**< Total error count on operations dequeued */ 568 }; 569 570 /** 571 * Retrieve the general I/O statistics of a device. 572 * 573 * @param dev_id 574 * The identifier of the device. 575 * @param stats 576 * Pointer to structure to where statistics will be copied. 577 * On error, this location may or may not have been modified. 578 * @return 579 * - 0 on success 580 * - -EINVAL: If invalid parameter pointer is provided. 581 */ 582 __rte_experimental 583 int 584 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats); 585 586 /** 587 * Reset the statistics of a device. 588 * 589 * @param dev_id 590 * The identifier of the device. 591 */ 592 __rte_experimental 593 void 594 rte_ml_dev_stats_reset(int16_t dev_id); 595 596 /** 597 * A name-key lookup element for extended statistics. 598 * 599 * This structure is used to map between names and ID numbers for extended ML device statistics. 600 */ 601 struct rte_ml_dev_xstats_map { 602 uint16_t id; 603 /**< xstat identifier */ 604 char name[RTE_ML_STR_MAX]; 605 /**< xstat name */ 606 }; 607 608 /** 609 * Retrieve names of extended statistics of an ML device. 610 * 611 * @param dev_id 612 * The identifier of the device. 613 * @param[out] xstats_map 614 * Block of memory to insert id and names into. Must be at least size in capacity. 615 * If set to NULL, function returns required capacity. 616 * @param size 617 * Capacity of xstats_map (number of name-id maps). 618 * 619 * @return 620 * - Positive value on success: 621 * - The return value is the number of entries filled in the stats map. 622 * - If xstats_map set to NULL then required capacity for xstats_map. 623 * - Negative value on error: 624 * - -ENODEV: for invalid *dev_id*. 625 * - -ENOTSUP: if the device doesn't support this function. 626 */ 627 __rte_experimental 628 int 629 rte_ml_dev_xstats_names_get(int16_t dev_id, struct rte_ml_dev_xstats_map *xstats_map, 630 uint32_t size); 631 632 /** 633 * Retrieve the value of a single stat by requesting it by name. 634 * 635 * @param dev_id 636 * The identifier of the device. 637 * @param name 638 * The stat name to retrieve. 639 * @param stat_id 640 * If non-NULL, the numerical id of the stat will be returned, so that further requests for 641 * the stat can be got using rte_ml_dev_xstats_get, which will be faster as it doesn't need to 642 * scan a list of names for the stat. 643 * @param[out] value 644 * Must be non-NULL, retrieved xstat value will be stored in this address. 645 * 646 * @return 647 * - 0: Successfully retrieved xstat value. 648 * - -EINVAL: invalid parameters. 649 * - -ENOTSUP: if not supported. 650 */ 651 __rte_experimental 652 int 653 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value); 654 655 /** 656 * Retrieve extended statistics of an ML device. 657 * 658 * @param dev_id 659 * The identifier of the device. 660 * @param stat_ids 661 * The id numbers of the stats to get. The ids can be fetched from the stat position in the 662 * stat list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get(). 663 * @param values 664 * The values for each stats request by ID. 665 * @param nb_ids 666 * The number of stats requested. 667 * @return 668 * - Positive value: number of stat entries filled into the values array 669 * - Negative value on error: 670 * - -ENODEV: for invalid *dev_id*. 671 * - -ENOTSUP: if the device doesn't support this function. 672 */ 673 __rte_experimental 674 int 675 rte_ml_dev_xstats_get(int16_t dev_id, const uint16_t *stat_ids, uint64_t *values, uint16_t nb_ids); 676 677 /** 678 * Reset the values of the xstats of the selected component in the device. 679 * 680 * @param dev_id 681 * The identifier of the device. 682 * @param stat_ids 683 * Selects specific statistics to be reset. When NULL, all statistics will be reset. 684 * If non-NULL, must point to array of at least *nb_ids* size. 685 * @param nb_ids 686 * The number of ids available from the *ids* array. Ignored when ids is NULL. 687 * @return 688 * - 0: Successfully reset the statistics to zero. 689 * - -EINVAL: invalid parameters. 690 * - -ENOTSUP: if not supported. 691 */ 692 __rte_experimental 693 int 694 rte_ml_dev_xstats_reset(int16_t dev_id, const uint16_t *stat_ids, uint16_t nb_ids); 695 696 /* Utility operations */ 697 698 /** 699 * Dump internal information about *dev_id* to the FILE* provided in *fd*. 700 * 701 * @param dev_id 702 * The identifier of the device. 703 * @param fd 704 * A pointer to a file for output. 705 * @return 706 * - 0: on success. 707 * - <0: on failure. 708 */ 709 __rte_experimental 710 int 711 rte_ml_dev_dump(int16_t dev_id, FILE *fd); 712 713 /** 714 * Trigger the ML device self test. 715 * 716 * @param dev_id 717 * The identifier of the device. 718 * @return 719 * - 0: Selftest successful. 720 * - -ENOTSUP: if the device doesn't support selftest. 721 * - other values < 0 on failure. 722 */ 723 __rte_experimental 724 int 725 rte_ml_dev_selftest(int16_t dev_id); 726 727 /* Model operations */ 728 729 /** ML model load parameters 730 * 731 * Parameters required to load an ML model. 732 */ 733 struct rte_ml_model_params { 734 void *addr; 735 /**< Address of model buffer */ 736 size_t size; 737 /**< Size of model buffer */ 738 }; 739 740 /** 741 * Load an ML model to the device. 742 * 743 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params. 744 * 745 * @param[in] dev_id 746 * The identifier of the device. 747 * @param[in] params 748 * Parameters for the model to be loaded. 749 * @param[out] model_id 750 * Identifier of the model loaded. 751 * 752 * @return 753 * - 0: Success, Model loaded. 754 * - < 0: Failure, Error code of the model load driver function. 755 */ 756 __rte_experimental 757 int 758 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id); 759 760 /** 761 * Unload an ML model from the device. 762 * 763 * @param[in] dev_id 764 * The identifier of the device. 765 * @param[in] model_id 766 * Identifier of the model to be unloaded. 767 * 768 * @return 769 * - 0: Success, Model unloaded. 770 * - < 0: Failure, Error code of the model unload driver function. 771 */ 772 __rte_experimental 773 int 774 rte_ml_model_unload(int16_t dev_id, uint16_t model_id); 775 776 /** 777 * Start an ML model for the given device ID. 778 * 779 * Start an ML model to accept inference requests. 780 * 781 * @param[in] dev_id 782 * The identifier of the device. 783 * @param[in] model_id 784 * Identifier of the model to be started. 785 * 786 * @return 787 * - 0: Success, Model loaded. 788 * - < 0: Failure, Error code of the model start driver function. 789 */ 790 __rte_experimental 791 int 792 rte_ml_model_start(int16_t dev_id, uint16_t model_id); 793 794 /** 795 * Stop an ML model for the given device ID. 796 * 797 * Model stop would disable the ML model to be used for inference jobs. 798 * All inference jobs must have been completed before model stop is attempted. 799 800 * @param[in] dev_id 801 * The identifier of the device. 802 * @param[in] model_id 803 * Identifier of the model to be stopped. 804 * 805 * @return 806 * - 0: Success, Model unloaded. 807 * - < 0: Failure, Error code of the model stop driver function. 808 */ 809 __rte_experimental 810 int 811 rte_ml_model_stop(int16_t dev_id, uint16_t model_id); 812 813 /** 814 * Input and output data types. ML models can operate on reduced precision 815 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint. 816 * This enum is used to represent the lower precision integer and floating point types used 817 * by ML models. 818 */ 819 enum rte_ml_io_type { 820 RTE_ML_IO_TYPE_UNKNOWN = 0, 821 /**< Invalid or unknown type */ 822 RTE_ML_IO_TYPE_INT8, 823 /**< 8-bit integer */ 824 RTE_ML_IO_TYPE_UINT8, 825 /**< 8-bit unsigned integer */ 826 RTE_ML_IO_TYPE_INT16, 827 /**< 16-bit integer */ 828 RTE_ML_IO_TYPE_UINT16, 829 /**< 16-bit unsigned integer */ 830 RTE_ML_IO_TYPE_INT32, 831 /**< 32-bit integer */ 832 RTE_ML_IO_TYPE_UINT32, 833 /**< 32-bit unsigned integer */ 834 RTE_ML_IO_TYPE_FP8, 835 /**< 8-bit floating point number */ 836 RTE_ML_IO_TYPE_FP16, 837 /**< IEEE 754 16-bit floating point number */ 838 RTE_ML_IO_TYPE_FP32, 839 /**< IEEE 754 32-bit floating point number */ 840 RTE_ML_IO_TYPE_BFLOAT16 841 /**< 16-bit brain floating point number. */ 842 }; 843 844 /** 845 * Input and output format. This is used to represent the encoding type of multi-dimensional 846 * used by ML models. 847 */ 848 enum rte_ml_io_format { 849 RTE_ML_IO_FORMAT_NCHW = 1, 850 /**< Batch size (N) x channels (C) x height (H) x width (W) */ 851 RTE_ML_IO_FORMAT_NHWC, 852 /**< Batch size (N) x height (H) x width (W) x channels (C) */ 853 RTE_ML_IO_FORMAT_CHWN, 854 /**< Channels (C) x height (H) x width (W) x batch size (N) */ 855 RTE_ML_IO_FORMAT_3D, 856 /**< Format to represent a 3 dimensional data */ 857 RTE_ML_IO_FORMAT_2D, 858 /**< Format to represent matrix data */ 859 RTE_ML_IO_FORMAT_1D, 860 /**< Format to represent vector data */ 861 RTE_ML_IO_FORMAT_SCALAR, 862 /**< Format to represent scalar data */ 863 }; 864 865 /** 866 * Input and output shape. This structure represents the encoding format and dimensions 867 * of the tensor or vector. 868 * 869 * The data can be a 4D / 3D tensor, matrix, vector or a scalar. Number of dimensions used 870 * for the data would depend on the format. Unused dimensions to be set to 1. 871 */ 872 struct rte_ml_io_shape { 873 enum rte_ml_io_format format; 874 /**< Format of the data */ 875 uint32_t w; 876 /**< First dimension */ 877 uint32_t x; 878 /**< Second dimension */ 879 uint32_t y; 880 /**< Third dimension */ 881 uint32_t z; 882 /**< Fourth dimension */ 883 }; 884 885 /** Input and output data information structure 886 * 887 * Specifies the type and shape of input and output data. 888 */ 889 struct rte_ml_io_info { 890 char name[RTE_ML_STR_MAX]; 891 /**< Name of data */ 892 struct rte_ml_io_shape shape; 893 /**< Shape of data */ 894 enum rte_ml_io_type qtype; 895 /**< Type of quantized data */ 896 enum rte_ml_io_type dtype; 897 /**< Type of de-quantized data */ 898 }; 899 900 /** Model information structure */ 901 struct rte_ml_model_info { 902 char name[RTE_ML_STR_MAX]; 903 /**< Model name. */ 904 char version[RTE_ML_STR_MAX]; 905 /**< Model version */ 906 uint16_t model_id; 907 /**< Model ID */ 908 uint16_t device_id; 909 /**< Device ID */ 910 uint16_t batch_size; 911 /**< Maximum number of batches that the model can process simultaneously */ 912 uint32_t nb_inputs; 913 /**< Number of inputs */ 914 const struct rte_ml_io_info *input_info; 915 /**< Input info array. Array size is equal to nb_inputs */ 916 uint32_t nb_outputs; 917 /**< Number of outputs */ 918 const struct rte_ml_io_info *output_info; 919 /**< Output info array. Array size is equal to nb_output */ 920 uint64_t wb_size; 921 /**< Size of model weights and bias */ 922 }; 923 924 /** 925 * Get ML model information. 926 * 927 * @param[in] dev_id 928 * The identifier of the device. 929 * @param[in] model_id 930 * Identifier for the model created 931 * @param[out] model_info 932 * Pointer to a model info structure 933 * 934 * @return 935 * - Returns 0 on success 936 * - Returns negative value on failure 937 */ 938 __rte_experimental 939 int 940 rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info); 941 942 /** 943 * Update the model parameters without unloading model. 944 * 945 * Update model parameters such as weights and bias without unloading the model. 946 * rte_ml_model_stop() must be called before invoking this API. 947 * 948 * @param[in] dev_id 949 * The identifier of the device. 950 * @param[in] model_id 951 * Identifier for the model created 952 * @param[in] buffer 953 * Pointer to the model weights and bias buffer. 954 * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*. 955 * 956 * @return 957 * - Returns 0 on success 958 * - Returns negative value on failure 959 */ 960 __rte_experimental 961 int 962 rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer); 963 964 /* IO operations */ 965 966 /** 967 * Get size of quantized and dequantized input buffers. 968 * 969 * Calculate the size of buffers required for quantized and dequantized input data. 970 * This API would return the buffer sizes for the number of batches provided and would 971 * consider the alignment requirements as per the PMD. Input sizes computed by this API can 972 * be used by the application to allocate buffers. 973 * 974 * @param[in] dev_id 975 * The identifier of the device. 976 * @param[in] model_id 977 * Identifier for the model created 978 * @param[in] nb_batches 979 * Number of batches of input to be processed in a single inference job 980 * @param[out] input_qsize 981 * Quantized input size pointer. 982 * NULL value is allowed, in which case input_qsize is not calculated by the driver. 983 * @param[out] input_dsize 984 * Dequantized input size pointer. 985 * NULL value is allowed, in which case input_dsize is not calculated by the driver. 986 * 987 * @return 988 * - Returns 0 on success 989 * - Returns negative value on failure 990 */ 991 __rte_experimental 992 int 993 rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches, 994 uint64_t *input_qsize, uint64_t *input_dsize); 995 996 /** 997 * Get size of quantized and dequantized output buffers. 998 * 999 * Calculate the size of buffers required for quantized and dequantized output data. 1000 * This API would return the buffer sizes for the number of batches provided and would consider 1001 * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the 1002 * application to allocate buffers. 1003 * 1004 * @param[in] dev_id 1005 * The identifier of the device. 1006 * @param[in] model_id 1007 * Identifier for the model created 1008 * @param[in] nb_batches 1009 * Number of batches of input to be processed in a single inference job 1010 * @param[out] output_qsize 1011 * Quantized output size pointer. 1012 * NULL value is allowed, in which case output_qsize is not calculated by the driver. 1013 * @param[out] output_dsize 1014 * Dequantized output size pointer. 1015 * NULL value is allowed, in which case output_dsize is not calculated by the driver. 1016 * 1017 * @return 1018 * - Returns 0 on success 1019 * - Returns negative value on failure 1020 */ 1021 __rte_experimental 1022 int 1023 rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches, 1024 uint64_t *output_qsize, uint64_t *output_dsize); 1025 1026 /** 1027 * Quantize input data. 1028 * 1029 * Quantization converts data from a higher precision types to a lower precision types to improve 1030 * the throughput and efficiency of the model execution with minimal loss of accuracy. 1031 * Types of dequantized data and quantized data are specified by the model. 1032 * 1033 * @param[in] dev_id 1034 * The identifier of the device. 1035 * @param[in] model_id 1036 * Identifier for the model 1037 * @param[in] nb_batches 1038 * Number of batches in the dequantized input buffer 1039 * @param[in] dbuffer 1040 * Address of dequantized input data 1041 * @param[in] qbuffer 1042 * Address of quantized input data 1043 * 1044 * @return 1045 * - Returns 0 on success 1046 * - Returns negative value on failure 1047 */ 1048 __rte_experimental 1049 int 1050 rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer, 1051 void *qbuffer); 1052 1053 /** 1054 * Dequantize output data. 1055 * 1056 * Dequantization converts data from a lower precision type to a higher precision type. 1057 * Types of quantized data and dequantized are specified by the model. 1058 * 1059 * @param[in] dev_id 1060 * The identifier of the device. 1061 * @param[in] model_id 1062 * Identifier for the model 1063 * @param[in] nb_batches 1064 * Number of batches in the dequantized output buffer 1065 * @param[in] qbuffer 1066 * Address of quantized output data 1067 * @param[in] dbuffer 1068 * Address of dequantized output data 1069 * 1070 * @return 1071 * - Returns 0 on success 1072 * - Returns negative value on failure 1073 */ 1074 __rte_experimental 1075 int 1076 rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer, 1077 void *dbuffer); 1078 1079 /* ML op pool operations */ 1080 1081 /** 1082 * Create an ML operation pool 1083 * 1084 * @param name 1085 * ML operations pool name 1086 * @param nb_elts 1087 * Number of elements in pool 1088 * @param cache_size 1089 * Number of elements to cache on lcore, see 1090 * *rte_mempool_create* for further details about cache size 1091 * @param user_size 1092 * Size of private data to allocate for user with each operation 1093 * @param socket_id 1094 * Socket to identifier allocate memory on 1095 * @return 1096 * - On success pointer to mempool 1097 * - On failure NULL 1098 */ 1099 __rte_experimental 1100 struct rte_mempool * 1101 rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size, 1102 uint16_t user_size, int socket_id); 1103 1104 /** 1105 * Free an ML operation pool 1106 * 1107 * @param mempool 1108 * A pointer to the mempool structure. 1109 * If NULL then, the function does nothing. 1110 */ 1111 __rte_experimental 1112 void 1113 rte_ml_op_pool_free(struct rte_mempool *mempool); 1114 1115 #ifdef __cplusplus 1116 } 1117 #endif 1118 1119 #endif /* RTE_MLDEV_H */ 1120