/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#ifndef RTE_MLDEV_H
#define RTE_MLDEV_H

/**
 * @file rte_mldev.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * ML (Machine Learning) device API.
 *
 * The ML framework is built on the following model:
 *
 *
 *     +-----------------+               rte_ml_[en|de]queue_burst()
 *     |                 |                          |
 *     |     Machine     o------+     +--------+    |
 *     |     Learning    |      |     | queue  |    |    +------+
 *     |     Inference   o------+-----o        |<===o===>|Core 0|
 *     |     Engine      |      |     | pair 0 |         +------+
 *     |                 o----+ |     +--------+
 *     |                 |    | |
 *     +-----------------+    | |     +--------+
 *              ^             | |     | queue  |         +------+
 *              |             | +-----o        |<=======>|Core 1|
 *              |             |       | pair 1 |         +------+
 *              |             |       +--------+
 *     +--------+--------+    |
 *     | +-------------+ |    |       +--------+
 *     | |   Model 0   | |    |       | queue  |         +------+
 *     | +-------------+ |    +-------o        |<=======>|Core N|
 *     | +-------------+ |            | pair N |         +------+
 *     | |   Model 1   | |            +--------+
 *     | +-------------+ |
 *     | +-------------+ |<------> rte_ml_model_load()
 *     | |   Model ..  | |-------> rte_ml_model_info_get()
 *     | +-------------+ |<------- rte_ml_model_start()
 *     | +-------------+ |<------- rte_ml_model_stop()
 *     | |   Model N   | |<------- rte_ml_model_params_update()
 *     | +-------------+ |<------- rte_ml_model_unload()
 *     +-----------------+
 *
 * ML Device: A hardware or software-based implementation of ML device API for
 * running inferences using a pre-trained ML model.
 *
 * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
 * the procedure/algorithm and the data/pattern required to make predictions on live data.
 * Once the model is created and trained outside of the DPDK scope, the model can be loaded
 * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
 * The rte_ml_model_params_update() API can be used to update model parameters such as weights
 * and bias without unloading the model using rte_ml_model_unload().
 *
 * ML Inference: ML inference is the process of feeding data to the model via the
 * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
 * outputs/predictions from the started model.
 *
 * In all functions of the ML device API, the ML device is designated by an
 * integer >= 0 named the device identifier *dev_id*.
 *
 * The functions exported by the ML device API to set up a device designated by
 * its device identifier must be invoked in the following order:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_dev_start()
 *
 * A model is required to run the inference operations with the user specified inputs.
 * The application needs to invoke the ML model API in the following order before enqueueing
 * inference jobs:
 *
 * - rte_ml_model_load()
 * - rte_ml_model_start()
 *
 * A model can be loaded on a device only after the device has been configured and can be
 * started or stopped only after the device has been started.
 *
 * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
 * The information includes the shape and type of the input and output required for the inference.
 *
 * Data quantization and dequantization are among the main aspects of the ML domain. This involves
 * conversion of input data from a higher precision to a lower precision data type and vice-versa
 * for the output.
 * APIs are provided to enable quantization through rte_ml_io_quantize() and
 * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
 * and output buffers holding data for multiple batches.
 *
 * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to get
 * the size of quantized and de-quantized multi-batch input and output buffers.
 *
 * User can optionally update the model parameters with rte_ml_model_params_update() after
 * invoking the rte_ml_model_stop() API on a given model ID.
 *
 * The application can invoke, in any order, the functions exported by the ML API to enqueue
 * inference jobs and dequeue inference responses.
 *
 * If the application wants to change the device configuration (i.e., call
 * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
 * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
 * the application must call rte_ml_model_stop() followed by the rte_ml_model_params_update() API
 * for the given model. The application does not need to call the rte_ml_dev_stop() API for
 * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
 *
 * Once the device is in the started state after invoking the rte_ml_dev_start() API and the model
 * is in the started state after invoking the rte_ml_model_start() API, the application can call
 * the rte_ml_enqueue_burst() and rte_ml_dequeue_burst() APIs on the destined device and model ID.
 *
 * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
 *
 * Typical application utilisation of the ML API will follow the following
 * programming flow:
 *
 * - rte_ml_dev_configure()
 * - rte_ml_dev_queue_pair_setup()
 * - rte_ml_model_load()
 * - rte_ml_dev_start()
 * - rte_ml_model_start()
 * - rte_ml_model_info_get()
 * - rte_ml_enqueue_burst()
 * - rte_ml_dequeue_burst()
 * - rte_ml_model_stop()
 * - rte_ml_model_unload()
 * - rte_ml_dev_stop()
 * - rte_ml_dev_close()
 *
 * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
 * are lock-free functions which are assumed not to be invoked in parallel on different logical
 * cores on the same target object. For instance, the dequeue function of a poll mode driver cannot
 * be invoked in parallel on two logical cores to operate on the same queue pair. Of course, this
 * function can be invoked in parallel by different logical cores on different queue pairs.
 * It is the responsibility of the user application to enforce this rule.
 */

#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Logging Macro */
extern int rte_ml_dev_logtype;
#define RTE_LOGTYPE_MLDEV rte_ml_dev_logtype

#define RTE_MLDEV_LOG(level, ...) \
	RTE_LOG_LINE_PREFIX(level, MLDEV, "%s(): ", __func__, __VA_ARGS__)

#define RTE_ML_STR_MAX 128
/**< Maximum length of name string */

#define RTE_MLDEV_DEFAULT_MAX 32
/** Maximum number of devices if rte_ml_dev_init() is not called. */

/* Device operations */

/**
 * Initialize the device array before probing devices. If not called, the first device probed
 * would initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
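 *
 * A minimal usage sketch (the array size of 64 below is an arbitrary example value):
 *
 * @code
 * int ret = rte_ml_dev_init(64);
 * if (ret != 0 && ret != -EBUSY)
 *     return ret;                    // initialization failed
 * @endcode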
 *
 * @param dev_max
 *   Maximum number of devices.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENOMEM if out of memory
 *   - EINVAL if 0 size
 *   - EBUSY if already initialized
 */
__rte_experimental
int
rte_ml_dev_init(size_t dev_max);

/**
 * Get the total number of ML devices that have been successfully initialised.
 *
 * @return
 *   - The total number of usable ML devices.
 */
__rte_experimental
uint16_t
rte_ml_dev_count(void);

/**
 * Check if the device is in ready state.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 if device state is not in ready state.
 *   - 1 if device state is ready state.
 */
__rte_experimental
int
rte_ml_dev_is_valid_dev(int16_t dev_id);

/**
 * Return the NUMA socket to which a device is connected.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - The NUMA socket id to which the device is connected
 *   - 0 if the socket could not be determined.
 *   - -EINVAL if the dev_id value is not valid.
 */
__rte_experimental
int
rte_ml_dev_socket_id(int16_t dev_id);

/** ML device information */
struct rte_ml_dev_info {
        const char *driver_name;
        /**< Driver name */
        uint16_t max_models;
        /**< Maximum number of models supported by the device.
         * @see struct rte_ml_dev_config::nb_models
         */
        uint16_t max_queue_pairs;
        /**< Maximum number of queue pairs supported by the device.
         * @see struct rte_ml_dev_config::nb_queue_pairs
         */
        uint16_t max_desc;
        /**< Maximum allowed number of descriptors for a queue pair by the device.
         * @see struct rte_ml_dev_qp_conf::nb_desc
         */
        uint16_t max_io;
        /**< Maximum number of inputs/outputs supported per model. */
        uint16_t max_segments;
        /**< Maximum number of scatter-gather entries supported by the device.
         * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next
         */
        uint16_t align_size;
        /**< Alignment size of IO buffers used by the device. */
};

/**
 * Retrieve the information of the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param dev_info
 *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
 *
 * @return
 *   - 0: Success, driver updates the information of the ML device
 *   - < 0: Error code returned by the driver info get function.
 */
__rte_experimental
int
rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);

/** ML device configuration structure */
struct rte_ml_dev_config {
        int socket_id;
        /**< Socket to allocate resources on. */
        uint16_t nb_models;
        /**< Number of models to be loaded on the device.
         * This value cannot exceed the max_models which is previously provided in
         * struct rte_ml_dev_info::max_models
         */
        uint16_t nb_queue_pairs;
        /**< Number of queue pairs to configure on this device.
         * This value cannot exceed the max_queue_pairs which is previously provided in
         * struct rte_ml_dev_info::max_queue_pairs
         */
};

/**
 * Configure an ML device.
 *
 * This function must be invoked first before any other function in the API.
 *
 * An ML device can be re-configured when in a stopped state. The device cannot be re-configured
 * after rte_ml_dev_close() is called.
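 *
 * A minimal configuration sketch (error handling omitted; one model and one queue pair are
 * assumed to be sufficient for the application):
 *
 * @code
 * struct rte_ml_dev_info info;
 * struct rte_ml_dev_config conf;
 *
 * rte_ml_dev_info_get(dev_id, &info);
 * conf.socket_id = rte_ml_dev_socket_id(dev_id);
 * conf.nb_models = 1;                // must not exceed info.max_models
 * conf.nb_queue_pairs = 1;           // must not exceed info.max_queue_pairs
 * rte_ml_dev_configure(dev_id, &conf);
 * @endcode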
 *
 * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
 * this ML device.
 *
 * @param dev_id
 *   The identifier of the device to configure.
 * @param config
 *   The ML device configuration structure.
 *
 * @return
 *   - 0: Success, device configured.
 *   - < 0: Error code returned by the driver configuration function.
 */
__rte_experimental
int
rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);

/* Forward declaration */
struct rte_ml_op;

/**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);

/** ML device queue pair configuration structure. */
struct rte_ml_dev_qp_conf {
        uint32_t nb_desc;
        /**< Number of descriptors per queue pair.
         * This value cannot exceed the max_desc which is previously provided in
         * struct rte_ml_dev_info::max_desc
         */
        rte_ml_dev_stop_flush_t cb;
        /**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
         * Value NULL is allowed, in which case callback will not be invoked.
         * This function can be used to properly dispose of outstanding ML ops from all
         * queue pairs, for example ops containing memory pointers.
         * @see rte_ml_dev_stop()
         */
};

/**
 * Get the number of queue pairs on a specific ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - The number of configured queue pairs.
 */
__rte_experimental
uint16_t
rte_ml_dev_queue_pair_count(int16_t dev_id);

/**
 * Set up a queue pair for a device. This should only be called when the device is stopped.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param queue_pair_id
 *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
 *   previously supplied to rte_ml_dev_configure().
 * @param qp_conf
 *   The pointer to the configuration data to be used for the queue pair.
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in case of NUMA.
 *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
 *   for the queue pair.
 *
 * @return
 *   - 0: Success, queue pair correctly set up.
 *   - < 0: Queue pair configuration failed.
 */
__rte_experimental
int
rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
                            const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);

/**
 * Start an ML device.
 *
 * The device start step consists of setting the configured features and enabling the ML device
 * to accept inference jobs.
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device started.
 *   - <0: Error code of the driver device start function.
 */
__rte_experimental
int
rte_ml_dev_start(int16_t dev_id);

/**
 * Stop an ML device. A stopped device cannot accept inference jobs.
 * The device can be restarted with a call to rte_ml_dev_start().
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0: Success, device stopped.
 *   - <0: Error code of the driver device stop function.
 */
__rte_experimental
int
rte_ml_dev_stop(int16_t dev_id);

/**
 * Close an ML device.
 * The device cannot be restarted!
 *
 * @param dev_id
 *   The identifier of the device.
 *
 * @return
 *   - 0 on successfully closing the device.
 *   - <0 on failure to close the device.
 */
__rte_experimental
int
rte_ml_dev_close(int16_t dev_id);

/** Status of ML operation */
enum rte_ml_op_status {
        RTE_ML_OP_STATUS_SUCCESS = 0,
        /**< Operation completed successfully */
        RTE_ML_OP_STATUS_NOT_PROCESSED,
        /**< Operation has not yet been processed by the device. */
        RTE_ML_OP_STATUS_ERROR,
        /**< Operation completed with error.
         * Application can invoke rte_ml_op_error_get() to get PMD specific
         * error code if needed.
         */
};

/** ML operation's input and output buffer representation as scatter gather list
 */
struct rte_ml_buff_seg {
        rte_iova_t iova_addr;
        /**< IOVA address of segment buffer. */
        void *addr;
        /**< Virtual address of segment buffer. */
        uint32_t length;
        /**< Segment length. */
        uint32_t reserved;
        /**< Reserved for future use. */
        struct rte_ml_buff_seg *next;
        /**< Points to next segment. Value NULL represents the last segment. */
};

/**
 * ML Operation.
 *
 * This structure contains data related to performing an ML operation on the buffers using
 * the model specified through model_id.
 */
struct __rte_cache_aligned rte_ml_op {
        uint16_t model_id;
        /**< Model ID to be used for the operation. */
        uint16_t nb_batches;
        /**< Number of batches. Minimum value must be one.
         * Input buffer must hold inference data for each batch contiguously.
         */
        uint32_t reserved;
        /**< Reserved for future use. */
        struct rte_mempool *mempool;
        /**< Pool from which operation is allocated. */
        struct rte_ml_buff_seg **input;
        /**< Array of buffer segments to hold the inference input data.
         *
         * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
         * the array is 1.
         *
         * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
         * the array is rte_ml_model_info::nb_inputs.
         *
         * @see struct rte_ml_dev_info::io_layout
         */
        struct rte_ml_buff_seg **output;
        /**< Array of buffer segments to hold the inference output data.
         *
         * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
         * the array is 1.
         *
         * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
         * the array is rte_ml_model_info::nb_outputs.
         *
         * @see struct rte_ml_dev_info::io_layout
         */
        union {
                uint64_t user_u64;
                /**< User data as uint64_t.*/
                void *user_ptr;
                /**< User data as void*.*/
        };
        enum rte_ml_op_status status;
        /**< Operation status. */
        uint64_t impl_opaque;
        /**< Implementation specific opaque value.
         * An implementation may use this field to hold
         * implementation specific value to share between
         * dequeue and enqueue operation.
         * The application should not modify this field.
         */
};

/* Enqueue/Dequeue operations */

/**
 * Enqueue a burst of ML inferences for processing on an ML device.
 *
 * The rte_ml_enqueue_burst() function is invoked to place ML inference
 * operations on the queue *qp_id* of the device designated by its *dev_id*.
 *
 * The *nb_ops* parameter is the number of inferences to process which are
 * supplied in the *ops* array of *rte_ml_op* structures.
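 *
 * A hedged single-op enqueue sketch (op_pool, model_id, input_segs and output_segs are
 * illustrative names for objects prepared by the application beforehand):
 *
 * @code
 * struct rte_ml_op *op;
 *
 * if (rte_mempool_get(op_pool, (void **)&op) != 0)
 *     return;                        // op pool exhausted, retry later
 * op->model_id = model_id;
 * op->nb_batches = 1;
 * op->mempool = op_pool;
 * op->input = input_segs;            // quantized input, see rte_ml_io_quantize()
 * op->output = output_segs;
 * while (rte_ml_enqueue_burst(dev_id, qp_id, &op, 1) == 0)
 *     ;                              // queue full, retry
 * @endcode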
 *
 * The rte_ml_enqueue_burst() function returns the number of inferences it
 * actually enqueued for processing. A return value equal to *nb_ops* means that
 * all packets have been enqueued.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair on which inferences are to be enqueued for processing.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   *rte_ml_dev_configure*.
 * @param ops
 *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
 *   ML inferences to be processed.
 * @param nb_ops
 *   The number of operations to process.
 *
 * @return
 *   The number of inference operations actually enqueued to the ML device.
 *   The return value can be less than the value of the *nb_ops* parameter when the ML device
 *   queue is full or if invalid parameters are specified in a *rte_ml_op*.
 */
__rte_experimental
uint16_t
rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Dequeue a burst of processed ML inference operations from a queue on the ML device.
 * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
 * in the *ops* array.
 *
 * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
 * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
 *
 * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
 * operations, and this is likely to signify that other processed operations remain in the
 * device's output queue. An application implementing a "retrieve as many processed operations
 * as possible" policy can check this specific case and keep invoking the rte_ml_dequeue_burst()
 * function until a value less than *nb_ops* is returned.
 *
 * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
 * the corresponding overhead.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param qp_id
 *   The index of the queue pair from which to retrieve processed packets.
 *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
 *   rte_ml_dev_configure().
 * @param ops
 *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
 *   store *nb_ops* pointers in it.
 * @param nb_ops
 *   The maximum number of inferences to dequeue.
 *
 * @return
 *   The number of operations actually dequeued, which is the number of pointers
 *   to *rte_ml_op* structures effectively supplied to the *ops* array.
 */
__rte_experimental
uint16_t
rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);

/**
 * Verbose error structure definition.
 */
struct rte_ml_op_error {
        char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
        uint64_t errcode;             /**< Vendor specific error code. */
};

/**
 * Get PMD specific error information for an ML op.
 *
 * When an ML operation completes with status RTE_ML_OP_STATUS_ERROR,
 * this API allows getting PMD specific error details.
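 *
 * A possible usage sketch on the dequeue side (BURST_SZ is an illustrative constant):
 *
 * @code
 * struct rte_ml_op *deq_ops[BURST_SZ];
 * struct rte_ml_op_error err;
 * uint16_t nb, i;
 *
 * nb = rte_ml_dequeue_burst(dev_id, qp_id, deq_ops, BURST_SZ);
 * for (i = 0; i < nb; i++) {
 *     if (deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR &&
 *         rte_ml_op_error_get(dev_id, deq_ops[i], &err) == 0)
 *         printf("op error: %s\n", err.message);
 *     rte_mempool_put(deq_ops[i]->mempool, deq_ops[i]);
 * }
 * @endcode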
 *
 * @param[in] dev_id
 *   Device identifier
 * @param[in] op
 *   Handle of ML operation
 * @param[in] error
 *   Address of structure rte_ml_op_error to be filled
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);

/* Statistics operations */

/** Device statistics. */
struct rte_ml_dev_stats {
        uint64_t enqueued_count;
        /**< Count of all operations enqueued */
        uint64_t dequeued_count;
        /**< Count of all operations dequeued */
        uint64_t enqueue_err_count;
        /**< Total error count on operations enqueued */
        uint64_t dequeue_err_count;
        /**< Total error count on operations dequeued */
};

/**
 * Retrieve the general I/O statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param stats
 *   Pointer to the structure to where statistics will be copied.
 *   On error, this location may or may not have been modified.
 * @return
 *   - 0 on success
 *   - -EINVAL: If an invalid parameter pointer is provided.
 */
__rte_experimental
int
rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);

/**
 * Reset the statistics of a device.
 *
 * @param dev_id
 *   The identifier of the device.
 */
__rte_experimental
void
rte_ml_dev_stats_reset(int16_t dev_id);

/**
 * Selects the component of the mldev to retrieve statistics from.
 */
enum rte_ml_dev_xstats_mode {
        RTE_ML_DEV_XSTATS_DEVICE,
        /**< Device xstats */
        RTE_ML_DEV_XSTATS_MODEL,
        /**< Model xstats */
};

/**
 * A name-key lookup element for extended statistics.
 *
 * This structure is used to map between names and ID numbers for extended ML device statistics.
 */
struct rte_ml_dev_xstats_map {
        uint16_t id;
        /**< xstat identifier */
        char name[RTE_ML_STR_MAX];
        /**< xstat name */
};

/**
 * Retrieve names of extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model number in model mode, and is ignored in device mode.
 * @param[out] xstats_map
 *   Block of memory to insert names and ids into. Must be at least *size* in capacity. If set to
 *   NULL, the function returns the required capacity. The id values returned can be passed to
 *   *rte_ml_dev_xstats_get* to select statistics.
 * @param size
 *   Capacity of xstats_map (number of entries).
 * @return
 *   - Positive value lower or equal to size: success. The return value is the number of entries
 *     filled in the stats table.
 *   - Positive value higher than size: error, the given statistics table is too small. The return
 *     value corresponds to the size that should be given to succeed. The entries in the table are
 *     not valid and shall not be used by the caller.
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model parameters.
 *     -ENOTSUP if the device doesn't support this function.
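 *
 * A two-step query sketch (device-level statistics; it assumes the driver reports the required
 * capacity when *xstats_map* is NULL):
 *
 * @code
 * int n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, 0, NULL, 0);
 * if (n > 0) {
 *     struct rte_ml_dev_xstats_map *map = calloc(n, sizeof(*map));
 *     rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, 0, map, n);
 *     // ... use map[i].id with rte_ml_dev_xstats_get(), then free(map)
 * }
 * @endcode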
 */
__rte_experimental
int
rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
                            struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);

/**
 * Retrieve the value of a single stat by requesting it by name.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param name
 *   Name of the stat to retrieve.
 * @param[out] stat_id
 *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
 *   stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to
 *   scan a list of names for the stat. If the stat cannot be found, the id returned will be
 *   (unsigned)-1.
 * @param[out] value
 *   Value of the stat to be returned.
 * @return
 *   - Zero: No error.
 *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);

/**
 * Retrieve extended statistics of an ML device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
 * @param model_id
 *   Used to specify the model id in model mode, and is ignored in device mode.
 * @param stat_ids
 *   ID numbers of the stats to get. The ids can be obtained from the stat position in the stat
 *   list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
 * @param[out] values
 *   Values for each stat requested by ID.
 * @param nb_ids
 *   Number of stats requested.
 * @return
 *   - Positive value: number of stat entries filled into the values array
 *   - Negative value on error:
 *     -ENODEV for invalid *dev_id*.
 *     -EINVAL for invalid mode, model id or stat id parameters.
 *     -ENOTSUP if the device doesn't support this function.
 */
__rte_experimental
int
rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
                      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);

/**
 * Reset the values of the xstats of the selected component in the device.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param mode
 *   Mode of the statistics to reset. Choose from device or model.
 * @param model_id
 *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
 * @param stat_ids
 *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
 *   reset. If non-NULL, must point to an array of at least *nb_ids* size.
 * @param nb_ids
 *   The number of ids available from the *ids* array. Ignored when ids is NULL.
 * @return
 *   - Zero: successfully reset the statistics.
 *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
 */
__rte_experimental
int
rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
                        const uint16_t stat_ids[], uint16_t nb_ids);

/**
 * Dump internal information about *dev_id* to the FILE* provided in *fd*.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param fd
 *   A pointer to a file for output.
 * @return
 *   - 0: on success.
 *   - <0: on failure.
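 *
 * For example, an application could request a debug dump on the standard output:
 *
 * @code
 * rte_ml_dev_dump(dev_id, stdout);
 * @endcode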
 */
__rte_experimental
int
rte_ml_dev_dump(int16_t dev_id, FILE *fd);

/**
 * Trigger the ML device self test.
 *
 * @param dev_id
 *   The identifier of the device.
 * @return
 *   - 0: Selftest successful.
 *   - -ENOTSUP: if the device doesn't support selftest.
 *   - other values < 0 on failure.
 */
__rte_experimental
int
rte_ml_dev_selftest(int16_t dev_id);

/* Model operations */

/** ML model load parameters
 *
 * Parameters required to load an ML model.
 */
struct rte_ml_model_params {
        void *addr;
        /**< Address of model buffer */
        size_t size;
        /**< Size of model buffer */
};

/**
 * Load an ML model to the device.
 *
 * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] params
 *   Parameters for the model to be loaded.
 * @param[out] model_id
 *   Identifier of the model loaded.
 *
 * @return
 *   - 0: Success, Model loaded.
 *   - < 0: Failure, Error code of the model load driver function.
 */
__rte_experimental
int
rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);

/**
 * Unload an ML model from the device.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be unloaded.
 *
 * @return
 *   - 0: Success, Model unloaded.
 *   - < 0: Failure, Error code of the model unload driver function.
 */
__rte_experimental
int
rte_ml_model_unload(int16_t dev_id, uint16_t model_id);

/**
 * Start an ML model for the given device ID.
 *
 * Start an ML model to accept inference requests.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be started.
 *
 * @return
 *   - 0: Success, Model started.
 *   - < 0: Failure, Error code of the model start driver function.
 */
__rte_experimental
int
rte_ml_model_start(int16_t dev_id, uint16_t model_id);

/**
 * Stop an ML model for the given device ID.
 *
 * Stopping a model disables it from serving further inference jobs.
 * All inference jobs must have been completed before a model stop is attempted.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier of the model to be stopped.
 *
 * @return
 *   - 0: Success, Model stopped.
 *   - < 0: Failure, Error code of the model stop driver function.
 */
__rte_experimental
int
rte_ml_model_stop(int16_t dev_id, uint16_t model_id);

/**
 * Input and output data types. ML models can operate on reduced precision
 * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
 * This enum is used to represent the lower precision integer and floating point types used
 * by ML models.
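 *
 * As an illustration, an application could pick a software conversion helper based on the type
 * reported for a model input (a partial sketch; info, fp32_buf and q_buf are illustrative names
 * and only two of the possible types are handled):
 *
 * @code
 * const struct rte_ml_io_info *in = &info->input_info[0];
 *
 * switch (in->type) {
 * case RTE_ML_IO_TYPE_INT8:
 *     rte_ml_io_float32_to_int8(fp32_buf, q_buf, in->nb_elements, in->scale, in->zero_point);
 *     break;
 * case RTE_ML_IO_TYPE_UINT8:
 *     rte_ml_io_float32_to_uint8(fp32_buf, q_buf, in->nb_elements, in->scale, in->zero_point);
 *     break;
 * default:
 *     break;
 * }
 * @endcode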
 */
enum rte_ml_io_type {
        RTE_ML_IO_TYPE_UNKNOWN = 0,
        /**< Invalid or unknown type */
        RTE_ML_IO_TYPE_INT8,
        /**< 8-bit integer */
        RTE_ML_IO_TYPE_UINT8,
        /**< 8-bit unsigned integer */
        RTE_ML_IO_TYPE_INT16,
        /**< 16-bit integer */
        RTE_ML_IO_TYPE_UINT16,
        /**< 16-bit unsigned integer */
        RTE_ML_IO_TYPE_INT32,
        /**< 32-bit integer */
        RTE_ML_IO_TYPE_UINT32,
        /**< 32-bit unsigned integer */
        RTE_ML_IO_TYPE_INT64,
        /**< 64-bit integer */
        RTE_ML_IO_TYPE_UINT64,
        /**< 64-bit unsigned integer */
        RTE_ML_IO_TYPE_FP8,
        /**< 8-bit floating point number */
        RTE_ML_IO_TYPE_FP16,
        /**< IEEE 754 16-bit floating point number */
        RTE_ML_IO_TYPE_FP32,
        /**< IEEE 754 32-bit floating point number */
        RTE_ML_IO_TYPE_BFLOAT16
        /**< 16-bit brain floating point number. */
};

/** ML I/O buffer layout */
enum rte_ml_io_layout {
        RTE_ML_IO_LAYOUT_PACKED,
        /**< All inputs for the model should be packed in a single buffer with
         * no padding between individual inputs. The buffer is expected to
         * be aligned to rte_ml_dev_info::align_size.
         *
         * When I/O segmentation is supported by the device, the packed
         * data can be split into multiple segments. In this case, each
         * segment is expected to be aligned to rte_ml_dev_info::align_size
         *
         * Same applies to output.
         *
         * @see struct rte_ml_dev_info::max_segments
         */
        RTE_ML_IO_LAYOUT_SPLIT
        /**< Each input for the model should be stored as separate buffers
         * and each input should be aligned to rte_ml_dev_info::align_size.
         *
         * When I/O segmentation is supported, each input can be split into
         * multiple segments. In this case, each segment is expected to be
         * aligned to rte_ml_dev_info::align_size
         *
         * Same applies to output.
         *
         * @see struct rte_ml_dev_info::max_segments
         */
};

/**
 * Input and output data information structure
 *
 * Specifies the type and shape of input and output data.
 */
struct rte_ml_io_info {
        char name[RTE_ML_STR_MAX];
        /**< Name of data */
        uint32_t nb_dims;
        /**< Number of dimensions in shape */
        uint32_t *shape;
        /**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
        enum rte_ml_io_type type;
        /**< Type of data
         * @see enum rte_ml_io_type
         */
        uint64_t nb_elements;
        /**< Number of elements in tensor */
        uint64_t size;
        /**< Size of tensor in bytes */
        float scale;
        /**< Scale factor */
        int64_t zero_point;
        /**< Zero point */
};

/** Model information structure */
struct rte_ml_model_info {
        char name[RTE_ML_STR_MAX];
        /**< Model name. */
        char version[RTE_ML_STR_MAX];
        /**< Model version */
        uint16_t model_id;
        /**< Model ID */
        uint16_t device_id;
        /**< Device ID */
        enum rte_ml_io_layout io_layout;
        /**< I/O buffer layout for the model */
        uint16_t min_batches;
        /**< Minimum number of batches that the model can process
         * in one inference request
         */
        uint16_t max_batches;
        /**< Maximum number of batches that the model can process
         * in one inference request
         */
        uint32_t nb_inputs;
        /**< Number of inputs */
        const struct rte_ml_io_info *input_info;
        /**< Input info array. Array size is equal to nb_inputs */
        uint32_t nb_outputs;
        /**< Number of outputs */
        const struct rte_ml_io_info *output_info;
        /**< Output info array.
         * Array size is equal to nb_outputs
         */
        uint64_t wb_size;
        /**< Size of model weights and bias */
};

/**
 * Get ML model information.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[out] model_info
 *   Pointer to a model info structure
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);

/**
 * Update the model parameters without unloading the model.
 *
 * Update model parameters such as weights and bias without unloading the model.
 * rte_ml_model_stop() must be called before invoking this API.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model created
 * @param[in] buffer
 *   Pointer to the model weights and bias buffer.
 *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);

/* IO operations */

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to signed
 * 8-bit integer format (INT8).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] i8
 *   Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_int8(const void *fp32, void *i8, uint64_t nb_elements, float scale,
                          int8_t zero_point);

/**
 * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
 * floating format (float32).
 *
 * @param[in] i8
 *   Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_int8_to_float32(const void *i8, void *fp32, uint64_t nb_elements, float scale,
                          int8_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
 * 8-bit integer format (UINT8).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] ui8
 *   Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_uint8(const void *fp32, void *ui8, uint64_t nb_elements, float scale,
                           uint8_t zero_point);

/**
 * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
 * floating format (float32).
 *
 * @param[in] ui8
 *   Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_uint8_to_float32(const void *ui8, void *fp32, uint64_t nb_elements, float scale,
                           uint8_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to signed
 * 16-bit integer format (INT16).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] i16
 *   Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_int16(const void *fp32, void *i16, uint64_t nb_elements, float scale,
                           int16_t zero_point);

/**
 * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
 * floating format (float32).
 *
 * @param[in] i16
 *   Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_int16_to_float32(const void *i16, void *fp32, uint64_t nb_elements, float scale,
                           int16_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
 * 16-bit integer format (UINT16).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] ui16
 *   Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_uint16(const void *fp32, void *ui16, uint64_t nb_elements, float scale,
                            uint16_t zero_point);

/**
 * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
 * precision floating format (float32).
 *
 * @param[in] ui16
 *   Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_uint16_to_float32(const void *ui16, void *fp32, uint64_t nb_elements, float scale,
                            uint16_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to signed
 * 32-bit integer format (INT32).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] i32
 *   Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_int32(const void *fp32, void *i32, uint64_t nb_elements, float scale,
                           int32_t zero_point);

/**
 * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision
 * floating format (float32).
 *
 * @param[in] i32
 *   Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_int32_to_float32(const void *i32, void *fp32, uint64_t nb_elements, float scale,
                           int32_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
 * 32-bit integer format (UINT32).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] ui32
 *   Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_uint32(const void *fp32, void *ui32, uint64_t nb_elements, float scale,
                            uint32_t zero_point);

/**
 * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single
 * precision floating format (float32).
 *
 * @param[in] ui32
 *   Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_uint32_to_float32(const void *ui32, void *fp32, uint64_t nb_elements, float scale,
                            uint32_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to signed
 * 64-bit integer format (INT64).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] i64
 *   Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_int64(const void *fp32, void *i64, uint64_t nb_elements, float scale,
                           int64_t zero_point);

/**
 * Convert a buffer containing numbers in signed 64-bit integer format (INT64) to single precision
 * floating format (float32).
 *
 * @param[in] i64
 *   Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_int64_to_float32(const void *i64, void *fp32, uint64_t nb_elements, float scale,
                           int64_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
 * 64-bit integer format (UINT64).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] ui64
 *   Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_uint64(const void *fp32, void *ui64, uint64_t nb_elements, float scale,
                            uint64_t zero_point);

/**
 * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64) to single
 * precision floating format (float32).
 *
 * @param[in] ui64
 *   Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 * @param[in] scale
 *   Scale factor for conversion.
 * @param[in] zero_point
 *   Zero point for conversion.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_uint64_to_float32(const void *ui64, void *fp32, uint64_t nb_elements, float scale,
                            uint64_t zero_point);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to half
 * precision floating point format (FP16).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] fp16
 *   Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_float16(const void *fp32, void *fp16, uint64_t nb_elements);

/**
 * Convert a buffer containing numbers in half precision floating format (FP16) to single precision
 * floating point format (float32).
 *
 * @param[in] fp16
 *   Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float16_to_float32(const void *fp16, void *fp32, uint64_t nb_elements);

/**
 * Convert a buffer containing numbers in single precision floating format (float32) to brain
 * floating point format (bfloat16).
 *
 * @param[in] fp32
 *   Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[out] bf16
 *   Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_float32_to_bfloat16(const void *fp32, void *bf16, uint64_t nb_elements);

/**
 * Convert a buffer containing numbers in brain floating point format (bfloat16) to single
 * precision floating point format (float32).
 *
 * @param[in] bf16
 *   Input buffer containing bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
 * @param[out] fp32
 *   Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
 * @param[in] nb_elements
 *   Number of elements in the buffer.
 *
 * @return
 *   - 0, Success.
 *   - < 0, Error code on failure.
 */
__rte_experimental
int
rte_ml_io_bfloat16_to_float32(const void *bf16, void *fp32, uint64_t nb_elements);

/**
 * Quantize input data.
 *
 * Quantization converts data from a higher precision type to a lower precision type to improve
 * the throughput and efficiency of the model execution with minimal loss of accuracy.
 * Types of dequantized data and quantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] dbuffer
 *   Address of dequantized input data
 * @param[in] qbuffer
 *   Address of quantized input data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
                   struct rte_ml_buff_seg **qbuffer);

/**
 * Dequantize output data.
 *
 * Dequantization converts data from a lower precision type to a higher precision type.
 * Types of quantized data and dequantized data are specified by the model.
 *
 * @param[in] dev_id
 *   The identifier of the device.
 * @param[in] model_id
 *   Identifier for the model
 * @param[in] qbuffer
 *   Address of quantized output data
 * @param[in] dbuffer
 *   Address of dequantized output data
 *
 * @return
 *   - Returns 0 on success
 *   - Returns negative value on failure
 */
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
                     struct rte_ml_buff_seg **dbuffer);

/* ML op pool operations */

/**
 * Create an ML operation pool
 *
 * @param name
 *   ML operations pool name
 * @param nb_elts
 *   Number of elements in pool
 * @param cache_size
 *   Number of elements to cache on an lcore, see
 *   *rte_mempool_create* for further details about cache size
 * @param user_size
 *   Size of private data to allocate for user with each operation
 * @param socket_id
 *   Socket identifier to allocate memory on
 * @return
 *   - On success pointer to mempool
 *   - On failure NULL
 */
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
                      uint16_t user_size, int socket_id);

/**
 * Free an ML operation pool
 *
 * @param mempool
 *   A pointer to the mempool structure.
 *   If NULL then, the function does nothing.
 */
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);

#ifdef __cplusplus
}
#endif

#endif /* RTE_MLDEV_H */