xref: /dpdk/lib/mldev/rte_mldev.h (revision fe8eba692c59a92c9308f1fe429b101b9f2377bf)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_H
6 #define RTE_MLDEV_H
7 
8 /**
9  * @file rte_mldev.h
10  *
11  * @warning
12  * @b EXPERIMENTAL:
13  * All functions in this file may be changed or removed without prior notice.
14  *
15  * ML (Machine Learning) device API.
16  *
17  * The ML framework is built on the following model:
18  *
19  *
20  *     +-----------------+               rte_ml_[en|de]queue_burst()
21  *     |                 |                          |
22  *     |     Machine     o------+     +--------+    |
23  *     |     Learning    |      |     | queue  |    |    +------+
24  *     |     Inference   o------+-----o        |<===o===>|Core 0|
25  *     |     Engine      |      |     | pair 0 |         +------+
26  *     |                 o----+ |     +--------+
27  *     |                 |    | |
28  *     +-----------------+    | |     +--------+
29  *              ^             | |     | queue  |         +------+
30  *              |             | +-----o        |<=======>|Core 1|
31  *              |             |       | pair 1 |         +------+
32  *              |             |       +--------+
33  *     +--------+--------+    |
34  *     | +-------------+ |    |       +--------+
35  *     | |   Model 0   | |    |       | queue  |         +------+
36  *     | +-------------+ |    +-------o        |<=======>|Core N|
37  *     | +-------------+ |            | pair N |         +------+
38  *     | |   Model 1   | |            +--------+
39  *     | +-------------+ |
40  *     | +-------------+ |<------> rte_ml_model_load()
41  *     | |   Model ..  | |-------> rte_ml_model_info_get()
42  *     | +-------------+ |<------- rte_ml_model_start()
43  *     | +-------------+ |<------- rte_ml_model_stop()
44  *     | |   Model N   | |<------- rte_ml_model_params_update()
45  *     | +-------------+ |<------- rte_ml_model_unload()
46  *     +-----------------+
47  *
48  * ML Device: A hardware or software-based implementation of ML device API for
49  * running inferences using a pre-trained ML model.
50  *
51  * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
52  * procedure/algorithm and data/pattern required to make predictions on live data.
53  * Once the model is created and trained outside of the DPDK scope, the model can be loaded
54  * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
55  * The rte_ml_model_params_update() can be used to update the model parameters such as weight
56  * and bias without unloading the model using rte_ml_model_unload().
57  *
58  * ML Inference: ML inference is the process of feeding data to the model via
59  * rte_ml_enqueue_burst() API and use rte_ml_dequeue_burst() API to get the calculated
60  * outputs/predictions from the started model.
61  *
62  * In all functions of the ML device API, the ML device is designated by an
63  * integer >= 0 named as device identifier *dev_id*.
64  *
65  * The functions exported by the ML device API to setup a device designated by
66  * its device identifier must be invoked in the following order:
67  *
68  *      - rte_ml_dev_configure()
69  *      - rte_ml_dev_queue_pair_setup()
70  *      - rte_ml_dev_start()
71  *
72  * A model is required to run the inference operations with the user specified inputs.
73  * Application needs to invoke the ML model API in the following order before queueing
74  * inference jobs.
75  *
76  *      - rte_ml_model_load()
77  *      - rte_ml_model_start()
78  *
79  * A model can be loaded on a device only after the device has been configured and can be
80  * started or stopped only after a device has been started.
81  *
82  * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
83  * The information would include the shape and type of input and output required for the inference.
84  *
85  * Data quantization and dequantization is one of the main aspects in ML domain. This involves
86  * conversion of input data from a higher precision to a lower precision data type and vice-versa
87  * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
88  * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
89  * and output buffers holding data for multiple batches.
90  *
91  * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used to get
92  * the size of quantized and de-quantized multi-batch input and output buffers.
93  *
94  * User can optionally update the model parameters with rte_ml_model_params_update() after
95  * invoking rte_ml_model_stop() API on a given model ID.
96  *
97  * The application can invoke, in any order, the functions exported by the ML API to enqueue
98  * inference jobs and dequeue inference response.
99  *
100  * If the application wants to change the device configuration (i.e., call
101  * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the
102  * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
103  * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
104  * for the given model. The application does not need to call rte_ml_dev_stop() API for
105  * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
106  *
107  * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
108  * start state after invoking rte_ml_model_start() API, then the application can call
109  * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
110  *
111  * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
112  *
113  * Typical application utilisation of the ML API will follow the following
114  * programming flow.
115  *
116  * - rte_ml_dev_configure()
117  * - rte_ml_dev_queue_pair_setup()
118  * - rte_ml_model_load()
119  * - rte_ml_dev_start()
120  * - rte_ml_model_start()
121  * - rte_ml_model_info_get()
122  * - rte_ml_enqueue_burst()
123  * - rte_ml_dequeue_burst()
124  * - rte_ml_model_stop()
125  * - rte_ml_model_unload()
126  * - rte_ml_dev_stop()
127  * - rte_ml_dev_close()
128  *
129  * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
130  * are lock-free functions which are assumed not to be invoked in parallel on different logical
131  * cores on the same target object. For instance, the dequeue function of a poll mode driver cannot
132  * be invoked in parallel on two logical cores to operate on the same queue pair. Of course, this
133  * function can be invoked in parallel by different logical cores on different queue pairs.
134  * It is the responsibility of the user application to enforce this rule.
135  */
136 
137 #include <rte_common.h>
138 #include <rte_log.h>
139 #include <rte_mempool.h>
140 
141 #ifdef __cplusplus
142 extern "C" {
143 #endif
144 
145 /* Logging macros. RTE_MLDEV_LOG() prefixes every message with the calling function's name. */
146 extern int rte_ml_dev_logtype;
147 #define RTE_LOGTYPE_MLDEV rte_ml_dev_logtype
148 
149 #define RTE_MLDEV_LOG(level, ...) \
150 	RTE_LOG_LINE_PREFIX(level, MLDEV, "%s(): ", __func__, __VA_ARGS__)
151 
152 #define RTE_ML_STR_MAX 128
153 /**< Maximum length of name string */
154 
155 #define RTE_MLDEV_DEFAULT_MAX 32
156 /**< Maximum number of devices if rte_ml_dev_init() is not called. */
157 
158 /* Device operations */
159 
160 /**
161  * Initialize the device array before probing devices. If not called, the first device probed
162  * initializes the array to a size of RTE_MLDEV_DEFAULT_MAX.
163  *
164  * @param dev_max
165  *   Maximum number of devices. Must be non-zero.
166  *
167  * @return
168  *   0 on success, -rte_errno otherwise:
169  *   - ENOMEM if out of memory
170  *   - EINVAL if *dev_max* is 0
171  *   - EBUSY if already initialized
172  */
173 __rte_experimental
174 int
175 rte_ml_dev_init(size_t dev_max);
176 
177 /**
178  * Get the total number of ML devices that have been successfully initialised.
179  *
180  * @return
181  *   The total number of usable ML devices.
182  */
183 __rte_experimental
184 uint16_t
185 rte_ml_dev_count(void);
186 
187 /**
188  * Check if the device is in ready state.
189  *
190  * @param dev_id
191  *   The identifier of the device.
192  *
193  * @return
194  *   - 0 if the device is not in ready state.
195  *   - 1 if the device is in ready state.
196  */
197 __rte_experimental
198 int
199 rte_ml_dev_is_valid_dev(int16_t dev_id);
200 
201 /**
202  * Return the NUMA socket to which a device is connected.
203  *
204  * @param dev_id
205  *   The identifier of the device.
206  *
207  * @return
208  *   - The NUMA socket id to which the device is connected.
209  *   - 0: if the socket could not be determined.
210  *   - -EINVAL: if the dev_id value is not valid.
211  */
212 __rte_experimental
213 int
214 rte_ml_dev_socket_id(int16_t dev_id);
215 
216 /** ML device information */
217 struct rte_ml_dev_info {
218 	const char *driver_name;
219 	/**< Driver name */
220 	uint16_t max_models;
221 	/**< Maximum number of models supported by the device.
222 	 * @see struct rte_ml_dev_config::nb_models
223 	 */
224 	uint16_t max_queue_pairs;
225 	/**< Maximum number of queue pairs supported by the device.
226 	 * @see struct rte_ml_dev_config::nb_queue_pairs
227 	 */
228 	uint16_t max_desc;
229 	/**< Maximum allowed number of descriptors per queue pair by the device.
230 	 * @see struct rte_ml_dev_qp_conf::nb_desc
231 	 */
232 	uint16_t max_io;
233 	/**< Maximum number of inputs/outputs supported per model. */
234 	uint16_t max_segments;
235 	/**< Maximum number of scatter-gather entries supported by the device.
236 	 * @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
237 	 */
238 	uint16_t align_size;
239 	/**< Alignment size of IO buffers used by the device. */
240 };
241 
242 /**
243  * Retrieve the information of the device.
244  *
245  * @param dev_id
246  *   The identifier of the device.
247  * @param[out] dev_info
248  *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
249  *
250  * @return
251  *   - 0: Success, driver updates the information of the ML device
252  *   - < 0: Error code returned by the driver info get function.
253  */
254 __rte_experimental
255 int
256 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
257 
258 /** ML device configuration structure */
259 struct rte_ml_dev_config {
260 	int socket_id;
261 	/**< Socket to allocate resources on. */
262 	uint16_t nb_models;
263 	/**< Number of models to be loaded on the device.
264 	 * This value cannot exceed the max_models which is previously provided in
265 	 * struct rte_ml_dev_info::max_models
266 	 */
267 	uint16_t nb_queue_pairs;
268 	/**< Number of queue pairs to configure on this device.
269 	 * This value cannot exceed the max_queue_pairs which is previously provided in
270 	 * struct rte_ml_dev_info::max_queue_pairs
271 	 */
272 };
273 
274 /**
275  * Configure an ML device.
276  *
277  * This function must be invoked first before any other function in the API.
278  *
279  * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after
280  * rte_ml_dev_close() is called.
281  *
282  * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
283  * this ML device.
284  *
285  * @param dev_id
286  *   The identifier of the device to configure.
287  * @param config
288  *   The ML device configuration structure.
289  *
290  * @return
291  *   - 0: Success, device configured.
292  *   - < 0: Error code returned by the driver configuration function.
293  */
294 __rte_experimental
295 int
296 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
297 
298 /* Forward declaration */
299 struct rte_ml_op;
300 
301 /** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
302 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
303 
304 /** ML device queue pair configuration structure. */
305 struct rte_ml_dev_qp_conf {
306 	uint32_t nb_desc;
307 	/**< Number of descriptors per queue pair.
308 	 * This value cannot exceed the max_desc which is previously provided in
309 	 * struct rte_ml_dev_info::max_desc
310 	 */
311 	rte_ml_dev_stop_flush_t cb;
312 	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
313 	 * Value NULL is allowed, in which case callback will not be invoked.
314 	 * This function can be used to properly dispose of outstanding ML ops from all
315 	 * queue pairs, for example ops containing memory pointers.
316 	 * @see rte_ml_dev_stop()
317 	 */
318 };
319 
320 /**
321  * Get the number of queue pairs configured on a specific ML device.
322  *
323  * @param dev_id
324  *   The identifier of the device.
325  *
326  * @return
327  *   The number of configured queue pairs.
328  */
329 __rte_experimental
330 uint16_t
331 rte_ml_dev_queue_pair_count(int16_t dev_id);
332 
333 /**
334  * Set up a queue pair for a device. This should only be called when the device is stopped.
335  *
336  * @param dev_id
337  *   The identifier of the device.
338  * @param queue_pair_id
339  *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
340  * previously supplied to rte_ml_dev_configure().
341  * @param qp_conf
342  *   The pointer to the configuration data to be used for the queue pair.
343  * @param socket_id
344  *   The *socket_id* argument is the socket identifier in case of NUMA.
345  * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
346  * for the queue pair.
347  *
348  * @return
349  *   - 0: Success, queue pair correctly set up.
350  *   - < 0: Queue pair configuration failed.
351  */
352 __rte_experimental
353 int
354 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
355 			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
356 
357 /**
358  * Start an ML device.
359  *
360  * The device start step consists of setting the configured features and enabling the ML device
361  * to accept inference jobs.
362  *
363  * @param dev_id
364  *   The identifier of the device.
365  *
366  * @return
367  *   - 0: Success, device started.
368  *   - < 0: Error code of the driver device start function.
369  */
370 __rte_experimental
371 int
372 rte_ml_dev_start(int16_t dev_id);
373 
374 /**
375  * Stop an ML device. A stopped device cannot accept inference jobs.
376  * The device can be restarted with a call to rte_ml_dev_start().
377  *
378  * @param dev_id
379  *   The identifier of the device.
380  *
381  * @return
382  *   - 0: Success, device stopped.
383  *   - < 0: Error code of the driver device stop function.
384  */
385 __rte_experimental
386 int
387 rte_ml_dev_stop(int16_t dev_id);
388 
389 /**
390  * Close an ML device. The device cannot be restarted!
391  *
392  * @param dev_id
393  *   The identifier of the device.
394  *
395  * @return
396  *   - 0: Success, device closed.
397  *   - < 0: Failure to close the device.
398  */
399 __rte_experimental
400 int
401 rte_ml_dev_close(int16_t dev_id);
402 
403 /** Status of an ML operation */
404 enum rte_ml_op_status {
405 	RTE_ML_OP_STATUS_SUCCESS = 0,
406 	/**< Operation completed successfully. */
407 	RTE_ML_OP_STATUS_NOT_PROCESSED,
408 	/**< Operation has not yet been processed by the device. */
409 	RTE_ML_OP_STATUS_ERROR,
410 	/**< Operation completed with error.
411 	 * Application can invoke rte_ml_op_error_get() to get the PMD specific
412 	 * error code if needed.
413 	 */
414 };
415 
416 /** ML operation's input and output buffer, represented as a scatter-gather list.
417  */
418 struct rte_ml_buff_seg {
419 	rte_iova_t iova_addr;
420 	/**< IOVA address of segment buffer. */
421 	void *addr;
422 	/**< Virtual address of segment buffer. */
423 	uint32_t length;
424 	/**< Segment length. */
425 	uint32_t reserved;
426 	/**< Reserved for future use. */
427 	struct rte_ml_buff_seg *next;
428 	/**< Points to next segment. Value NULL represents the last segment. */
429 };
430 
431 /**
432  * ML Operation.
433  *
434  * This structure contains data related to performing an ML operation on the buffers using
435  * the model specified through model_id.
436  */
437 struct __rte_cache_aligned rte_ml_op {
438 	uint16_t model_id;
439 	/**< Model ID to be used for the operation. */
440 	uint16_t nb_batches;
441 	/**< Number of batches. Minimum value must be one.
442 	 * The input buffer must hold the inference data of all batches contiguously.
443 	 */
444 	uint32_t reserved;
445 	/**< Reserved for future use. */
446 	struct rte_mempool *mempool;
447 	/**< Pool from which operation is allocated. */
448 	struct rte_ml_buff_seg **input;
449 	/**< Array of buffer segments to hold the inference input data.
450 	 *
451 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
452 	 * the array is 1.
453 	 *
454 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
455 	 * the array is rte_ml_model_info::nb_inputs.
456 	 *
457 	 * @see struct rte_ml_model_info::io_layout
458 	 */
459 	struct rte_ml_buff_seg **output;
460 	/**< Array of buffer segments to hold the inference output data.
461 	 *
462 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
463 	 * the array is 1.
464 	 *
465 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
466 	 * the array is rte_ml_model_info::nb_outputs.
467 	 *
468 	 * @see struct rte_ml_model_info::io_layout
469 	 */
470 	union {
471 		uint64_t user_u64;
472 		/**< User data as uint64_t. */
473 		void *user_ptr;
474 		/**< User data as void*. */
475 	};
476 	enum rte_ml_op_status status;
477 	/**< Operation status. */
478 	uint64_t impl_opaque;
479 	/**< Implementation specific opaque value.
480 	 * An implementation may use this field to hold an
481 	 * implementation specific value to share between
482 	 * dequeue and enqueue operations.
483 	 * The application should not modify this field.
484 	 */
485 };
486 
487 /* Enqueue/Dequeue operations */
488 
489 /**
490  * Enqueue a burst of ML inferences for processing on an ML device.
491  *
492  * The rte_ml_enqueue_burst() function is invoked to place ML inference
493  * operations on the queue *qp_id* of the device designated by its *dev_id*.
494  *
495  * The *nb_ops* parameter is the number of inferences to process which are
496  * supplied in the *ops* array of *rte_ml_op* structures.
497  *
498  * The rte_ml_enqueue_burst() function returns the number of inferences it
499  * actually enqueued for processing. A return value equal to *nb_ops* means that
500  * all inferences have been enqueued.
501  *
502  * @param dev_id
503  *   The identifier of the device.
504  * @param qp_id
505  *   The index of the queue pair on which inferences are to be enqueued for processing.
506  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
507  * rte_ml_dev_configure().
508  * @param ops
509  *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
510  * ML inferences to be processed.
511  * @param nb_ops
512  *   The number of operations to process.
513  *
514  * @return
515  *   The number of inference operations actually enqueued to the ML device.
516  * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
517  * is full or if invalid parameters are specified in a *rte_ml_op*.
518  */
519 __rte_experimental
520 uint16_t
521 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
522 
523 /**
524  * Dequeue a burst of processed ML inference operations from a queue on the ML device.
525  * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
526  * in the *ops* array.
527  *
528  * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
529  * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
530  *
531  * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
532  * and this is likely to signify that other processed operations remain in the device's output queue.
533  * Application implementing a "retrieve as many processed operations as possible" policy can check
534  * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
535  * *nb_ops* is returned.
536  *
537  * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
538  * the corresponding overhead.
539  *
540  * @param dev_id
541  *   The identifier of the device.
542  * @param qp_id
543  *   The index of the queue pair from which to retrieve processed inferences.
544  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
545  * rte_ml_dev_configure().
546  * @param ops
547  *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
548  * store *nb_ops* pointers in it.
549  * @param nb_ops
550  *   The maximum number of inferences to dequeue.
551  *
552  * @return
553  *   The number of operations actually dequeued, which is the number of pointers
554  * to *rte_ml_op* structures effectively supplied to the *ops* array.
555  */
556 __rte_experimental
557 uint16_t
558 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
559 
560 /**
561  * Verbose error structure, filled by rte_ml_op_error_get().
562  */
563 struct rte_ml_op_error {
564 	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
565 	uint64_t errcode;	      /**< Vendor specific error code. */
566 };
567 
568 /**
569  * Get PMD specific error information for an ML op.
570  *
571  * When an ML operation completed with RTE_ML_OP_STATUS_ERROR as status,
572  * this API allows to get PMD specific error details.
573  *
574  * @param[in] dev_id
575  *   Device identifier
576  * @param[in] op
577  *   Handle of ML operation
578  * @param[out] error
579  *   Address of structure rte_ml_op_error to be filled
580  *
581  * @return
582  *   - Returns 0 on success
583  *   - Returns negative value on failure
584  */
585 __rte_experimental
586 int
587 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
588 
589 /* Statistics operations */
590 
591 /** Device statistics. */
592 struct rte_ml_dev_stats {
593 	uint64_t enqueued_count;
594 	/**< Count of all operations enqueued. */
595 	uint64_t dequeued_count;
596 	/**< Count of all operations dequeued. */
597 	uint64_t enqueue_err_count;
598 	/**< Total error count on operations enqueued. */
599 	uint64_t dequeue_err_count;
600 	/**< Total error count on operations dequeued. */
601 };
602 
603 /**
604  * Retrieve the general I/O statistics of a device.
605  *
606  * @param dev_id
607  *   The identifier of the device.
608  * @param[out] stats
609  *   Pointer to the structure where the statistics will be copied.
610  * On error, this location may or may not have been modified.
611  * @return
612  *   - 0 on success
613  *   - -EINVAL: If invalid parameter pointer is provided.
614  */
615 __rte_experimental
616 int
617 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
618 
619 /**
620  * Reset the statistics of an ML device.
621  *
622  * @param dev_id
623  *   The identifier of the device.
624  */
625 __rte_experimental
626 void
627 rte_ml_dev_stats_reset(int16_t dev_id);
628 
629 /**
630  * Selects the component of the ML device to retrieve extended statistics from.
631  */
632 enum rte_ml_dev_xstats_mode {
633 	RTE_ML_DEV_XSTATS_DEVICE,
634 	/**< Device xstats */
635 	RTE_ML_DEV_XSTATS_MODEL,
636 	/**< Model xstats */
637 };
638 
639 /**
640  * A name-key lookup element for extended statistics.
641  *
642  * This structure maps between names and ID numbers of extended ML device statistics.
643  */
644 struct rte_ml_dev_xstats_map {
645 	uint16_t id;
646 	/**< xstat identifier */
647 	char name[RTE_ML_STR_MAX];
648 	/**< xstat name */
649 };
650 
651 /**
652  * Retrieve names of extended statistics of an ML device.
653  *
654  * @param dev_id
655  *   The identifier of the device.
656  * @param mode
657  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
658  * @param model_id
659  *   Used to specify the model number in model mode, and is ignored in device mode.
660  * @param[out] xstats_map
661  *   Block of memory to insert names and ids into. Must be at least *size* in capacity. If set to
662  * NULL, function returns required capacity. The id values returned can be passed to
663  * rte_ml_dev_xstats_get() to select statistics.
664  * @param size
665  *   Capacity of *xstats_map* (number of rte_ml_dev_xstats_map entries).
666  * @return
667  *   - Positive value lower or equal to size: success. The return value is the number of entries
668  * filled in the stats table.
669  *   - Positive value higher than size: error, the given statistics table is too small. The return
670  * value corresponds to the size that should be given to succeed. The entries in the table are not
671  * valid and shall not be used by the caller.
672  *   - Negative value on error:
673  *        -ENODEV for invalid *dev_id*.
674  *        -EINVAL for invalid mode, model parameters.
675  *        -ENOTSUP if the device doesn't support this function.
676  */
677 __rte_experimental
678 int
679 rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
680 			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
681 
682 /**
683  * Retrieve the value of a single stat by requesting it by name.
684  *
685  * @param dev_id
686  *   The identifier of the device.
687  * @param name
688  *   Name of the stat to retrieve.
689  * @param[out] stat_id
690  *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
691  * stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to scan a
692  * list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1.
693  * @param[out] value
694  *   Value of the stat to be returned.
695  * @return
696  *   - Zero: No error.
697  *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
698  */
699 __rte_experimental
700 int
701 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
702 
703 /**
704  * Retrieve extended statistics of an ML device.
705  *
706  * @param dev_id
707  *   The identifier of the device.
708  * @param mode
709  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
710  * @param model_id
711  *   Used to specify the model id in model mode, and is ignored in device mode.
712  * @param stat_ids
713  *   ID numbers of the stats to get. The ids can be got from the stat position in the stat list from
714  * rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
715  * @param[out] values
716  *   Values for each stat requested by ID.
717  * @param nb_ids
718  *   Number of stats requested.
719  * @return
720  *   - Positive value: number of stat entries filled into the values array
721  *   - Negative value on error:
722  *        -ENODEV for invalid *dev_id*.
723  *        -EINVAL for invalid mode, model id or stat id parameters.
724  *        -ENOTSUP if the device doesn't support this function.
725  */
726 __rte_experimental
727 int
728 rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
729 		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
730 
731 /**
732  * Reset the values of the xstats of the selected component in the device.
733  *
734  * @param dev_id
735  *   The identifier of the device.
736  * @param mode
737  *   Mode of the statistics to reset. Choose from device or model.
738  * @param model_id
739  *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
740  * @param stat_ids
741  *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
742  * reset. If non-NULL, must point to array of at least *nb_ids* size.
743  * @param nb_ids
744  *   The number of ids available from the *stat_ids* array. Ignored when *stat_ids* is NULL.
745  * @return
746  *   - Zero: successfully reset the statistics.
747  *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
748  */
749 __rte_experimental
750 int
751 rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
752 			const uint16_t stat_ids[], uint16_t nb_ids);
753 
754 /**
755  * Dump internal information about *dev_id* to the FILE* provided in *fd*.
756  *
757  * @param dev_id
758  *   The identifier of the device.
759  * @param fd
760  *   A pointer to the FILE stream the output is written to.
761  * @return
762  *   - 0: on success.
763  *   - < 0: on failure.
764  */
765 __rte_experimental
766 int
767 rte_ml_dev_dump(int16_t dev_id, FILE *fd);
768 
769 /**
770  * Trigger the ML device self test.
771  *
772  * @param dev_id
773  *   The identifier of the device.
774  * @return
775  *   - 0: Selftest successful.
776  *   - -ENOTSUP: if the device doesn't support selftest.
777  *   - < 0: other values on failure.
778  */
779 __rte_experimental
780 int
781 rte_ml_dev_selftest(int16_t dev_id);
782 
783 /* Model operations */
784 
785 /** ML model load parameters
786  *
787  * Parameters required to load an ML model with rte_ml_model_load().
788  */
789 struct rte_ml_model_params {
790 	void *addr;
791 	/**< Address of model buffer. */
792 	size_t size;
793 	/**< Size of model buffer. */
794 };
795 
796 /**
797  * Load an ML model to the device.
798  *
799  * Load an ML model to the device with the parameters specified in the structure rte_ml_model_params.
800  *
801  * @param[in] dev_id
802  *   The identifier of the device.
803  * @param[in] params
804  *   Parameters for the model to be loaded.
805  * @param[out] model_id
806  *   Identifier of the model loaded.
807  *
808  * @return
809  *   - 0: Success, Model loaded.
810  *   - < 0: Failure, Error code of the model load driver function.
811  */
812 __rte_experimental
813 int
814 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
815 
816 /**
817  * Unload an ML model from the device.
818  *
819  * @param[in] dev_id
820  *   The identifier of the device.
821  * @param[in] model_id
822  *   Identifier of the model to be unloaded, as returned by rte_ml_model_load().
823  *
824  * @return
825  *   - 0: Success, Model unloaded.
826  *   - < 0: Failure, Error code of the model unload driver function.
827  */
828 __rte_experimental
829 int
830 rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
831 
832 /**
833  * Start an ML model for the given device ID.
834  *
835  * Start an ML model to accept inference requests.
836  *
837  * @param[in] dev_id
838  *   The identifier of the device.
839  * @param[in] model_id
840  *   Identifier of the model to be started.
841  *
842  * @return
843  *   - 0: Success, Model started.
844  *   - < 0: Failure, Error code of the model start driver function.
845  */
846 __rte_experimental
847 int
848 rte_ml_model_start(int16_t dev_id, uint16_t model_id);
849 
850 /**
851  * Stop an ML model for the given device ID.
852  *
853  * Model stop would disable the ML model to be used for inference jobs.
854  * All inference jobs must have been completed before model stop is attempted.
855  *
856  * @param[in] dev_id
857  *   The identifier of the device.
858  * @param[in] model_id
859  *   Identifier of the model to be stopped.
860  *
861  * @return
862  *   - 0: Success, Model stopped.
863  *   - < 0: Failure, Error code of the model stop driver function.
864  */
865 __rte_experimental
866 int
867 rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
868 
869 /**
870  * Input and output data types. ML models can operate on reduced precision
871  * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
872  * This enum is used to represent the lower precision integer and floating point types used
873  * by ML models.
874  */
875 enum rte_ml_io_type {
876 	RTE_ML_IO_TYPE_UNKNOWN = 0,
877 	/**< Invalid or unknown type */
878 	RTE_ML_IO_TYPE_INT8,
879 	/**< 8-bit signed integer */
880 	RTE_ML_IO_TYPE_UINT8,
881 	/**< 8-bit unsigned integer */
882 	RTE_ML_IO_TYPE_INT16,
883 	/**< 16-bit signed integer */
884 	RTE_ML_IO_TYPE_UINT16,
885 	/**< 16-bit unsigned integer */
886 	RTE_ML_IO_TYPE_INT32,
887 	/**< 32-bit signed integer */
888 	RTE_ML_IO_TYPE_UINT32,
889 	/**< 32-bit unsigned integer */
890 	RTE_ML_IO_TYPE_INT64,
891 	/**< 64-bit signed integer */
892 	RTE_ML_IO_TYPE_UINT64,
893 	/**< 64-bit unsigned integer */
894 	RTE_ML_IO_TYPE_FP8,
895 	/**< 8-bit floating point number */
896 	RTE_ML_IO_TYPE_FP16,
897 	/**< IEEE 754 16-bit floating point number */
898 	RTE_ML_IO_TYPE_FP32,
899 	/**< IEEE 754 32-bit floating point number */
900 	RTE_ML_IO_TYPE_BFLOAT16
901 	/**< 16-bit brain floating point number */
902 };
903 
904 /** ML I/O buffer layout */
905 enum rte_ml_io_layout {
906 	RTE_ML_IO_LAYOUT_PACKED,
907 	/**< All inputs for the model should be packed in a single buffer with
908 	 * no padding between individual inputs. The buffer is expected to
909 	 * be aligned to rte_ml_dev_info::align_size.
910 	 *
911 	 * When I/O segmentation is supported by the device, the packed
912 	 * data can be split into multiple segments. In this case, each
913 	 * segment is expected to be aligned to rte_ml_dev_info::align_size.
914 	 *
915 	 * The same applies to outputs.
916 	 *
917 	 * @see struct rte_ml_dev_info::max_segments
918 	 */
919 	RTE_ML_IO_LAYOUT_SPLIT
920 	/**< Each input for the model should be stored in a separate buffer
921 	 * and each input should be aligned to rte_ml_dev_info::align_size.
922 	 *
923 	 * When I/O segmentation is supported, each input can be split into
924 	 * multiple segments. In this case, each segment is expected to be
925 	 * aligned to rte_ml_dev_info::align_size.
926 	 *
927 	 * The same applies to outputs.
928 	 *
929 	 * @see struct rte_ml_dev_info::max_segments
930 	 */
931 };
932 
933 /**
934  * Input and output data information structure
935  *
936  * Specifies the type and shape of input and output data.
937  */
938 struct rte_ml_io_info {
939 	char name[RTE_ML_STR_MAX];
940 	/**< Name of the input or output data */
941 	uint32_t nb_dims;
942 	/**< Number of dimensions in shape */
943 	uint32_t *shape;
944 	/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
945 	enum rte_ml_io_type type;
946 	/**< Type of data
947 	 * @see enum rte_ml_io_type
948 	 */
949 	uint64_t nb_elements;
950 	/**< Number of elements in the tensor */
951 	uint64_t size;
952 	/**< Size of the tensor in bytes */
953 	float scale;
954 	/**< Scale factor */
955 	int64_t zero_point;
956 	/**< Zero point */
957 };
958 
959 /** Model information structure */
960 struct rte_ml_model_info {
961 	char name[RTE_ML_STR_MAX];
962 	/**< Model name */
963 	char version[RTE_ML_STR_MAX];
964 	/**< Model version */
965 	uint16_t model_id;
966 	/**< Model ID */
967 	uint16_t device_id;
968 	/**< Device ID */
969 	enum rte_ml_io_layout io_layout;
970 	/**< I/O buffer layout for the model */
971 	uint16_t min_batches;
972 	/**< Minimum number of batches that the model can process
973 	 * in one inference request
974 	 */
975 	uint16_t max_batches;
976 	/**< Maximum number of batches that the model can process
977 	 * in one inference request
978 	 */
979 	uint32_t nb_inputs;
980 	/**< Number of inputs */
981 	const struct rte_ml_io_info *input_info;
982 	/**< Input info array. Array size is equal to nb_inputs */
983 	uint32_t nb_outputs;
984 	/**< Number of outputs */
985 	const struct rte_ml_io_info *output_info;
986 	/**< Output info array. Array size is equal to nb_outputs */
987 	uint64_t wb_size;
988 	/**< Size of model weights and bias */
989 };
990 
991 /**
992  * Get ML model information.
993  *
994  * @param[in] dev_id
995  *   The identifier of the device.
996  * @param[in] model_id
997  *   Identifier of the model.
998  * @param[out] model_info
999  *   Pointer to a model info structure.
1000  *
1001  * @return
1002  *   - Returns 0 on success
1003  *   - Returns negative value on failure
1004  */
1005 __rte_experimental
1006 int
1007 rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
1008 
1009 /**
1010  * Update the model parameters without unloading model.
1011  *
1012  * Update model parameters such as weights and bias without unloading the model.
1013  * rte_ml_model_stop() must be called before invoking this API.
1014  *
1015  * @param[in] dev_id
1016  *   The identifier of the device.
1017  * @param[in] model_id
1018  *   Identifier of the model.
1019  * @param[in] buffer
1020  *   Pointer to the model weights and bias buffer.
1021  *   Size of the buffer is equal to rte_ml_model_info::wb_size.
1022  *
1023  * @return
1024  *   - Returns 0 on success
1025  *   - Returns negative value on failure
1026  */
1027 __rte_experimental
1028 int
1029 rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
1030 
1031 /* IO operations */
1032 
1033 /**
1034  * Convert a buffer containing numbers in single precision floating point format (float32) to
1035  * signed 8-bit integer format (INT8).
1036  *
1037  * @param[in] fp32
1038  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1039  * @param[out] i8
1040  *      Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
1041  * @param[in] nb_elements
1042  *      Number of elements in the buffer.
1043  * @param[in] scale
1044  *      Scale factor for conversion.
1045  * @param[in] zero_point
1046  *      Zero point for conversion.
1047  *
1048  * @return
1049  *      - 0, Success.
1050  *      - < 0, Error code on failure.
1051  */
1052 __rte_experimental
1053 int
1054 rte_ml_io_float32_to_int8(const void *fp32, void *i8, uint64_t nb_elements, float scale,
1055 			  int8_t zero_point);
1056 
1057 /**
1058  * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
1059  * floating point format (float32).
1060  *
1061  * @param[in] i8
1062  *      Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
1063  * @param[out] fp32
1064  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1065  * @param[in] nb_elements
1066  *      Number of elements in the buffer.
1067  * @param[in] scale
1068  *      Scale factor for conversion.
1069  * @param[in] zero_point
1070  *      Zero point for conversion.
1071  *
1072  * @return
1073  *      - 0, Success.
1074  *      - < 0, Error code on failure.
1075  */
1076 __rte_experimental
1077 int
1078 rte_ml_io_int8_to_float32(const void *i8, void *fp32, uint64_t nb_elements, float scale,
1079 			  int8_t zero_point);
1080 
1081 /**
1082  * Convert a buffer containing numbers in single precision floating point format (float32) to
1083  * unsigned 8-bit integer format (UINT8).
1084  *
1085  * @param[in] fp32
1086  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1087  * @param[out] ui8
1088  *      Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
1089  * @param[in] nb_elements
1090  *      Number of elements in the buffer.
1091  * @param[in] scale
1092  *      Scale factor for conversion.
1093  * @param[in] zero_point
1094  *      Zero point for conversion.
1095  *
1096  * @return
1097  *      - 0, Success.
1098  *      - < 0, Error code on failure.
1099  */
1100 __rte_experimental
1101 int
1102 rte_ml_io_float32_to_uint8(const void *fp32, void *ui8, uint64_t nb_elements, float scale,
1103 			   uint8_t zero_point);
1104 
1105 /**
1106  * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
1107  * floating point format (float32).
1108  *
1109  * @param[in] ui8
1110  *      Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
1111  * @param[out] fp32
1112  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1113  * @param[in] nb_elements
1114  *      Number of elements in the buffer.
1115  * @param[in] scale
1116  *      Scale factor for conversion.
1117  * @param[in] zero_point
1118  *      Zero point for conversion.
1119  *
1120  * @return
1121  *      - 0, Success.
1122  *      - < 0, Error code on failure.
1123  */
1124 __rte_experimental
1125 int
1126 rte_ml_io_uint8_to_float32(const void *ui8, void *fp32, uint64_t nb_elements, float scale,
1127 			   uint8_t zero_point);
1128 
1129 /**
1130  * Convert a buffer containing numbers in single precision floating point format (float32) to
1131  * signed 16-bit integer format (INT16).
1132  *
1133  * @param[in] fp32
1134  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1135  * @param[out] i16
1136  *      Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1137  * @param[in] nb_elements
1138  *      Number of elements in the buffer.
1139  * @param[in] scale
1140  *      Scale factor for conversion.
1141  * @param[in] zero_point
1142  *      Zero point for conversion.
1143  *
1144  * @return
1145  *      - 0, Success.
1146  *      - < 0, Error code on failure.
1147  */
1148 __rte_experimental
1149 int
1150 rte_ml_io_float32_to_int16(const void *fp32, void *i16, uint64_t nb_elements, float scale,
1151 			   int16_t zero_point);
1152 
1153 /**
1154  * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
1155  * floating point format (float32).
1156  *
1157  * @param[in] i16
1158  *      Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1159  * @param[out] fp32
1160  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1161  * @param[in] nb_elements
1162  *      Number of elements in the buffer.
1163  * @param[in] scale
1164  *      Scale factor for conversion.
1165  * @param[in] zero_point
1166  *      Zero point for conversion.
1167  *
1168  * @return
1169  *      - 0, Success.
1170  *      - < 0, Error code on failure.
1171  */
1172 __rte_experimental
1173 int
1174 rte_ml_io_int16_to_float32(const void *i16, void *fp32, uint64_t nb_elements, float scale,
1175 			   int16_t zero_point);
1176 
1177 /**
1178  * Convert a buffer containing numbers in single precision floating point format (float32) to
1179  * unsigned 16-bit integer format (UINT16).
1180  *
1181  * @param[in] fp32
1182  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1183  * @param[out] ui16
1184  *      Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1185  * @param[in] nb_elements
1186  *      Number of elements in the buffer.
1187  * @param[in] scale
1188  *      Scale factor for conversion.
1189  * @param[in] zero_point
1190  *      Zero point for conversion.
1191  *
1192  * @return
1193  *      - 0, Success.
1194  *      - < 0, Error code on failure.
1195  */
1196 __rte_experimental
1197 int
1198 rte_ml_io_float32_to_uint16(const void *fp32, void *ui16, uint64_t nb_elements, float scale,
1199 			    uint16_t zero_point);
1200 
1201 /**
1202  * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
1203  * precision floating point format (float32).
1204  *
1205  * @param[in] ui16
1206  *      Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1207  * @param[out] fp32
1208  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1209  * @param[in] nb_elements
1210  *      Number of elements in the buffer.
1211  * @param[in] scale
1212  *      Scale factor for conversion.
1213  * @param[in] zero_point
1214  *      Zero point for conversion.
1215  *
1216  * @return
1217  *      - 0, Success.
1218  *      - < 0, Error code on failure.
1219  */
1220 __rte_experimental
1221 int
1222 rte_ml_io_uint16_to_float32(const void *ui16, void *fp32, uint64_t nb_elements, float scale,
1223 			    uint16_t zero_point);
1224 
1225 /**
1226  * Convert a buffer containing numbers in single precision floating point format (float32) to
1227  * signed 32-bit integer format (INT32).
1228  *
1229  * @param[in] fp32
1230  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1231  * @param[out] i32
1232  *      Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1233  * @param[in] nb_elements
1234  *      Number of elements in the buffer.
1235  * @param[in] scale
1236  *      Scale factor for conversion.
1237  * @param[in] zero_point
1238  *      Zero point for conversion.
1239  *
1240  * @return
1241  *      - 0, Success.
1242  *      - < 0, Error code on failure.
1243  */
1244 __rte_experimental
1245 int
1246 rte_ml_io_float32_to_int32(const void *fp32, void *i32, uint64_t nb_elements, float scale,
1247 			   int32_t zero_point);
1248 
1249 /**
1250  * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision
1251  * floating point format (float32).
1252  *
1253  * @param[in] i32
1254  *      Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1255  * @param[out] fp32
1256  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1257  * @param[in] nb_elements
1258  *      Number of elements in the buffer.
1259  * @param[in] scale
1260  *      Scale factor for conversion.
1261  * @param[in] zero_point
1262  *      Zero point for conversion.
1263  *
1264  * @return
1265  *      - 0, Success.
1266  *      - < 0, Error code on failure.
1267  */
1268 
1269 __rte_experimental
1270 int
1271 rte_ml_io_int32_to_float32(const void *i32, void *fp32, uint64_t nb_elements, float scale,
1272 			   int32_t zero_point);
1273 
1274 /**
1275  * Convert a buffer containing numbers in single precision floating point format (float32) to
1276  * unsigned 32-bit integer format (UINT32).
1277  *
1278  * @param[in] fp32
1279  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1280  * @param[out] ui32
1281  *      Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1282  * @param[in] nb_elements
1283  *      Number of elements in the buffer.
1284  * @param[in] scale
1285  *      Scale factor for conversion.
1286  * @param[in] zero_point
1287  *      Zero point for conversion.
1288  *
1289  * @return
1290  *      - 0, Success.
1291  *      - < 0, Error code on failure.
1292  */
1293 __rte_experimental
1294 int
1295 rte_ml_io_float32_to_uint32(const void *fp32, void *ui32, uint64_t nb_elements, float scale,
1296 			    uint32_t zero_point);
1297 
1298 /**
1299  * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single
1300  * precision floating point format (float32).
1301  *
1302  * @param[in] ui32
1303  *      Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1304  * @param[out] fp32
1305  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1306  * @param[in] nb_elements
1307  *      Number of elements in the buffer.
1308  * @param[in] scale
1309  *      Scale factor for conversion.
1310  * @param[in] zero_point
1311  *      Zero point for conversion.
1312  *
1313  * @return
1314  *      - 0, Success.
1315  *      - < 0, Error code on failure.
1316  */
1317 __rte_experimental
1318 int
1319 rte_ml_io_uint32_to_float32(const void *ui32, void *fp32, uint64_t nb_elements, float scale,
1320 			    uint32_t zero_point);
1321 
1322 /**
1323  * Convert a buffer containing numbers in single precision floating point format (float32) to
1324  * signed 64-bit integer format (INT64).
1325  *
1326  * @param[in] fp32
1327  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1328  * @param[out] i64
1329  *      Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
1330  * @param[in] nb_elements
1331  *      Number of elements in the buffer.
1332  * @param[in] scale
1333  *      Scale factor for conversion.
1334  * @param[in] zero_point
1335  *      Zero point for conversion.
1336  *
1337  * @return
1338  *      - 0, Success.
1339  *      - < 0, Error code on failure.
1340  */
1341 __rte_experimental
1342 int
1343 rte_ml_io_float32_to_int64(const void *fp32, void *i64, uint64_t nb_elements, float scale,
1344 			   int64_t zero_point);
1345 
1346 /**
1347  * Convert a buffer containing numbers in signed 64-bit integer format (INT64) to single precision
1348  * floating point format (float32).
1349  *
1350  * @param[in] i64
1351  *      Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
1352  * @param[out] fp32
1353  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1354  * @param[in] nb_elements
1355  *      Number of elements in the buffer.
1356  * @param[in] scale
1357  *      Scale factor for conversion.
1358  * @param[in] zero_point
1359  *      Zero point for conversion.
1360  *
1361  * @return
1362  *      - 0, Success.
1363  *      - < 0, Error code on failure.
1364  */
1365 __rte_experimental
1366 int
1367 rte_ml_io_int64_to_float32(const void *i64, void *fp32, uint64_t nb_elements, float scale,
1368 			   int64_t zero_point);
1369 
1370 /**
1371  * Convert a buffer containing numbers in single precision floating point format (float32) to
1372  * unsigned 64-bit integer format (UINT64).
1373  *
1374  * @param[in] fp32
1375  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1376  * @param[out] ui64
1377  *      Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
1378  * @param[in] nb_elements
1379  *      Number of elements in the buffer.
1380  * @param[in] scale
1381  *      Scale factor for conversion.
1382  * @param[in] zero_point
1383  *      Zero point for conversion.
1384  *
1385  * @return
1386  *      - 0, Success.
1387  *      - < 0, Error code on failure.
1388  */
1389 __rte_experimental
1390 int
1391 rte_ml_io_float32_to_uint64(const void *fp32, void *ui64, uint64_t nb_elements, float scale,
1392 			    uint64_t zero_point);
1393 
1394 /**
1395  * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64) to single
1396  * precision floating point format (float32).
1397  *
1398  * @param[in] ui64
1399  *      Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
1400  * @param[out] fp32
1401  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1402  * @param[in] nb_elements
1403  *      Number of elements in the buffer.
1404  * @param[in] scale
1405  *      Scale factor for conversion.
1406  * @param[in] zero_point
1407  *      Zero point for conversion.
1408  *
1409  * @return
1410  *      - 0, Success.
1411  *      - < 0, Error code on failure.
1412  */
1413 __rte_experimental
1414 int
1415 rte_ml_io_uint64_to_float32(const void *ui64, void *fp32, uint64_t nb_elements, float scale,
1416 			    uint64_t zero_point);
1417 
1418 /**
1419  * Convert a buffer containing numbers in single precision floating point format (float32) to
1420  * half precision floating point format (FP16).
1421  *
1422  * @param[in] fp32
1423  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1424  * @param[out] fp16
1425  *      Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1426  * @param[in] nb_elements
1427  *      Number of elements in the buffer.
1428  *
1429  * @return
1430  *      - 0, Success.
1431  *      - < 0, Error code on failure.
1432  */
1433 __rte_experimental
1434 int
1435 rte_ml_io_float32_to_float16(const void *fp32, void *fp16, uint64_t nb_elements);
1436 
1437 /**
1438  * Convert a buffer containing numbers in half precision floating point format (FP16) to single
1439  * precision floating point format (float32).
1440  *
1441  * @param[in] fp16
1442  *      Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1443  * @param[out] fp32
1444  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1445  * @param[in] nb_elements
1446  *      Number of elements in the buffer.
1447  *
1448  * @return
1449  *      - 0, Success.
1450  *      - < 0, Error code on failure.
1451  */
1452 __rte_experimental
1453 int
1454 rte_ml_io_float16_to_float32(const void *fp16, void *fp32, uint64_t nb_elements);
1455 
1456 /**
1457  * Convert a buffer containing numbers in single precision floating point format (float32) to
1458  * brain floating point format (bfloat16).
1459  *
1460  * @param[in] fp32
1461  *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1462  * @param[out] bf16
1463  *      Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
1464  * @param[in] nb_elements
1465  *      Number of elements in the buffer.
1466  *
1467  * @return
1468  *      - 0, Success.
1469  *      - < 0, Error code on failure.
1470  */
1471 __rte_experimental
1472 int
1473 rte_ml_io_float32_to_bfloat16(const void *fp32, void *bf16, uint64_t nb_elements);
1474 
1475 /**
1476  * Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision
1477  * floating point format (float32).
1478  *
1479  * @param[in] bf16
1480  *      Input buffer containing bfloat16 numbers.
1481  *      Size of buffer is equal to (nb_elements * 2) bytes.
1482  * @param[out] fp32
1483  *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
1484  * @param[in] nb_elements
1485  *      Number of elements in the buffer.
1486  *
1487  * @return
1488  *      - 0, Success.
1489  *      - < 0, Error code on failure.
1490  */
1491 __rte_experimental
1492 int
1493 rte_ml_io_bfloat16_to_float32(const void *bf16, void *fp32, uint64_t nb_elements);
1494 
1495 /**
1496  * Quantize input data.
1497  *
1498  * Quantization converts data from a higher precision type to a lower precision type to improve
1499  * the throughput and efficiency of the model execution with minimal loss of accuracy.
1500  * Types of dequantized data and quantized data are specified by the model.
1501  *
1502  * @param[in] dev_id
1503  *   The identifier of the device.
1504  * @param[in] model_id
1505  *   Identifier for the model
1506  * @param[in] dbuffer
1507  *   Address of dequantized input data (source of the conversion)
1508  * @param[out] qbuffer
1509  *   Address of quantized input data (destination of the conversion)
1510  *
1511  * @return
1512  *   - Returns 0 on success
1513  *   - Returns negative value on failure
1514  */
1515 __rte_experimental
1516 int
1517 rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
1518 		   struct rte_ml_buff_seg **qbuffer);
1519 
1520 /**
1521  * Dequantize output data.
1522  *
1523  * Dequantization converts data from a lower precision type to a higher precision type.
1524  * Types of quantized data and dequantized data are specified by the model.
1525  *
1526  * @param[in] dev_id
1527  *   The identifier of the device.
1528  * @param[in] model_id
1529  *   Identifier for the model
1530  * @param[in] qbuffer
1531  *   Address of quantized output data (source of the conversion)
1532  * @param[out] dbuffer
1533  *   Address of dequantized output data (destination of the conversion)
1534  *
1535  * @return
1536  *   - Returns 0 on success
1537  *   - Returns negative value on failure
1538  */
1539 __rte_experimental
1540 int
1541 rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
1542 		     struct rte_ml_buff_seg **dbuffer);
1543 
1544 /* ML op pool operations */
1545 
1546 /**
1547  * Create an ML operation pool
1548  *
1549  * @param name
1550  *   ML operations pool name
1551  * @param nb_elts
1552  *   Number of elements in pool
1553  * @param cache_size
1554  *   Number of elements to cache on lcore, see
1555  *   *rte_mempool_create* for further details about cache size
1556  * @param user_size
1557  *   Size of private data to allocate for user with each operation
1558  * @param socket_id
1559  *   Socket identifier to allocate memory on
1560  * @return
1561  *  - On success pointer to mempool
1562  *  - On failure NULL
1563  */
1564 __rte_experimental
1565 struct rte_mempool *
1566 rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
1567 		      uint16_t user_size, int socket_id);
1568 
1569 /**
1570  * Free an ML operation pool
1571  *
1572  * @param mempool
1573  *   A pointer to the mempool structure.
1574  *   If NULL, the function does nothing.
1575  */
1576 __rte_experimental
1577 void
1578 rte_ml_op_pool_free(struct rte_mempool *mempool);
1579 
1580 #ifdef __cplusplus
1581 }
1582 #endif
1583 
1584 #endif /* RTE_MLDEV_H */
1585