xref: /dpdk/lib/mldev/rte_mldev.h (revision d82cac584f84f08337d7219f76c9374222fb289a)
1*d82cac58SJerin Jacob /* SPDX-License-Identifier: BSD-3-Clause
2*d82cac58SJerin Jacob  * Copyright (c) 2022 Marvell.
3*d82cac58SJerin Jacob  */
4*d82cac58SJerin Jacob 
5*d82cac58SJerin Jacob #ifndef RTE_MLDEV_H
6*d82cac58SJerin Jacob #define RTE_MLDEV_H
7*d82cac58SJerin Jacob 
8*d82cac58SJerin Jacob /**
9*d82cac58SJerin Jacob  * @file rte_mldev.h
10*d82cac58SJerin Jacob  *
11*d82cac58SJerin Jacob  * @warning
12*d82cac58SJerin Jacob  * @b EXPERIMENTAL:
13*d82cac58SJerin Jacob  * All functions in this file may be changed or removed without prior notice.
14*d82cac58SJerin Jacob  *
15*d82cac58SJerin Jacob  * ML (Machine Learning) device API.
16*d82cac58SJerin Jacob  *
17*d82cac58SJerin Jacob  * The ML framework is built on the following model:
18*d82cac58SJerin Jacob  *
19*d82cac58SJerin Jacob  *
20*d82cac58SJerin Jacob  *     +-----------------+               rte_ml_[en|de]queue_burst()
21*d82cac58SJerin Jacob  *     |                 |                          |
22*d82cac58SJerin Jacob  *     |     Machine     o------+     +--------+    |
23*d82cac58SJerin Jacob  *     |     Learning    |      |     | queue  |    |    +------+
24*d82cac58SJerin Jacob  *     |     Inference   o------+-----o        |<===o===>|Core 0|
25*d82cac58SJerin Jacob  *     |     Engine      |      |     | pair 0 |         +------+
26*d82cac58SJerin Jacob  *     |                 o----+ |     +--------+
27*d82cac58SJerin Jacob  *     |                 |    | |
28*d82cac58SJerin Jacob  *     +-----------------+    | |     +--------+
29*d82cac58SJerin Jacob  *              ^             | |     | queue  |         +------+
30*d82cac58SJerin Jacob  *              |             | +-----o        |<=======>|Core 1|
31*d82cac58SJerin Jacob  *              |             |       | pair 1 |         +------+
32*d82cac58SJerin Jacob  *              |             |       +--------+
33*d82cac58SJerin Jacob  *     +--------+--------+    |
34*d82cac58SJerin Jacob  *     | +-------------+ |    |       +--------+
35*d82cac58SJerin Jacob  *     | |   Model 0   | |    |       | queue  |         +------+
36*d82cac58SJerin Jacob  *     | +-------------+ |    +-------o        |<=======>|Core N|
37*d82cac58SJerin Jacob  *     | +-------------+ |            | pair N |         +------+
38*d82cac58SJerin Jacob  *     | |   Model 1   | |            +--------+
39*d82cac58SJerin Jacob  *     | +-------------+ |
40*d82cac58SJerin Jacob  *     | +-------------+ |<------> rte_ml_model_load()
41*d82cac58SJerin Jacob  *     | |   Model ..  | |-------> rte_ml_model_info_get()
42*d82cac58SJerin Jacob  *     | +-------------+ |<------- rte_ml_model_start()
43*d82cac58SJerin Jacob  *     | +-------------+ |<------- rte_ml_model_stop()
44*d82cac58SJerin Jacob  *     | |   Model N   | |<------- rte_ml_model_params_update()
45*d82cac58SJerin Jacob  *     | +-------------+ |<------- rte_ml_model_unload()
46*d82cac58SJerin Jacob  *     +-----------------+
47*d82cac58SJerin Jacob  *
48*d82cac58SJerin Jacob  * ML Device: A hardware or software-based implementation of ML device API for
49*d82cac58SJerin Jacob  * running inferences using a pre-trained ML model.
50*d82cac58SJerin Jacob  *
51*d82cac58SJerin Jacob  * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
52*d82cac58SJerin Jacob  * procedure/algorithm and data/pattern required to make predictions on live data.
53*d82cac58SJerin Jacob  * Once the model is created and trained outside of the DPDK scope, the model can be loaded
54*d82cac58SJerin Jacob  * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
55*d82cac58SJerin Jacob  * The rte_ml_model_params_update() can be used to update the model parameters such as weight
56*d82cac58SJerin Jacob  * and bias without unloading the model using rte_ml_model_unload().
57*d82cac58SJerin Jacob  *
58*d82cac58SJerin Jacob  * ML Inference: ML inference is the process of feeding data to the model via the
59*d82cac58SJerin Jacob  * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
60*d82cac58SJerin Jacob  * outputs/predictions from the started model.
61*d82cac58SJerin Jacob  *
62*d82cac58SJerin Jacob  * In all functions of the ML device API, the ML device is designated by an
63*d82cac58SJerin Jacob  * integer >= 0 named as device identifier *dev_id*.
64*d82cac58SJerin Jacob  *
65*d82cac58SJerin Jacob  * The functions exported by the ML device API to setup a device designated by
66*d82cac58SJerin Jacob  * its device identifier must be invoked in the following order:
67*d82cac58SJerin Jacob  *
68*d82cac58SJerin Jacob  *      - rte_ml_dev_configure()
69*d82cac58SJerin Jacob  *      - rte_ml_dev_queue_pair_setup()
70*d82cac58SJerin Jacob  *      - rte_ml_dev_start()
71*d82cac58SJerin Jacob  *
72*d82cac58SJerin Jacob  * A model is required to run the inference operations with the user specified inputs.
73*d82cac58SJerin Jacob  * Application needs to invoke the ML model API in the following order before queueing
74*d82cac58SJerin Jacob  * inference jobs.
75*d82cac58SJerin Jacob  *
76*d82cac58SJerin Jacob  *      - rte_ml_model_load()
77*d82cac58SJerin Jacob  *      - rte_ml_model_start()
78*d82cac58SJerin Jacob  *
79*d82cac58SJerin Jacob  * A model can be loaded on a device only after the device has been configured and can be
80*d82cac58SJerin Jacob  * started or stopped only after a device has been started.
81*d82cac58SJerin Jacob  *
82*d82cac58SJerin Jacob  * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
83*d82cac58SJerin Jacob  * The information would include the shape and type of input and output required for the inference.
84*d82cac58SJerin Jacob  *
85*d82cac58SJerin Jacob  * Data quantization and dequantization is one of the main aspects in ML domain. This involves
86*d82cac58SJerin Jacob  * conversion of input data from a higher precision to a lower precision data type and vice-versa
87*d82cac58SJerin Jacob  * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
88*d82cac58SJerin Jacob  * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
89*d82cac58SJerin Jacob  * and output buffers holding data for multiple batches.
90*d82cac58SJerin Jacob  *
91*d82cac58SJerin Jacob  * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used to get the
92*d82cac58SJerin Jacob  * size of quantized and de-quantized multi-batch input and output buffers.
93*d82cac58SJerin Jacob  *
94*d82cac58SJerin Jacob  * User can optionally update the model parameters with rte_ml_model_params_update() after
95*d82cac58SJerin Jacob  * invoking rte_ml_model_stop() API on a given model ID.
96*d82cac58SJerin Jacob  *
97*d82cac58SJerin Jacob  * The application can invoke, in any order, the functions exported by the ML API to enqueue
98*d82cac58SJerin Jacob  * inference jobs and dequeue inference response.
99*d82cac58SJerin Jacob  *
100*d82cac58SJerin Jacob  * If the application wants to change the device configuration (i.e., call
101*d82cac58SJerin Jacob  * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the
102*d82cac58SJerin Jacob  * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
103*d82cac58SJerin Jacob  * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
104*d82cac58SJerin Jacob  * for the given model. The application does not need to call rte_ml_dev_stop() API for
105*d82cac58SJerin Jacob  * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
106*d82cac58SJerin Jacob  *
107*d82cac58SJerin Jacob  * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
108*d82cac58SJerin Jacob  * start state after invoking rte_ml_model_start() API, then the application can call
109*d82cac58SJerin Jacob  * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
110*d82cac58SJerin Jacob  *
111*d82cac58SJerin Jacob  * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
112*d82cac58SJerin Jacob  *
113*d82cac58SJerin Jacob  * Typical application utilisation of the ML API will follow the following
114*d82cac58SJerin Jacob  * programming flow.
115*d82cac58SJerin Jacob  *
116*d82cac58SJerin Jacob  * - rte_ml_dev_configure()
117*d82cac58SJerin Jacob  * - rte_ml_dev_queue_pair_setup()
118*d82cac58SJerin Jacob  * - rte_ml_model_load()
119*d82cac58SJerin Jacob  * - rte_ml_dev_start()
120*d82cac58SJerin Jacob  * - rte_ml_model_start()
121*d82cac58SJerin Jacob  * - rte_ml_model_info_get()
122*d82cac58SJerin Jacob  * - rte_ml_enqueue_burst()
123*d82cac58SJerin Jacob  * - rte_ml_dequeue_burst()
124*d82cac58SJerin Jacob  * - rte_ml_model_stop()
125*d82cac58SJerin Jacob  * - rte_ml_model_unload()
126*d82cac58SJerin Jacob  * - rte_ml_dev_stop()
127*d82cac58SJerin Jacob  * - rte_ml_dev_close()
128*d82cac58SJerin Jacob  *
129*d82cac58SJerin Jacob  * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
130*d82cac58SJerin Jacob  * are lock-free functions which are assumed not to be invoked in parallel on different logical cores
131*d82cac58SJerin Jacob  * on the same target object. For instance, the dequeue function of a poll mode driver cannot be
132*d82cac58SJerin Jacob  * invoked in parallel on two logical cores to operate on the same queue pair. Of course, this function
133*d82cac58SJerin Jacob  * can be invoked in parallel by different logical cores on different queue pairs.
134*d82cac58SJerin Jacob  * It is the responsibility of the user application to enforce this rule.
135*d82cac58SJerin Jacob  */
136*d82cac58SJerin Jacob 
137*d82cac58SJerin Jacob #include <rte_common.h>
138*d82cac58SJerin Jacob #include <rte_log.h>
139*d82cac58SJerin Jacob #include <rte_mempool.h>
140*d82cac58SJerin Jacob 
141*d82cac58SJerin Jacob #ifdef __cplusplus
142*d82cac58SJerin Jacob extern "C" {
143*d82cac58SJerin Jacob #endif
144*d82cac58SJerin Jacob 
145*d82cac58SJerin Jacob /* Logging macro and the log type it reports under */
146*d82cac58SJerin Jacob extern int rte_ml_dev_logtype;
147*d82cac58SJerin Jacob /** Log at RTE_LOG_<level> via rte_log(); prefixes the calling function name, appends a newline. */
148*d82cac58SJerin Jacob #define RTE_MLDEV_LOG(level, fmt, args...)                                                         \
149*d82cac58SJerin Jacob 	rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)
150*d82cac58SJerin Jacob 
151*d82cac58SJerin Jacob #define RTE_ML_STR_MAX 128
152*d82cac58SJerin Jacob /**< Maximum length of name string */
153*d82cac58SJerin Jacob 
154*d82cac58SJerin Jacob #define RTE_MLDEV_DEFAULT_MAX 32
155*d82cac58SJerin Jacob /**< Maximum number of devices if rte_ml_dev_init() is not called. */
156*d82cac58SJerin Jacob 
157*d82cac58SJerin Jacob /* Device operations */
158*d82cac58SJerin Jacob 
159*d82cac58SJerin Jacob /**
160*d82cac58SJerin Jacob  * Initialize the device array before probing devices. If not called, the first device probed would
161*d82cac58SJerin Jacob  * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
162*d82cac58SJerin Jacob  *
163*d82cac58SJerin Jacob  * @param dev_max
164*d82cac58SJerin Jacob  *   Maximum number of devices.
165*d82cac58SJerin Jacob  *
166*d82cac58SJerin Jacob  * @return
167*d82cac58SJerin Jacob  *   0 on success, -rte_errno otherwise:
168*d82cac58SJerin Jacob  *   - ENOMEM if out of memory
169*d82cac58SJerin Jacob  *   - EINVAL if *dev_max* is 0
170*d82cac58SJerin Jacob  *   - EBUSY if already initialized
171*d82cac58SJerin Jacob  */
172*d82cac58SJerin Jacob __rte_experimental
173*d82cac58SJerin Jacob int
174*d82cac58SJerin Jacob rte_ml_dev_init(size_t dev_max);
175*d82cac58SJerin Jacob 
176*d82cac58SJerin Jacob /**
177*d82cac58SJerin Jacob  * Get the total number of ML devices that have been successfully initialised.
178*d82cac58SJerin Jacob  *
179*d82cac58SJerin Jacob  * @return
180*d82cac58SJerin Jacob  *   The total number of usable ML devices.
181*d82cac58SJerin Jacob  */
182*d82cac58SJerin Jacob __rte_experimental
183*d82cac58SJerin Jacob uint16_t
184*d82cac58SJerin Jacob rte_ml_dev_count(void);
185*d82cac58SJerin Jacob 
186*d82cac58SJerin Jacob /**
187*d82cac58SJerin Jacob  * Check if the device is in ready state.
188*d82cac58SJerin Jacob  *
189*d82cac58SJerin Jacob  * @param dev_id
190*d82cac58SJerin Jacob  *   The identifier of the device.
191*d82cac58SJerin Jacob  *
192*d82cac58SJerin Jacob  * @return
193*d82cac58SJerin Jacob  *   - 0 if the device is not in ready state.
194*d82cac58SJerin Jacob  *   - 1 if the device is in ready state.
195*d82cac58SJerin Jacob  */
196*d82cac58SJerin Jacob __rte_experimental
197*d82cac58SJerin Jacob int
198*d82cac58SJerin Jacob rte_ml_dev_is_valid_dev(int16_t dev_id);
199*d82cac58SJerin Jacob 
200*d82cac58SJerin Jacob /**
201*d82cac58SJerin Jacob  * Return the NUMA socket to which a device is connected.
202*d82cac58SJerin Jacob  *
203*d82cac58SJerin Jacob  * @param dev_id
204*d82cac58SJerin Jacob  *   The identifier of the device.
205*d82cac58SJerin Jacob  *
206*d82cac58SJerin Jacob  * @return
207*d82cac58SJerin Jacob  *   - The NUMA socket id to which the device is connected.
208*d82cac58SJerin Jacob  *   - 0: if the socket could not be determined.
209*d82cac58SJerin Jacob  *   - -EINVAL: if the dev_id value is not valid.
210*d82cac58SJerin Jacob  */
211*d82cac58SJerin Jacob __rte_experimental
212*d82cac58SJerin Jacob int
213*d82cac58SJerin Jacob rte_ml_dev_socket_id(int16_t dev_id);
214*d82cac58SJerin Jacob 
215*d82cac58SJerin Jacob /** ML device information, as filled in by rte_ml_dev_info_get(). */
216*d82cac58SJerin Jacob struct rte_ml_dev_info {
217*d82cac58SJerin Jacob 	const char *driver_name;
218*d82cac58SJerin Jacob 	/**< Driver name */
219*d82cac58SJerin Jacob 	uint16_t max_models;
220*d82cac58SJerin Jacob 	/**< Maximum number of models supported by the device.
221*d82cac58SJerin Jacob 	 * @see struct rte_ml_dev_config::nb_models
222*d82cac58SJerin Jacob 	 */
223*d82cac58SJerin Jacob 	uint16_t max_queue_pairs;
224*d82cac58SJerin Jacob 	/**< Maximum number of queue pairs supported by the device.
225*d82cac58SJerin Jacob 	 * @see struct rte_ml_dev_config::nb_queue_pairs
226*d82cac58SJerin Jacob 	 */
227*d82cac58SJerin Jacob 	uint16_t max_desc;
228*d82cac58SJerin Jacob 	/**< Maximum number of descriptors per queue pair allowed by the device.
229*d82cac58SJerin Jacob 	 * @see struct rte_ml_dev_qp_conf::nb_desc
230*d82cac58SJerin Jacob 	 */
231*d82cac58SJerin Jacob 	uint16_t max_segments;
232*d82cac58SJerin Jacob 	/**< Maximum number of scatter-gather entries supported by the device.
233*d82cac58SJerin Jacob 	 * @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
234*d82cac58SJerin Jacob 	 */
235*d82cac58SJerin Jacob 	uint16_t min_align_size;
236*d82cac58SJerin Jacob 	/**< Minimum alignment size of IO buffers used by the device. */
237*d82cac58SJerin Jacob };
238*d82cac58SJerin Jacob 
239*d82cac58SJerin Jacob /**
240*d82cac58SJerin Jacob  * Retrieve the information of the device.
241*d82cac58SJerin Jacob  *
242*d82cac58SJerin Jacob  * @param dev_id
243*d82cac58SJerin Jacob  *   The identifier of the device.
244*d82cac58SJerin Jacob  * @param[out] dev_info
245*d82cac58SJerin Jacob  *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
246*d82cac58SJerin Jacob  *
247*d82cac58SJerin Jacob  * @return
248*d82cac58SJerin Jacob  *   - 0: Success, driver updates the information of the ML device.
249*d82cac58SJerin Jacob  *   - < 0: Error code returned by the driver info get function.
250*d82cac58SJerin Jacob  */
251*d82cac58SJerin Jacob __rte_experimental
252*d82cac58SJerin Jacob int
253*d82cac58SJerin Jacob rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
254*d82cac58SJerin Jacob 
255*d82cac58SJerin Jacob /** ML device configuration structure, passed to rte_ml_dev_configure(). */
256*d82cac58SJerin Jacob struct rte_ml_dev_config {
257*d82cac58SJerin Jacob 	int socket_id;
258*d82cac58SJerin Jacob 	/**< Socket to allocate resources on. */
259*d82cac58SJerin Jacob 	uint16_t nb_models;
260*d82cac58SJerin Jacob 	/**< Number of models to be loaded on the device.
261*d82cac58SJerin Jacob 	 * This value cannot exceed the max_models which is previously provided in
262*d82cac58SJerin Jacob 	 * struct rte_ml_dev_info::max_models
263*d82cac58SJerin Jacob 	 */
264*d82cac58SJerin Jacob 	uint16_t nb_queue_pairs;
265*d82cac58SJerin Jacob 	/**< Number of queue pairs to configure on this device.
266*d82cac58SJerin Jacob 	 * This value cannot exceed the max_queue_pairs which is previously provided in
267*d82cac58SJerin Jacob 	 * struct rte_ml_dev_info::max_queue_pairs
268*d82cac58SJerin Jacob 	 */
269*d82cac58SJerin Jacob };
270*d82cac58SJerin Jacob 
271*d82cac58SJerin Jacob /**
272*d82cac58SJerin Jacob  * Configure an ML device.
273*d82cac58SJerin Jacob  *
274*d82cac58SJerin Jacob  * This function must be invoked first before any other function in the API.
275*d82cac58SJerin Jacob  *
276*d82cac58SJerin Jacob  * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after
277*d82cac58SJerin Jacob  * rte_ml_dev_close() is called.
278*d82cac58SJerin Jacob  *
279*d82cac58SJerin Jacob  * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
280*d82cac58SJerin Jacob  * this ML device.
281*d82cac58SJerin Jacob  *
282*d82cac58SJerin Jacob  * @param dev_id
283*d82cac58SJerin Jacob  *   The identifier of the device to configure.
284*d82cac58SJerin Jacob  * @param config
285*d82cac58SJerin Jacob  *   The ML device configuration structure.
286*d82cac58SJerin Jacob  *
287*d82cac58SJerin Jacob  * @return
288*d82cac58SJerin Jacob  *   - 0: Success, device configured.
289*d82cac58SJerin Jacob  *   - < 0: Error code returned by the driver configuration function.
290*d82cac58SJerin Jacob  */
291*d82cac58SJerin Jacob __rte_experimental
292*d82cac58SJerin Jacob int
293*d82cac58SJerin Jacob rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
294*d82cac58SJerin Jacob 
295*d82cac58SJerin Jacob /* Forward declaration */
296*d82cac58SJerin Jacob struct rte_ml_op;
297*d82cac58SJerin Jacob 
298*d82cac58SJerin Jacob /** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
299*d82cac58SJerin Jacob typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
300*d82cac58SJerin Jacob 
301*d82cac58SJerin Jacob /** ML device queue pair configuration structure. */
302*d82cac58SJerin Jacob struct rte_ml_dev_qp_conf {
303*d82cac58SJerin Jacob 	uint32_t nb_desc;
304*d82cac58SJerin Jacob 	/**< Number of descriptors per queue pair.
305*d82cac58SJerin Jacob 	 * This value cannot exceed the max_desc which is previously provided in
306*d82cac58SJerin Jacob 	 * struct rte_ml_dev_info::max_desc
307*d82cac58SJerin Jacob 	 */
308*d82cac58SJerin Jacob 	rte_ml_dev_stop_flush_t cb;
309*d82cac58SJerin Jacob 	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
310*d82cac58SJerin Jacob 	 * Value NULL is allowed, in which case callback will not be invoked.
311*d82cac58SJerin Jacob 	 * This function can be used to properly dispose of outstanding ML ops from all
312*d82cac58SJerin Jacob 	 * queue pairs, for example ops containing memory pointers.
313*d82cac58SJerin Jacob 	 * @see rte_ml_dev_stop()
314*d82cac58SJerin Jacob 	 */
315*d82cac58SJerin Jacob };
316*d82cac58SJerin Jacob 
317*d82cac58SJerin Jacob /**
318*d82cac58SJerin Jacob  * Set up a queue pair for a device. This should only be called when the device is stopped.
319*d82cac58SJerin Jacob  *
320*d82cac58SJerin Jacob  * @param dev_id
321*d82cac58SJerin Jacob  *   The identifier of the device.
322*d82cac58SJerin Jacob  * @param queue_pair_id
323*d82cac58SJerin Jacob  *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
324*d82cac58SJerin Jacob  * previously supplied to rte_ml_dev_configure().
325*d82cac58SJerin Jacob  * @param qp_conf
326*d82cac58SJerin Jacob  *   The pointer to the configuration data to be used for the queue pair.
327*d82cac58SJerin Jacob  * @param socket_id
328*d82cac58SJerin Jacob  *   The *socket_id* argument is the socket identifier in case of NUMA.
329*d82cac58SJerin Jacob  * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
330*d82cac58SJerin Jacob  * for the queue pair.
331*d82cac58SJerin Jacob  *
332*d82cac58SJerin Jacob  * @return
333*d82cac58SJerin Jacob  *   - 0: Success, queue pair correctly set up.
334*d82cac58SJerin Jacob  *   - < 0: Queue pair configuration failed.
335*d82cac58SJerin Jacob  */
336*d82cac58SJerin Jacob __rte_experimental
337*d82cac58SJerin Jacob int
338*d82cac58SJerin Jacob rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
339*d82cac58SJerin Jacob 			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
340*d82cac58SJerin Jacob 
341*d82cac58SJerin Jacob /**
342*d82cac58SJerin Jacob  * Start an ML device.
343*d82cac58SJerin Jacob  *
344*d82cac58SJerin Jacob  * The device start step consists of setting the configured features and enabling the ML device
345*d82cac58SJerin Jacob  * to accept inference jobs.
346*d82cac58SJerin Jacob  *
347*d82cac58SJerin Jacob  * @param dev_id
348*d82cac58SJerin Jacob  *   The identifier of the device.
349*d82cac58SJerin Jacob  *
350*d82cac58SJerin Jacob  * @return
351*d82cac58SJerin Jacob  *   - 0: Success, device started.
352*d82cac58SJerin Jacob  *   - < 0: Error code returned by the driver device start function.
353*d82cac58SJerin Jacob  */
354*d82cac58SJerin Jacob __rte_experimental
355*d82cac58SJerin Jacob int
356*d82cac58SJerin Jacob rte_ml_dev_start(int16_t dev_id);
357*d82cac58SJerin Jacob 
358*d82cac58SJerin Jacob /**
359*d82cac58SJerin Jacob  * Stop an ML device. A stopped device cannot accept inference jobs.
360*d82cac58SJerin Jacob  * The device can be restarted with a call to rte_ml_dev_start().
361*d82cac58SJerin Jacob  *
362*d82cac58SJerin Jacob  * @param dev_id
363*d82cac58SJerin Jacob  *   The identifier of the device.
364*d82cac58SJerin Jacob  *
365*d82cac58SJerin Jacob  * @return
366*d82cac58SJerin Jacob  *   - 0: Success, device stopped.
367*d82cac58SJerin Jacob  *   - < 0: Error code returned by the driver device stop function.
368*d82cac58SJerin Jacob  */
369*d82cac58SJerin Jacob __rte_experimental
370*d82cac58SJerin Jacob int
371*d82cac58SJerin Jacob rte_ml_dev_stop(int16_t dev_id);
372*d82cac58SJerin Jacob 
373*d82cac58SJerin Jacob /**
374*d82cac58SJerin Jacob  * Close an ML device. The device cannot be restarted!
375*d82cac58SJerin Jacob  *
376*d82cac58SJerin Jacob  * @param dev_id
377*d82cac58SJerin Jacob  *   The identifier of the device.
378*d82cac58SJerin Jacob  *
379*d82cac58SJerin Jacob  * @return
380*d82cac58SJerin Jacob  *   - 0: Success, device closed.
381*d82cac58SJerin Jacob  *   - < 0: Failure to close the device.
382*d82cac58SJerin Jacob  */
383*d82cac58SJerin Jacob __rte_experimental
384*d82cac58SJerin Jacob int
385*d82cac58SJerin Jacob rte_ml_dev_close(int16_t dev_id);
386*d82cac58SJerin Jacob 
387*d82cac58SJerin Jacob /** Status of an ML operation, as stored in rte_ml_op::status. */
388*d82cac58SJerin Jacob enum rte_ml_op_status {
389*d82cac58SJerin Jacob 	RTE_ML_OP_STATUS_SUCCESS = 0,
390*d82cac58SJerin Jacob 	/**< Operation completed successfully. */
391*d82cac58SJerin Jacob 	RTE_ML_OP_STATUS_NOT_PROCESSED,
392*d82cac58SJerin Jacob 	/**< Operation has not yet been processed by the device. */
393*d82cac58SJerin Jacob 	RTE_ML_OP_STATUS_ERROR,
394*d82cac58SJerin Jacob 	/**< Operation completed with error.
395*d82cac58SJerin Jacob 	 * Application can invoke rte_ml_op_error_get() to get the PMD specific
396*d82cac58SJerin Jacob 	 * error code if needed.
397*d82cac58SJerin Jacob 	 */
398*d82cac58SJerin Jacob };
399*d82cac58SJerin Jacob 
400*d82cac58SJerin Jacob /** One segment of an ML operation's input or output buffer, chained as a scatter-gather list.
401*d82cac58SJerin Jacob  */
402*d82cac58SJerin Jacob struct rte_ml_buff_seg {
403*d82cac58SJerin Jacob 	rte_iova_t iova_addr;
404*d82cac58SJerin Jacob 	/**< IOVA address of segment buffer. */
405*d82cac58SJerin Jacob 	void *addr;
406*d82cac58SJerin Jacob 	/**< Virtual address of segment buffer. */
407*d82cac58SJerin Jacob 	uint32_t length;
408*d82cac58SJerin Jacob 	/**< Segment length. */
409*d82cac58SJerin Jacob 	uint32_t reserved;
410*d82cac58SJerin Jacob 	/**< Reserved for future use. */
411*d82cac58SJerin Jacob 	struct rte_ml_buff_seg *next;
412*d82cac58SJerin Jacob 	/**< Points to the next segment. Value NULL represents the last segment. */
413*d82cac58SJerin Jacob };
414*d82cac58SJerin Jacob 
415*d82cac58SJerin Jacob /**
416*d82cac58SJerin Jacob  * ML Operation.
417*d82cac58SJerin Jacob  *
418*d82cac58SJerin Jacob  * This structure contains data related to performing an ML operation on the buffers using
419*d82cac58SJerin Jacob  * the model specified through model_id.
420*d82cac58SJerin Jacob  */
421*d82cac58SJerin Jacob struct rte_ml_op {
422*d82cac58SJerin Jacob 	uint16_t model_id;
423*d82cac58SJerin Jacob 	/**< Model ID to be used for the operation. */
424*d82cac58SJerin Jacob 	uint16_t nb_batches;
425*d82cac58SJerin Jacob 	/**< Number of batches. Minimum value must be one.
426*d82cac58SJerin Jacob 	 * The input buffer must hold the inference data of each batch contiguously.
427*d82cac58SJerin Jacob 	 */
428*d82cac58SJerin Jacob 	uint32_t reserved;
429*d82cac58SJerin Jacob 	/**< Reserved for future use. */
430*d82cac58SJerin Jacob 	struct rte_mempool *mempool;
431*d82cac58SJerin Jacob 	/**< Pool from which operation is allocated. */
432*d82cac58SJerin Jacob 	struct rte_ml_buff_seg input;
433*d82cac58SJerin Jacob 	/**< Input buffer to hold the inference data. */
434*d82cac58SJerin Jacob 	struct rte_ml_buff_seg output;
435*d82cac58SJerin Jacob 	/**< Output buffer to hold the inference output by the driver. */
436*d82cac58SJerin Jacob 	RTE_STD_C11
437*d82cac58SJerin Jacob 	union {
438*d82cac58SJerin Jacob 		uint64_t user_u64;
439*d82cac58SJerin Jacob 		/**< User data as uint64_t. */
440*d82cac58SJerin Jacob 		void *user_ptr;
441*d82cac58SJerin Jacob 		/**< User data as void*. */
442*d82cac58SJerin Jacob 	};
443*d82cac58SJerin Jacob 	enum rte_ml_op_status status;
444*d82cac58SJerin Jacob 	/**< Operation status. */
445*d82cac58SJerin Jacob 	uint64_t impl_opaque;
446*d82cac58SJerin Jacob 	/**< Implementation specific opaque value.
447*d82cac58SJerin Jacob 	 * An implementation may use this field to hold an
448*d82cac58SJerin Jacob 	 * implementation specific value to share between the
449*d82cac58SJerin Jacob 	 * enqueue and dequeue operations.
450*d82cac58SJerin Jacob 	 * The application should not modify this field.
451*d82cac58SJerin Jacob 	 */
452*d82cac58SJerin Jacob } __rte_cache_aligned;
453*d82cac58SJerin Jacob 
454*d82cac58SJerin Jacob /* Enqueue/Dequeue operations */
455*d82cac58SJerin Jacob 
456*d82cac58SJerin Jacob /**
457*d82cac58SJerin Jacob  * Enqueue a burst of ML inferences for processing on an ML device.
458*d82cac58SJerin Jacob  *
459*d82cac58SJerin Jacob  * The rte_ml_enqueue_burst() function is invoked to place ML inference
460*d82cac58SJerin Jacob  * operations on the queue *qp_id* of the device designated by its *dev_id*.
461*d82cac58SJerin Jacob  *
462*d82cac58SJerin Jacob  * The *nb_ops* parameter is the number of inferences to process which are
463*d82cac58SJerin Jacob  * supplied in the *ops* array of *rte_ml_op* structures.
464*d82cac58SJerin Jacob  *
465*d82cac58SJerin Jacob  * The rte_ml_enqueue_burst() function returns the number of inferences it
466*d82cac58SJerin Jacob  * actually enqueued for processing. A return value equal to *nb_ops* means that
467*d82cac58SJerin Jacob  * all inferences have been enqueued.
468*d82cac58SJerin Jacob  *
469*d82cac58SJerin Jacob  * @param dev_id
470*d82cac58SJerin Jacob  *   The identifier of the device.
471*d82cac58SJerin Jacob  * @param qp_id
472*d82cac58SJerin Jacob  *   The index of the queue pair on which inferences are to be enqueued for processing.
473*d82cac58SJerin Jacob  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
474*d82cac58SJerin Jacob  * rte_ml_dev_configure().
475*d82cac58SJerin Jacob  * @param ops
476*d82cac58SJerin Jacob  *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
477*d82cac58SJerin Jacob  * ML inferences to be processed.
478*d82cac58SJerin Jacob  * @param nb_ops
479*d82cac58SJerin Jacob  *   The number of operations to process.
480*d82cac58SJerin Jacob  *
481*d82cac58SJerin Jacob  * @return
482*d82cac58SJerin Jacob  *   The number of inference operations actually enqueued to the ML device.
483*d82cac58SJerin Jacob  * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
484*d82cac58SJerin Jacob  * is full or if invalid parameters are specified in a *rte_ml_op*.
485*d82cac58SJerin Jacob  */
486*d82cac58SJerin Jacob __rte_experimental
487*d82cac58SJerin Jacob uint16_t
488*d82cac58SJerin Jacob rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
489*d82cac58SJerin Jacob 
490*d82cac58SJerin Jacob /**
491*d82cac58SJerin Jacob  * Dequeue a burst of processed ML inference operations from a queue on the ML device.
492*d82cac58SJerin Jacob  * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
493*d82cac58SJerin Jacob  * in the *ops* array.
494*d82cac58SJerin Jacob  *
495*d82cac58SJerin Jacob  * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
496*d82cac58SJerin Jacob  * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
497*d82cac58SJerin Jacob  *
498*d82cac58SJerin Jacob  * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
499*d82cac58SJerin Jacob  * and this is likely to signify that other processed operations remain in the device's output queue.
500*d82cac58SJerin Jacob  * Application implementing a "retrieve as many processed operations as possible" policy can check
501*d82cac58SJerin Jacob  * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
502*d82cac58SJerin Jacob  * *nb_ops* is returned.
503*d82cac58SJerin Jacob  *
504*d82cac58SJerin Jacob  * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
505*d82cac58SJerin Jacob  * the corresponding overhead.
506*d82cac58SJerin Jacob  *
507*d82cac58SJerin Jacob  * @param dev_id
508*d82cac58SJerin Jacob  *   The identifier of the device.
509*d82cac58SJerin Jacob  * @param qp_id
510*d82cac58SJerin Jacob  *   The index of the queue pair from which to retrieve processed inferences.
511*d82cac58SJerin Jacob  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
512*d82cac58SJerin Jacob  * rte_ml_dev_configure().
513*d82cac58SJerin Jacob  * @param ops
514*d82cac58SJerin Jacob  *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
515*d82cac58SJerin Jacob  * store *nb_ops* pointers in it.
516*d82cac58SJerin Jacob  * @param nb_ops
517*d82cac58SJerin Jacob  *   The maximum number of inferences to dequeue.
518*d82cac58SJerin Jacob  *
519*d82cac58SJerin Jacob  * @return
520*d82cac58SJerin Jacob  *   The number of operations actually dequeued, which is the number of pointers
521*d82cac58SJerin Jacob  * to *rte_ml_op* structures effectively supplied to the *ops* array.
522*d82cac58SJerin Jacob  */
523*d82cac58SJerin Jacob __rte_experimental
524*d82cac58SJerin Jacob uint16_t
525*d82cac58SJerin Jacob rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
526*d82cac58SJerin Jacob 
527*d82cac58SJerin Jacob /**
528*d82cac58SJerin Jacob  * Verbose error structure definition, filled in by rte_ml_op_error_get().
529*d82cac58SJerin Jacob  */
530*d82cac58SJerin Jacob struct rte_ml_op_error {
531*d82cac58SJerin Jacob 	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
532*d82cac58SJerin Jacob 	uint64_t errcode;	      /**< Vendor-specific error code. */
533*d82cac58SJerin Jacob };
534*d82cac58SJerin Jacob 
535*d82cac58SJerin Jacob /**
536*d82cac58SJerin Jacob  * Get PMD specific error information for an ML op.
537*d82cac58SJerin Jacob  *
538*d82cac58SJerin Jacob  * When an ML operation has completed with RTE_ML_OP_STATUS_ERROR as status,
539*d82cac58SJerin Jacob  * this API can be used to get the PMD specific error details.
540*d82cac58SJerin Jacob  *
541*d82cac58SJerin Jacob  * @param[in] dev_id
542*d82cac58SJerin Jacob  *   Device identifier
543*d82cac58SJerin Jacob  * @param[in] op
544*d82cac58SJerin Jacob  *   Handle of ML operation
545*d82cac58SJerin Jacob  * @param[out] error
546*d82cac58SJerin Jacob  *   Address of structure rte_ml_op_error to be filled
547*d82cac58SJerin Jacob  *
548*d82cac58SJerin Jacob  * @return
549*d82cac58SJerin Jacob  *   - Returns 0 on success
550*d82cac58SJerin Jacob  *   - Returns negative value on failure
551*d82cac58SJerin Jacob  */
552*d82cac58SJerin Jacob __rte_experimental
553*d82cac58SJerin Jacob int
554*d82cac58SJerin Jacob rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
555*d82cac58SJerin Jacob 
556*d82cac58SJerin Jacob /* Statistics operations */
557*d82cac58SJerin Jacob 
558*d82cac58SJerin Jacob /** Device statistics. */
559*d82cac58SJerin Jacob struct rte_ml_dev_stats {
560*d82cac58SJerin Jacob 	uint64_t enqueued_count;
561*d82cac58SJerin Jacob 	/**< Count of all operations enqueued */
562*d82cac58SJerin Jacob 	uint64_t dequeued_count;
563*d82cac58SJerin Jacob 	/**< Count of all operations dequeued */
564*d82cac58SJerin Jacob 	uint64_t enqueue_err_count;
565*d82cac58SJerin Jacob 	/**< Total error count on operations enqueued */
566*d82cac58SJerin Jacob 	uint64_t dequeue_err_count;
567*d82cac58SJerin Jacob 	/**< Total error count on operations dequeued */
568*d82cac58SJerin Jacob };
569*d82cac58SJerin Jacob 
570*d82cac58SJerin Jacob /**
571*d82cac58SJerin Jacob  * Retrieve the general I/O statistics of a device.
572*d82cac58SJerin Jacob  *
573*d82cac58SJerin Jacob  * @param dev_id
574*d82cac58SJerin Jacob  *   The identifier of the device.
575*d82cac58SJerin Jacob  * @param[out] stats
576*d82cac58SJerin Jacob  *   Pointer to the structure where the statistics will be copied.
577*d82cac58SJerin Jacob  *   On error, this location may or may not have been modified.
578*d82cac58SJerin Jacob  * @return
579*d82cac58SJerin Jacob  *   - 0 on success
580*d82cac58SJerin Jacob  *   - -EINVAL: If invalid parameter pointer is provided.
581*d82cac58SJerin Jacob  */
582*d82cac58SJerin Jacob __rte_experimental
583*d82cac58SJerin Jacob int
584*d82cac58SJerin Jacob rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
585*d82cac58SJerin Jacob 
586*d82cac58SJerin Jacob /**
587*d82cac58SJerin Jacob  * Reset the statistics of a device.
588*d82cac58SJerin Jacob  *
589*d82cac58SJerin Jacob  * @param dev_id
590*d82cac58SJerin Jacob  *   The identifier of the device.
591*d82cac58SJerin Jacob  */
592*d82cac58SJerin Jacob __rte_experimental
593*d82cac58SJerin Jacob void
594*d82cac58SJerin Jacob rte_ml_dev_stats_reset(int16_t dev_id);
595*d82cac58SJerin Jacob 
596*d82cac58SJerin Jacob /**
597*d82cac58SJerin Jacob  * A name-key lookup element for extended statistics.
598*d82cac58SJerin Jacob  *
599*d82cac58SJerin Jacob  * This structure is used to map between names and ID numbers for extended ML device statistics.
600*d82cac58SJerin Jacob  */
601*d82cac58SJerin Jacob struct rte_ml_dev_xstats_map {
602*d82cac58SJerin Jacob 	uint16_t id;
603*d82cac58SJerin Jacob 	/**< xstat identifier */
604*d82cac58SJerin Jacob 	char name[RTE_ML_STR_MAX];
605*d82cac58SJerin Jacob 	/**< xstat name */
606*d82cac58SJerin Jacob };
607*d82cac58SJerin Jacob 
608*d82cac58SJerin Jacob /**
609*d82cac58SJerin Jacob  * Retrieve names of extended statistics of an ML device.
610*d82cac58SJerin Jacob  *
611*d82cac58SJerin Jacob  * @param dev_id
612*d82cac58SJerin Jacob  *   The identifier of the device.
613*d82cac58SJerin Jacob  * @param[out] xstats_map
614*d82cac58SJerin Jacob  *   Block of memory to insert ids and names into. Must have capacity for at least *size* entries.
615*d82cac58SJerin Jacob  *   If set to NULL, function returns required capacity.
616*d82cac58SJerin Jacob  * @param size
617*d82cac58SJerin Jacob  *   Capacity of xstats_map (number of name-id maps).
618*d82cac58SJerin Jacob  *
619*d82cac58SJerin Jacob  * @return
620*d82cac58SJerin Jacob  *   - Positive value on success:
621*d82cac58SJerin Jacob  *      - The return value is the number of entries filled in the stats map.
622*d82cac58SJerin Jacob  *      - If xstats_map is set to NULL, the required capacity for xstats_map.
623*d82cac58SJerin Jacob  *   - Negative value on error:
624*d82cac58SJerin Jacob  *      - -ENODEV: for invalid *dev_id*.
625*d82cac58SJerin Jacob  *      - -ENOTSUP: if the device doesn't support this function.
626*d82cac58SJerin Jacob  */
627*d82cac58SJerin Jacob __rte_experimental
628*d82cac58SJerin Jacob int
629*d82cac58SJerin Jacob rte_ml_dev_xstats_names_get(int16_t dev_id, struct rte_ml_dev_xstats_map *xstats_map,
630*d82cac58SJerin Jacob 			    uint32_t size);
631*d82cac58SJerin Jacob 
632*d82cac58SJerin Jacob /**
633*d82cac58SJerin Jacob  * Retrieve the value of a single stat by requesting it by name.
634*d82cac58SJerin Jacob  *
635*d82cac58SJerin Jacob  * @param dev_id
636*d82cac58SJerin Jacob  *   The identifier of the device.
637*d82cac58SJerin Jacob  * @param name
638*d82cac58SJerin Jacob  *   The stat name to retrieve.
639*d82cac58SJerin Jacob  * @param[out] stat_id
640*d82cac58SJerin Jacob  *   If non-NULL, the numerical id of the stat will be returned, so that further requests for
641*d82cac58SJerin Jacob  *   the stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to
642*d82cac58SJerin Jacob  *   scan a list of names for the stat.
643*d82cac58SJerin Jacob  * @param[out] value
644*d82cac58SJerin Jacob  *   Must be non-NULL, retrieved xstat value will be stored in this address.
645*d82cac58SJerin Jacob  *
646*d82cac58SJerin Jacob  * @return
647*d82cac58SJerin Jacob  *   - 0: Successfully retrieved xstat value.
648*d82cac58SJerin Jacob  *   - -EINVAL: invalid parameters.
649*d82cac58SJerin Jacob  *   - -ENOTSUP: if not supported.
650*d82cac58SJerin Jacob  */
651*d82cac58SJerin Jacob __rte_experimental
652*d82cac58SJerin Jacob int
653*d82cac58SJerin Jacob rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
654*d82cac58SJerin Jacob 
655*d82cac58SJerin Jacob /**
656*d82cac58SJerin Jacob  * Retrieve extended statistics of an ML device.
657*d82cac58SJerin Jacob  *
658*d82cac58SJerin Jacob  * @param dev_id
659*d82cac58SJerin Jacob  *   The identifier of the device.
660*d82cac58SJerin Jacob  * @param stat_ids
661*d82cac58SJerin Jacob  *   The id numbers of the stats to get. The ids can be fetched from the stat position in the
662*d82cac58SJerin Jacob  *   stat list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
663*d82cac58SJerin Jacob  * @param[out] values
664*d82cac58SJerin Jacob  *   The values for each stat requested by ID.
665*d82cac58SJerin Jacob  * @param nb_ids
666*d82cac58SJerin Jacob  *   The number of stats requested.
667*d82cac58SJerin Jacob  * @return
668*d82cac58SJerin Jacob  *   - Positive value: number of stat entries filled into the values array
669*d82cac58SJerin Jacob  *   - Negative value on error:
670*d82cac58SJerin Jacob  *      - -ENODEV: for invalid *dev_id*.
671*d82cac58SJerin Jacob  *      - -ENOTSUP: if the device doesn't support this function.
672*d82cac58SJerin Jacob  */
673*d82cac58SJerin Jacob __rte_experimental
674*d82cac58SJerin Jacob int
675*d82cac58SJerin Jacob rte_ml_dev_xstats_get(int16_t dev_id, const uint16_t *stat_ids, uint64_t *values, uint16_t nb_ids);
676*d82cac58SJerin Jacob 
677*d82cac58SJerin Jacob /**
678*d82cac58SJerin Jacob  * Reset the values of the selected xstats of the device.
679*d82cac58SJerin Jacob  *
680*d82cac58SJerin Jacob  * @param dev_id
681*d82cac58SJerin Jacob  *   The identifier of the device.
682*d82cac58SJerin Jacob  * @param stat_ids
683*d82cac58SJerin Jacob  *   Selects specific statistics to be reset. When NULL, all statistics will be reset.
684*d82cac58SJerin Jacob  *   If non-NULL, must point to array of at least *nb_ids* size.
685*d82cac58SJerin Jacob  * @param nb_ids
686*d82cac58SJerin Jacob  *   The number of ids available from the *stat_ids* array. Ignored when *stat_ids* is NULL.
687*d82cac58SJerin Jacob  * @return
688*d82cac58SJerin Jacob  *   - 0: Successfully reset the statistics to zero.
689*d82cac58SJerin Jacob  *   - -EINVAL: invalid parameters.
690*d82cac58SJerin Jacob  *   - -ENOTSUP: if not supported.
691*d82cac58SJerin Jacob  */
692*d82cac58SJerin Jacob __rte_experimental
693*d82cac58SJerin Jacob int
694*d82cac58SJerin Jacob rte_ml_dev_xstats_reset(int16_t dev_id, const uint16_t *stat_ids, uint16_t nb_ids);
695*d82cac58SJerin Jacob 
696*d82cac58SJerin Jacob /* Utility operations */
697*d82cac58SJerin Jacob 
698*d82cac58SJerin Jacob /**
699*d82cac58SJerin Jacob  * Dump internal information about *dev_id* to the FILE* provided in *fd*.
700*d82cac58SJerin Jacob  *
701*d82cac58SJerin Jacob  * @param dev_id
702*d82cac58SJerin Jacob  *   The identifier of the device.
703*d82cac58SJerin Jacob  * @param fd
704*d82cac58SJerin Jacob  *   A pointer to a file for output.
705*d82cac58SJerin Jacob  * @return
706*d82cac58SJerin Jacob  *   - 0: on success.
707*d82cac58SJerin Jacob  *   - <0: on failure.
708*d82cac58SJerin Jacob  */
709*d82cac58SJerin Jacob __rte_experimental
710*d82cac58SJerin Jacob int
711*d82cac58SJerin Jacob rte_ml_dev_dump(int16_t dev_id, FILE *fd);
712*d82cac58SJerin Jacob 
713*d82cac58SJerin Jacob /**
714*d82cac58SJerin Jacob  * Trigger the ML device self test.
715*d82cac58SJerin Jacob  *
716*d82cac58SJerin Jacob  * @param dev_id
717*d82cac58SJerin Jacob  *   The identifier of the device.
718*d82cac58SJerin Jacob  * @return
719*d82cac58SJerin Jacob  *   - 0: Selftest successful.
720*d82cac58SJerin Jacob  *   - -ENOTSUP: if the device doesn't support selftest.
721*d82cac58SJerin Jacob  *   - other values < 0 on failure.
722*d82cac58SJerin Jacob  */
723*d82cac58SJerin Jacob __rte_experimental
724*d82cac58SJerin Jacob int
725*d82cac58SJerin Jacob rte_ml_dev_selftest(int16_t dev_id);
726*d82cac58SJerin Jacob 
727*d82cac58SJerin Jacob /* Model operations */
728*d82cac58SJerin Jacob 
729*d82cac58SJerin Jacob /** ML model load parameters
730*d82cac58SJerin Jacob  *
731*d82cac58SJerin Jacob  * Parameters required to load an ML model.
732*d82cac58SJerin Jacob  */
733*d82cac58SJerin Jacob struct rte_ml_model_params {
734*d82cac58SJerin Jacob 	void *addr;
735*d82cac58SJerin Jacob 	/**< Address of model buffer */
736*d82cac58SJerin Jacob 	size_t size;
737*d82cac58SJerin Jacob 	/**< Size of model buffer */
738*d82cac58SJerin Jacob };
739*d82cac58SJerin Jacob 
740*d82cac58SJerin Jacob /**
741*d82cac58SJerin Jacob  * Load an ML model to the device.
742*d82cac58SJerin Jacob  *
743*d82cac58SJerin Jacob  * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
744*d82cac58SJerin Jacob  *
745*d82cac58SJerin Jacob  * @param[in] dev_id
746*d82cac58SJerin Jacob  *   The identifier of the device.
747*d82cac58SJerin Jacob  * @param[in] params
748*d82cac58SJerin Jacob  *   Parameters for the model to be loaded.
749*d82cac58SJerin Jacob  * @param[out] model_id
750*d82cac58SJerin Jacob  *   Identifier of the model loaded.
751*d82cac58SJerin Jacob  *
752*d82cac58SJerin Jacob  * @return
753*d82cac58SJerin Jacob  *   - 0: Success, Model loaded.
754*d82cac58SJerin Jacob  *   - < 0: Failure, Error code of the model load driver function.
755*d82cac58SJerin Jacob  */
756*d82cac58SJerin Jacob __rte_experimental
757*d82cac58SJerin Jacob int
758*d82cac58SJerin Jacob rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
759*d82cac58SJerin Jacob 
760*d82cac58SJerin Jacob /**
761*d82cac58SJerin Jacob  * Unload an ML model from the device.
762*d82cac58SJerin Jacob  *
763*d82cac58SJerin Jacob  * @param[in] dev_id
764*d82cac58SJerin Jacob  *   The identifier of the device.
765*d82cac58SJerin Jacob  * @param[in] model_id
766*d82cac58SJerin Jacob  *   Identifier of the model to be unloaded.
767*d82cac58SJerin Jacob  *
768*d82cac58SJerin Jacob  * @return
769*d82cac58SJerin Jacob  *   - 0: Success, Model unloaded.
770*d82cac58SJerin Jacob  *   - < 0: Failure, Error code of the model unload driver function.
771*d82cac58SJerin Jacob  */
772*d82cac58SJerin Jacob __rte_experimental
773*d82cac58SJerin Jacob int
774*d82cac58SJerin Jacob rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
775*d82cac58SJerin Jacob 
776*d82cac58SJerin Jacob /**
777*d82cac58SJerin Jacob  * Start an ML model for the given device ID.
778*d82cac58SJerin Jacob  *
779*d82cac58SJerin Jacob  * Start an ML model to accept inference requests.
780*d82cac58SJerin Jacob  *
781*d82cac58SJerin Jacob  * @param[in] dev_id
782*d82cac58SJerin Jacob  *   The identifier of the device.
783*d82cac58SJerin Jacob  * @param[in] model_id
784*d82cac58SJerin Jacob  *   Identifier of the model to be started.
785*d82cac58SJerin Jacob  *
786*d82cac58SJerin Jacob  * @return
787*d82cac58SJerin Jacob  *   - 0: Success, Model started.
788*d82cac58SJerin Jacob  *   - < 0: Failure, Error code of the model start driver function.
789*d82cac58SJerin Jacob  */
790*d82cac58SJerin Jacob __rte_experimental
791*d82cac58SJerin Jacob int
792*d82cac58SJerin Jacob rte_ml_model_start(int16_t dev_id, uint16_t model_id);
793*d82cac58SJerin Jacob 
794*d82cac58SJerin Jacob /**
795*d82cac58SJerin Jacob  * Stop an ML model for the given device ID.
796*d82cac58SJerin Jacob  *
797*d82cac58SJerin Jacob  * Model stop disables the ML model from being used for inference jobs.
798*d82cac58SJerin Jacob  * All inference jobs must have been completed before model stop is attempted.
799*d82cac58SJerin Jacob  *
800*d82cac58SJerin Jacob  * @param[in] dev_id
801*d82cac58SJerin Jacob  *   The identifier of the device.
802*d82cac58SJerin Jacob  * @param[in] model_id
803*d82cac58SJerin Jacob  *   Identifier of the model to be stopped.
804*d82cac58SJerin Jacob  *
805*d82cac58SJerin Jacob  * @return
806*d82cac58SJerin Jacob  *   - 0: Success, Model stopped.
807*d82cac58SJerin Jacob  *   - < 0: Failure, Error code of the model stop driver function.
808*d82cac58SJerin Jacob  */
809*d82cac58SJerin Jacob __rte_experimental
810*d82cac58SJerin Jacob int
811*d82cac58SJerin Jacob rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
812*d82cac58SJerin Jacob 
813*d82cac58SJerin Jacob /**
814*d82cac58SJerin Jacob  * Input and output data types. ML models can operate on reduced precision
815*d82cac58SJerin Jacob  * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
816*d82cac58SJerin Jacob  * This enum is used to represent the lower precision integer and floating point types used
817*d82cac58SJerin Jacob  * by ML models.
818*d82cac58SJerin Jacob  */
819*d82cac58SJerin Jacob enum rte_ml_io_type {
820*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_UNKNOWN = 0,
821*d82cac58SJerin Jacob 	/**< Invalid or unknown type */
822*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_INT8,
823*d82cac58SJerin Jacob 	/**< 8-bit integer */
824*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_UINT8,
825*d82cac58SJerin Jacob 	/**< 8-bit unsigned integer */
826*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_INT16,
827*d82cac58SJerin Jacob 	/**< 16-bit integer */
828*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_UINT16,
829*d82cac58SJerin Jacob 	/**< 16-bit unsigned integer */
830*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_INT32,
831*d82cac58SJerin Jacob 	/**< 32-bit integer */
832*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_UINT32,
833*d82cac58SJerin Jacob 	/**< 32-bit unsigned integer */
834*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_FP8,
835*d82cac58SJerin Jacob 	/**< 8-bit floating point number */
836*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_FP16,
837*d82cac58SJerin Jacob 	/**< IEEE 754 16-bit floating point number */
838*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_FP32,
839*d82cac58SJerin Jacob 	/**< IEEE 754 32-bit floating point number */
840*d82cac58SJerin Jacob 	RTE_ML_IO_TYPE_BFLOAT16
841*d82cac58SJerin Jacob 	/**< 16-bit brain floating point number. */
842*d82cac58SJerin Jacob };
843*d82cac58SJerin Jacob 
844*d82cac58SJerin Jacob /**
845*d82cac58SJerin Jacob  * Input and output format. This is used to represent the encoding type of multi-dimensional
846*d82cac58SJerin Jacob  * data used by ML models.
847*d82cac58SJerin Jacob  */
848*d82cac58SJerin Jacob enum rte_ml_io_format {
849*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_NCHW = 1,
850*d82cac58SJerin Jacob 	/**< Batch size (N) x channels (C) x height (H) x width (W) */
851*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_NHWC,
852*d82cac58SJerin Jacob 	/**< Batch size (N) x height (H) x width (W) x channels (C) */
853*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_CHWN,
854*d82cac58SJerin Jacob 	/**< Channels (C) x height (H) x width (W) x batch size (N) */
855*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_3D,
856*d82cac58SJerin Jacob 	/**< Format to represent a 3 dimensional data */
857*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_2D,
858*d82cac58SJerin Jacob 	/**< Format to represent matrix data */
859*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_1D,
860*d82cac58SJerin Jacob 	/**< Format to represent vector data */
861*d82cac58SJerin Jacob 	RTE_ML_IO_FORMAT_SCALAR,
862*d82cac58SJerin Jacob 	/**< Format to represent scalar data */
863*d82cac58SJerin Jacob };
864*d82cac58SJerin Jacob 
865*d82cac58SJerin Jacob /**
866*d82cac58SJerin Jacob  * Input and output shape. This structure represents the encoding format and dimensions
867*d82cac58SJerin Jacob  * of the tensor or vector.
868*d82cac58SJerin Jacob  *
869*d82cac58SJerin Jacob  * The data can be a 4D / 3D tensor, matrix, vector or a scalar. Number of dimensions used
870*d82cac58SJerin Jacob  * for the data would depend on the format. Unused dimensions to be set to 1.
871*d82cac58SJerin Jacob  */
872*d82cac58SJerin Jacob struct rte_ml_io_shape {
873*d82cac58SJerin Jacob 	enum rte_ml_io_format format;
874*d82cac58SJerin Jacob 	/**< Format of the data */
875*d82cac58SJerin Jacob 	uint32_t w;
876*d82cac58SJerin Jacob 	/**< First dimension */
877*d82cac58SJerin Jacob 	uint32_t x;
878*d82cac58SJerin Jacob 	/**< Second dimension */
879*d82cac58SJerin Jacob 	uint32_t y;
880*d82cac58SJerin Jacob 	/**< Third dimension */
881*d82cac58SJerin Jacob 	uint32_t z;
882*d82cac58SJerin Jacob 	/**< Fourth dimension */
883*d82cac58SJerin Jacob };
884*d82cac58SJerin Jacob 
885*d82cac58SJerin Jacob /** Input and output data information structure
886*d82cac58SJerin Jacob  *
887*d82cac58SJerin Jacob  * Specifies the type and shape of input and output data.
888*d82cac58SJerin Jacob  */
889*d82cac58SJerin Jacob struct rte_ml_io_info {
890*d82cac58SJerin Jacob 	char name[RTE_ML_STR_MAX];
891*d82cac58SJerin Jacob 	/**< Name of data */
892*d82cac58SJerin Jacob 	struct rte_ml_io_shape shape;
893*d82cac58SJerin Jacob 	/**< Shape of data */
894*d82cac58SJerin Jacob 	enum rte_ml_io_type qtype;
895*d82cac58SJerin Jacob 	/**< Type of quantized data */
896*d82cac58SJerin Jacob 	enum rte_ml_io_type dtype;
897*d82cac58SJerin Jacob 	/**< Type of de-quantized data */
898*d82cac58SJerin Jacob };
899*d82cac58SJerin Jacob 
900*d82cac58SJerin Jacob /** Model information structure */
901*d82cac58SJerin Jacob struct rte_ml_model_info {
902*d82cac58SJerin Jacob 	char name[RTE_ML_STR_MAX];
903*d82cac58SJerin Jacob 	/**< Model name. */
904*d82cac58SJerin Jacob 	char version[RTE_ML_STR_MAX];
905*d82cac58SJerin Jacob 	/**< Model version */
906*d82cac58SJerin Jacob 	uint16_t model_id;
907*d82cac58SJerin Jacob 	/**< Model ID */
908*d82cac58SJerin Jacob 	uint16_t device_id;
909*d82cac58SJerin Jacob 	/**< Device ID */
910*d82cac58SJerin Jacob 	uint16_t batch_size;
911*d82cac58SJerin Jacob 	/**< Maximum number of batches that the model can process simultaneously */
912*d82cac58SJerin Jacob 	uint32_t nb_inputs;
913*d82cac58SJerin Jacob 	/**< Number of inputs */
914*d82cac58SJerin Jacob 	const struct rte_ml_io_info *input_info;
915*d82cac58SJerin Jacob 	/**< Input info array. Array size is equal to nb_inputs */
916*d82cac58SJerin Jacob 	uint32_t nb_outputs;
917*d82cac58SJerin Jacob 	/**< Number of outputs */
918*d82cac58SJerin Jacob 	const struct rte_ml_io_info *output_info;
919*d82cac58SJerin Jacob 	/**< Output info array. Array size is equal to nb_outputs */
920*d82cac58SJerin Jacob 	uint64_t wb_size;
921*d82cac58SJerin Jacob 	/**< Size of model weights and bias */
922*d82cac58SJerin Jacob };
923*d82cac58SJerin Jacob 
924*d82cac58SJerin Jacob /**
925*d82cac58SJerin Jacob  * Get ML model information.
926*d82cac58SJerin Jacob  *
927*d82cac58SJerin Jacob  * @param[in] dev_id
928*d82cac58SJerin Jacob  *   The identifier of the device.
929*d82cac58SJerin Jacob  * @param[in] model_id
930*d82cac58SJerin Jacob  *   Identifier for the model created
931*d82cac58SJerin Jacob  * @param[out] model_info
932*d82cac58SJerin Jacob  *   Pointer to a model info structure
933*d82cac58SJerin Jacob  *
934*d82cac58SJerin Jacob  * @return
935*d82cac58SJerin Jacob  *   - Returns 0 on success
936*d82cac58SJerin Jacob  *   - Returns negative value on failure
937*d82cac58SJerin Jacob  */
938*d82cac58SJerin Jacob __rte_experimental
939*d82cac58SJerin Jacob int
940*d82cac58SJerin Jacob rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
941*d82cac58SJerin Jacob 
942*d82cac58SJerin Jacob /**
943*d82cac58SJerin Jacob  * Update the model parameters without unloading model.
944*d82cac58SJerin Jacob  *
945*d82cac58SJerin Jacob  * Update model parameters such as weights and bias without unloading the model.
946*d82cac58SJerin Jacob  * rte_ml_model_stop() must be called before invoking this API.
947*d82cac58SJerin Jacob  *
948*d82cac58SJerin Jacob  * @param[in] dev_id
949*d82cac58SJerin Jacob  *   The identifier of the device.
950*d82cac58SJerin Jacob  * @param[in] model_id
951*d82cac58SJerin Jacob  *   Identifier for the model created
952*d82cac58SJerin Jacob  * @param[in] buffer
953*d82cac58SJerin Jacob  *   Pointer to the model weights and bias buffer.
954*d82cac58SJerin Jacob  * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
955*d82cac58SJerin Jacob  *
956*d82cac58SJerin Jacob  * @return
957*d82cac58SJerin Jacob  *   - Returns 0 on success
958*d82cac58SJerin Jacob  *   - Returns negative value on failure
959*d82cac58SJerin Jacob  */
960*d82cac58SJerin Jacob __rte_experimental
961*d82cac58SJerin Jacob int
962*d82cac58SJerin Jacob rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
963*d82cac58SJerin Jacob 
964*d82cac58SJerin Jacob /* IO operations */
965*d82cac58SJerin Jacob 
966*d82cac58SJerin Jacob /**
967*d82cac58SJerin Jacob  * Get size of quantized and dequantized input buffers.
968*d82cac58SJerin Jacob  *
969*d82cac58SJerin Jacob  * Calculate the size of buffers required for quantized and dequantized input data.
970*d82cac58SJerin Jacob  * This API would return the buffer sizes for the number of batches provided and would
971*d82cac58SJerin Jacob  * consider the alignment requirements as per the PMD. Input sizes computed by this API can
972*d82cac58SJerin Jacob  * be used by the application to allocate buffers.
973*d82cac58SJerin Jacob  *
974*d82cac58SJerin Jacob  * @param[in] dev_id
975*d82cac58SJerin Jacob  *   The identifier of the device.
976*d82cac58SJerin Jacob  * @param[in] model_id
977*d82cac58SJerin Jacob  *   Identifier for the model created
978*d82cac58SJerin Jacob  * @param[in] nb_batches
979*d82cac58SJerin Jacob  *   Number of batches of input to be processed in a single inference job
980*d82cac58SJerin Jacob  * @param[out] input_qsize
981*d82cac58SJerin Jacob  *   Quantized input size pointer.
982*d82cac58SJerin Jacob  * NULL value is allowed, in which case input_qsize is not calculated by the driver.
983*d82cac58SJerin Jacob  * @param[out] input_dsize
984*d82cac58SJerin Jacob  *   Dequantized input size pointer.
985*d82cac58SJerin Jacob  * NULL value is allowed, in which case input_dsize is not calculated by the driver.
986*d82cac58SJerin Jacob  *
987*d82cac58SJerin Jacob  * @return
988*d82cac58SJerin Jacob  *   - Returns 0 on success
989*d82cac58SJerin Jacob  *   - Returns negative value on failure
990*d82cac58SJerin Jacob  */
991*d82cac58SJerin Jacob __rte_experimental
992*d82cac58SJerin Jacob int
993*d82cac58SJerin Jacob rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
994*d82cac58SJerin Jacob 			 uint64_t *input_qsize, uint64_t *input_dsize);
995*d82cac58SJerin Jacob 
996*d82cac58SJerin Jacob /**
997*d82cac58SJerin Jacob  * Get size of quantized and dequantized output buffers.
998*d82cac58SJerin Jacob  *
999*d82cac58SJerin Jacob  * Calculate the size of buffers required for quantized and dequantized output data.
1000*d82cac58SJerin Jacob  * This API would return the buffer sizes for the number of batches provided and would consider
1001*d82cac58SJerin Jacob  * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
1002*d82cac58SJerin Jacob  * application to allocate buffers.
1003*d82cac58SJerin Jacob  *
1004*d82cac58SJerin Jacob  * @param[in] dev_id
1005*d82cac58SJerin Jacob  *   The identifier of the device.
1006*d82cac58SJerin Jacob  * @param[in] model_id
1007*d82cac58SJerin Jacob  *   Identifier for the model created
1008*d82cac58SJerin Jacob  * @param[in] nb_batches
1009*d82cac58SJerin Jacob  *   Number of batches of input to be processed in a single inference job
1010*d82cac58SJerin Jacob  * @param[out] output_qsize
1011*d82cac58SJerin Jacob  *   Quantized output size pointer.
1012*d82cac58SJerin Jacob  * NULL value is allowed, in which case output_qsize is not calculated by the driver.
1013*d82cac58SJerin Jacob  * @param[out] output_dsize
1014*d82cac58SJerin Jacob  *   Dequantized output size pointer.
1015*d82cac58SJerin Jacob  * NULL value is allowed, in which case output_dsize is not calculated by the driver.
1016*d82cac58SJerin Jacob  *
1017*d82cac58SJerin Jacob  * @return
1018*d82cac58SJerin Jacob  *   - Returns 0 on success
1019*d82cac58SJerin Jacob  *   - Returns negative value on failure
1020*d82cac58SJerin Jacob  */
1021*d82cac58SJerin Jacob __rte_experimental
1022*d82cac58SJerin Jacob int
1023*d82cac58SJerin Jacob rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
1024*d82cac58SJerin Jacob 			  uint64_t *output_qsize, uint64_t *output_dsize);
1025*d82cac58SJerin Jacob 
1026*d82cac58SJerin Jacob /**
1027*d82cac58SJerin Jacob  * Quantize input data.
1028*d82cac58SJerin Jacob  *
1029*d82cac58SJerin Jacob  * Quantization converts data from a higher precision type to a lower precision type to improve
1030*d82cac58SJerin Jacob  * the throughput and efficiency of the model execution with minimal loss of accuracy.
1031*d82cac58SJerin Jacob  * Types of dequantized data and quantized data are specified by the model.
1032*d82cac58SJerin Jacob  *
1033*d82cac58SJerin Jacob  * @param[in] dev_id
1034*d82cac58SJerin Jacob  *   The identifier of the device.
1035*d82cac58SJerin Jacob  * @param[in] model_id
1036*d82cac58SJerin Jacob  *   Identifier for the model
1037*d82cac58SJerin Jacob  * @param[in] nb_batches
1038*d82cac58SJerin Jacob  *   Number of batches in the dequantized input buffer
1039*d82cac58SJerin Jacob  * @param[in] dbuffer
1040*d82cac58SJerin Jacob  *   Address of dequantized input data
1041*d82cac58SJerin Jacob  * @param[out] qbuffer
1042*d82cac58SJerin Jacob  *   Address of quantized input data
1043*d82cac58SJerin Jacob  *
1044*d82cac58SJerin Jacob  * @return
1045*d82cac58SJerin Jacob  *   - Returns 0 on success
1046*d82cac58SJerin Jacob  *   - Returns negative value on failure
1047*d82cac58SJerin Jacob  */
1048*d82cac58SJerin Jacob __rte_experimental
1049*d82cac58SJerin Jacob int
1050*d82cac58SJerin Jacob rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
1051*d82cac58SJerin Jacob 		   void *qbuffer);
1052*d82cac58SJerin Jacob 
1053*d82cac58SJerin Jacob /**
1054*d82cac58SJerin Jacob  * Dequantize output data.
1055*d82cac58SJerin Jacob  *
1056*d82cac58SJerin Jacob  * Dequantization converts data from a lower precision type to a higher precision type.
1057*d82cac58SJerin Jacob  * Types of quantized data and dequantized data are specified by the model.
1058*d82cac58SJerin Jacob  *
1059*d82cac58SJerin Jacob  * @param[in] dev_id
1060*d82cac58SJerin Jacob  *   The identifier of the device.
1061*d82cac58SJerin Jacob  * @param[in] model_id
1062*d82cac58SJerin Jacob  *   Identifier for the model
1063*d82cac58SJerin Jacob  * @param[in] nb_batches
1064*d82cac58SJerin Jacob  *   Number of batches in the dequantized output buffer
1065*d82cac58SJerin Jacob  * @param[in] qbuffer
1066*d82cac58SJerin Jacob  *   Address of quantized output data
1067*d82cac58SJerin Jacob  * @param[out] dbuffer
1068*d82cac58SJerin Jacob  *   Address of dequantized output data
1069*d82cac58SJerin Jacob  *
1070*d82cac58SJerin Jacob  * @return
1071*d82cac58SJerin Jacob  *   - Returns 0 on success
1072*d82cac58SJerin Jacob  *   - Returns negative value on failure
1073*d82cac58SJerin Jacob  */
1074*d82cac58SJerin Jacob __rte_experimental
1075*d82cac58SJerin Jacob int
1076*d82cac58SJerin Jacob rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
1077*d82cac58SJerin Jacob 		     void *dbuffer);
1078*d82cac58SJerin Jacob 
1079*d82cac58SJerin Jacob /* ML op pool operations */
1080*d82cac58SJerin Jacob 
1081*d82cac58SJerin Jacob /**
1082*d82cac58SJerin Jacob  * Create an ML operation pool
1083*d82cac58SJerin Jacob  *
1084*d82cac58SJerin Jacob  * @param name
1085*d82cac58SJerin Jacob  *   ML operations pool name
1086*d82cac58SJerin Jacob  * @param nb_elts
1087*d82cac58SJerin Jacob  *   Number of elements in pool
1088*d82cac58SJerin Jacob  * @param cache_size
1089*d82cac58SJerin Jacob  *   Number of elements to cache on lcore, see
1090*d82cac58SJerin Jacob  *   *rte_mempool_create* for further details about cache size
1091*d82cac58SJerin Jacob  * @param user_size
1092*d82cac58SJerin Jacob  *   Size of private data to allocate for user with each operation
1093*d82cac58SJerin Jacob  * @param socket_id
1094*d82cac58SJerin Jacob  *   Socket identifier to allocate memory on
1095*d82cac58SJerin Jacob  * @return
1096*d82cac58SJerin Jacob  *  - On success pointer to mempool
1097*d82cac58SJerin Jacob  *  - On failure NULL
1098*d82cac58SJerin Jacob  */
1099*d82cac58SJerin Jacob __rte_experimental
1100*d82cac58SJerin Jacob struct rte_mempool *
1101*d82cac58SJerin Jacob rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
1102*d82cac58SJerin Jacob 		      uint16_t user_size, int socket_id);
1103*d82cac58SJerin Jacob 
1104*d82cac58SJerin Jacob /**
1105*d82cac58SJerin Jacob  * Free an ML operation pool
1106*d82cac58SJerin Jacob  *
1107*d82cac58SJerin Jacob  * @param mempool
1108*d82cac58SJerin Jacob  *   A pointer to the mempool structure.
1109*d82cac58SJerin Jacob  *   If NULL, the function does nothing.
1110*d82cac58SJerin Jacob  */
1111*d82cac58SJerin Jacob __rte_experimental
1112*d82cac58SJerin Jacob void
1113*d82cac58SJerin Jacob rte_ml_op_pool_free(struct rte_mempool *mempool);
1114*d82cac58SJerin Jacob 
1115*d82cac58SJerin Jacob #ifdef __cplusplus
1116*d82cac58SJerin Jacob }
1117*d82cac58SJerin Jacob #endif
1118*d82cac58SJerin Jacob 
1119*d82cac58SJerin Jacob #endif /* RTE_MLDEV_H */
1120