xref: /dpdk/lib/mldev/rte_mldev.h (revision 3c4898ef762eeb2578b9ae3d7f6e3a0e5cbca8c8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_H
6 #define RTE_MLDEV_H
7 
8 /**
9  * @file rte_mldev.h
10  *
11  * @warning
12  * @b EXPERIMENTAL:
13  * All functions in this file may be changed or removed without prior notice.
14  *
15  * ML (Machine Learning) device API.
16  *
17  * The ML framework is built on the following model:
18  *
19  *
20  *     +-----------------+               rte_ml_[en|de]queue_burst()
21  *     |                 |                          |
22  *     |     Machine     o------+     +--------+    |
23  *     |     Learning    |      |     | queue  |    |    +------+
24  *     |     Inference   o------+-----o        |<===o===>|Core 0|
25  *     |     Engine      |      |     | pair 0 |         +------+
26  *     |                 o----+ |     +--------+
27  *     |                 |    | |
28  *     +-----------------+    | |     +--------+
29  *              ^             | |     | queue  |         +------+
30  *              |             | +-----o        |<=======>|Core 1|
31  *              |             |       | pair 1 |         +------+
32  *              |             |       +--------+
33  *     +--------+--------+    |
34  *     | +-------------+ |    |       +--------+
35  *     | |   Model 0   | |    |       | queue  |         +------+
36  *     | +-------------+ |    +-------o        |<=======>|Core N|
37  *     | +-------------+ |            | pair N |         +------+
38  *     | |   Model 1   | |            +--------+
39  *     | +-------------+ |
40  *     | +-------------+ |<------> rte_ml_model_load()
41  *     | |   Model ..  | |-------> rte_ml_model_info_get()
42  *     | +-------------+ |<------- rte_ml_model_start()
43  *     | +-------------+ |<------- rte_ml_model_stop()
44  *     | |   Model N   | |<------- rte_ml_model_params_update()
45  *     | +-------------+ |<------- rte_ml_model_unload()
46  *     +-----------------+
47  *
48  * ML Device: A hardware or software-based implementation of ML device API for
49  * running inferences using a pre-trained ML model.
50  *
51  * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
52  * procedure/algorithm and data/pattern required to make predictions on live data.
53  * Once the model is created and trained outside of the DPDK scope, the model can be loaded
54  * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
55  * rte_ml_model_params_update() can be used to update model parameters such as weights
56  * and bias without unloading the model using rte_ml_model_unload().
57  *
58  * ML Inference: ML inference is the process of feeding data to the model via the
59  * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to retrieve the computed
60  * outputs/predictions from the started model.
61  *
62  * In all functions of the ML device API, the ML device is designated by an
63  * integer >= 0 called the device identifier *dev_id*.
64  *
65  * The functions exported by the ML device API to setup a device designated by
66  * its device identifier must be invoked in the following order:
67  *
68  *      - rte_ml_dev_configure()
69  *      - rte_ml_dev_queue_pair_setup()
70  *      - rte_ml_dev_start()
71  *
72  * A model is required to run inference operations with the user-specified inputs.
73  * The application needs to invoke the ML model API in the following order before queueing
74  * inference jobs:
75  *
76  *      - rte_ml_model_load()
77  *      - rte_ml_model_start()
78  *
79  * A model can be loaded on a device only after the device has been configured and can be
80  * started or stopped only after a device has been started.
81  *
82  * The rte_ml_model_info_get() API is provided to retrieve information related to the model.
83  * The information includes the shape and type of the inputs and outputs required for inference.
84  *
85  * Data quantization and dequantization are key aspects of the ML domain. Quantization converts
86  * input data from a higher precision to a lower precision data type, and dequantization does the
87  * reverse for the output. APIs are provided for quantization through rte_ml_io_quantize() and
88  * dequantization through rte_ml_io_dequantize(). These APIs can handle input
89  * and output buffers holding data for multiple batches.
90  *
91  * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to
92  * get the size of quantized and dequantized multi-batch input and output buffers.
93  *
94  * The user can optionally update the model parameters with rte_ml_model_params_update() after
95  * invoking the rte_ml_model_stop() API on a given model ID.
96  *
97  * The application can invoke, in any order, the functions exported by the ML API to enqueue
98  * inference jobs and dequeue inference responses.
99  *
100  * If the application wants to change the device configuration (i.e., call
101  * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
102  * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated, then
103  * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update()
104  * for the given model. The application does not need to call rte_ml_dev_stop() for
105  * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload(), etc.
106  *
107  * Once the device has been started with the rte_ml_dev_start() API and the model has been
108  * started with the rte_ml_model_start() API, the application can call
109  * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() on the destined device and model ID.
110  *
111  * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
112  *
113  * A typical application using the ML API will follow the programming flow below
114  * (an illustrative sketch of this flow is given after this comment block).
115  *
116  * - rte_ml_dev_configure()
117  * - rte_ml_dev_queue_pair_setup()
118  * - rte_ml_model_load()
119  * - rte_ml_dev_start()
120  * - rte_ml_model_start()
121  * - rte_ml_model_info_get()
122  * - rte_ml_enqueue_burst()
123  * - rte_ml_dequeue_burst()
124  * - rte_ml_model_stop()
125  * - rte_ml_model_unload()
126  * - rte_ml_dev_stop()
127  * - rte_ml_dev_close()
128  *
129  * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
130  * are lock-free functions which are assumed not to be invoked in parallel on different logical
131  * cores on the same target object. For instance, the dequeue function of a poll mode driver
132  * cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of
133  * course, this function can be invoked in parallel by different logical cores on different
134  * queue pairs. It is the responsibility of the user application to enforce this rule.
135  */
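/*
 * Illustrative sketch of the programming flow above. It assumes a single
 * device (dev_id 0), one queue pair and a model image already present in
 * memory at model_buf/model_size (hypothetical names); error handling is
 * abbreviated and this is not a normative example.
 *
 *    int16_t dev_id = 0;
 *    uint16_t model_id;
 *    struct rte_ml_dev_info dev_info;
 *    struct rte_ml_dev_config dev_conf = {0};
 *    struct rte_ml_dev_qp_conf qp_conf = {0};
 *    struct rte_ml_model_params params = {.addr = model_buf, .size = model_size};
 *
 *    rte_ml_dev_info_get(dev_id, &dev_info);
 *    dev_conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *    dev_conf.nb_models = 1;
 *    dev_conf.nb_queue_pairs = 1;
 *    rte_ml_dev_configure(dev_id, &dev_conf);
 *
 *    qp_conf.nb_desc = dev_info.max_desc;
 *    rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, dev_conf.socket_id);
 *
 *    rte_ml_model_load(dev_id, &params, &model_id);
 *    rte_ml_dev_start(dev_id);
 *    rte_ml_model_start(dev_id, model_id);
 *
 *    ... enqueue and dequeue inference operations ...
 *
 *    rte_ml_model_stop(dev_id, model_id);
 *    rte_ml_model_unload(dev_id, model_id);
 *    rte_ml_dev_stop(dev_id);
 *    rte_ml_dev_close(dev_id);
 */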
136 
137 #include <rte_common.h>
138 #include <rte_log.h>
139 #include <rte_mempool.h>
140 
141 #ifdef __cplusplus
142 extern "C" {
143 #endif
144 
145 /* Logging Macro */
146 extern int rte_ml_dev_logtype;
147 
148 #define RTE_MLDEV_LOG(level, fmt, args...)                                                         \
149 	rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)
150 
151 #define RTE_ML_STR_MAX 128
152 /**< Maximum length of name string */
153 
154 #define RTE_MLDEV_DEFAULT_MAX 32
155 /**< Maximum number of devices if rte_ml_dev_init() is not called. */
156 
157 /* Device operations */
158 
159 /**
160  * Initialize the device array before probing devices. If not called, the first device probed would
161  * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
162  *
163  * @param dev_max
164  *   Maximum number of devices.
165  *
166  * @return
167  *   0 on success, -rte_errno otherwise:
168  *   - ENOMEM if out of memory
169  *   - EINVAL if 0 size
170  *   - EBUSY if already initialized
171  */
172 __rte_experimental
173 int
174 rte_ml_dev_init(size_t dev_max);
175 
176 /**
177  * Get the total number of ML devices that have been successfully initialised.
178  *
179  * @return
180  *   - The total number of usable ML devices.
181  */
182 __rte_experimental
183 uint16_t
184 rte_ml_dev_count(void);
185 
186 /**
187  * Check if the device is in ready state.
188  *
189  * @param dev_id
190  *   The identifier of the device.
191  *
192  * @return
193  *   - 0 if the device is not in ready state.
194  *   - 1 if the device is in ready state.
195  */
196 __rte_experimental
197 int
198 rte_ml_dev_is_valid_dev(int16_t dev_id);
199 
200 /**
201  * Return the NUMA socket to which a device is connected.
202  *
203  * @param dev_id
204  *   The identifier of the device.
205  *
206  * @return
207  *   - The NUMA socket id to which the device is connected
208  *   - 0 if the socket could not be determined.
209  *   - -EINVAL: if the dev_id value is not valid.
210  */
211 __rte_experimental
212 int
213 rte_ml_dev_socket_id(int16_t dev_id);
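/*
 * Illustrative sketch (not part of the API): enumerating the probed ML
 * devices and reporting their NUMA socket. Device identifiers are assumed
 * to be contiguous from 0 for simplicity, which is why each identifier is
 * additionally checked with rte_ml_dev_is_valid_dev().
 *
 *    int16_t dev_id;
 *    uint16_t nb_devs = rte_ml_dev_count();
 *
 *    for (dev_id = 0; dev_id < (int16_t)nb_devs; dev_id++) {
 *        if (!rte_ml_dev_is_valid_dev(dev_id))
 *            continue;
 *        RTE_MLDEV_LOG(INFO, "mldev %d on socket %d", dev_id,
 *                      rte_ml_dev_socket_id(dev_id));
 *    }
 */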
214 
215 /** ML device information */
216 struct rte_ml_dev_info {
217 	const char *driver_name;
218 	/**< Driver name */
219 	uint16_t max_models;
220 	/**< Maximum number of models supported by the device.
221 	 * @see struct rte_ml_dev_config::nb_models
222 	 */
223 	uint16_t max_queue_pairs;
224 	/**< Maximum number of queue pairs supported by the device.
225 	 * @see struct rte_ml_dev_config::nb_queue_pairs
226 	 */
227 	uint16_t max_desc;
228 	/**< Maximum number of descriptors per queue pair allowed by the device.
229 	 * @see struct rte_ml_dev_qp_conf::nb_desc
230 	 */
231 	uint16_t max_io;
232 	/**< Maximum number of inputs/outputs supported per model. */
233 	uint16_t max_segments;
234 	/**< Maximum number of scatter-gather entries supported by the device.
235 	 * @see struct rte_ml_buff_seg  struct rte_ml_buff_seg::next
236 	 */
237 	uint16_t align_size;
238 	/**< Alignment size of IO buffers used by the device. */
239 };
240 
241 /**
242  * Retrieve the information of the device.
243  *
244  * @param dev_id
245  *   The identifier of the device.
246  * @param dev_info
247  *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
248  *
249  * @return
250  *   - 0: Success, driver updates the information of the ML device
251  *   - < 0: Error code returned by the driver info get function.
252  */
253 __rte_experimental
254 int
255 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
256 
257 /** ML device configuration structure */
258 struct rte_ml_dev_config {
259 	int socket_id;
260 	/**< Socket to allocate resources on. */
261 	uint16_t nb_models;
262 	/**< Number of models to be loaded on the device.
263 	 * This value cannot exceed the max_models previously provided in
264 	 * struct rte_ml_dev_info::max_models
265 	 */
266 	uint16_t nb_queue_pairs;
267 	/**< Number of queue pairs to configure on this device.
268 	 * This value cannot exceed the max_queue_pairs previously provided in
269 	 * struct rte_ml_dev_info::max_queue_pairs
270 	 */
271 };
272 
273 /**
274  * Configure an ML device.
275  *
276  * This function must be invoked first before any other function in the API.
277  *
278  * The ML device can be re-configured when in a stopped state. The device cannot be re-configured
279  * after rte_ml_dev_close() is called.
280  *
281  * The caller may use rte_ml_dev_info_get() to get the capabilities of the resources available
282  * for this ML device.
283  *
284  * @param dev_id
285  *   The identifier of the device to configure.
286  * @param config
287  *   The ML device configuration structure.
288  *
289  * @return
290  *   - 0: Success, device configured.
291  *   - < 0: Error code returned by the driver configuration function.
292  */
293 __rte_experimental
294 int
295 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
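/*
 * Illustrative sketch (not part of the API): configuring a device based on
 * its advertised capabilities. The requested counts are hypothetical and are
 * clamped to the limits reported by rte_ml_dev_info_get(); error handling is
 * abbreviated.
 *
 *    struct rte_ml_dev_info dev_info;
 *    struct rte_ml_dev_config conf;
 *
 *    if (rte_ml_dev_info_get(dev_id, &dev_info) != 0)
 *        return -1;
 *
 *    conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *    conf.nb_models = RTE_MIN((uint16_t)2, dev_info.max_models);
 *    conf.nb_queue_pairs = RTE_MIN((uint16_t)1, dev_info.max_queue_pairs);
 *
 *    if (rte_ml_dev_configure(dev_id, &conf) != 0)
 *        return -1;
 */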
296 
297 /* Forward declaration */
298 struct rte_ml_op;
299 
300 /** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op. */
301 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
302 
303 /** ML device queue pair configuration structure. */
304 struct rte_ml_dev_qp_conf {
305 	uint32_t nb_desc;
306 	/**< Number of descriptors per queue pair.
307 	 * This value cannot exceed the max_desc previously provided in
308 	 * struct rte_ml_dev_info::max_desc
309 	 */
310 	rte_ml_dev_stop_flush_t cb;
311 	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
312 	 * Value NULL is allowed, in which case the callback will not be invoked.
313 	 * This function can be used to properly dispose of outstanding ML ops from all
314 	 * queue pairs, for example ops containing memory pointers.
315 	 * @see rte_ml_dev_stop()
316 	 */
317 };
318 
319 /**
320  * Set up a queue pair for a device. This should only be called when the device is stopped.
321  *
322  * @param dev_id
323  *   The identifier of the device.
324  * @param queue_pair_id
325  *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
326  * previously supplied to rte_ml_dev_configure().
327  * @param qp_conf
328  *   The pointer to the configuration data to be used for the queue pair.
329  * @param socket_id
330  *   The *socket_id* argument is the socket identifier in case of NUMA.
331  * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
332  * for the queue pair.
333  *
334  * @return
335  *   - 0: Success, queue pair correctly set up.
336  *   - < 0: Queue pair configuration failed.
337  */
338 __rte_experimental
339 int
340 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
341 			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
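/*
 * Illustrative sketch (not part of the API): setting up queue pair 0 with a
 * stop-flush callback that returns flushed ops to their mempool. The callback
 * body is only one possible way an application might dispose of outstanding
 * ops; error handling is abbreviated.
 *
 *    static void
 *    op_flush_cb(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op)
 *    {
 *        RTE_SET_USED(dev_id);
 *        RTE_SET_USED(qp_id);
 *        rte_mempool_put(op->mempool, op);
 *    }
 *
 *    struct rte_ml_dev_qp_conf qp_conf = {
 *        .nb_desc = dev_info.max_desc,
 *        .cb = op_flush_cb,
 *    };
 *
 *    if (rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY) != 0)
 *        return -1;
 */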
342 
343 /**
344  * Start an ML device.
345  *
346  * The device start step consists of setting the configured features and enabling the ML device
347  * to accept inference jobs.
348  *
349  * @param dev_id
350  *   The identifier of the device.
351  *
352  * @return
353  *   - 0: Success, device started.
354  *   - <0: Error code of the driver device start function.
355  */
356 __rte_experimental
357 int
358 rte_ml_dev_start(int16_t dev_id);
359 
360 /**
361  * Stop an ML device. A stopped device cannot accept inference jobs.
362  * The device can be restarted with a call to rte_ml_dev_start().
363  *
364  * @param dev_id
365  *   The identifier of the device.
366  *
367  * @return
368  *   - 0: Success, device stopped.
369  *   - <0: Error code of the driver device stop function.
370  */
371 __rte_experimental
372 int
373 rte_ml_dev_stop(int16_t dev_id);
374 
375 /**
376  * Close an ML device. The device cannot be restarted!
377  *
378  * @param dev_id
379  *   The identifier of the device.
380  *
381  * @return
382  *  - 0 on successfully closing device.
383  *  - <0 on failure to close device.
384  */
385 __rte_experimental
386 int
387 rte_ml_dev_close(int16_t dev_id);
388 
389 /** Status of ML operation */
390 enum rte_ml_op_status {
391 	RTE_ML_OP_STATUS_SUCCESS = 0,
392 	/**< Operation completed successfully */
393 	RTE_ML_OP_STATUS_NOT_PROCESSED,
394 	/**< Operation has not yet been processed by the device. */
395 	RTE_ML_OP_STATUS_ERROR,
396 	/**< Operation completed with error.
397 	 * Application can invoke rte_ml_op_error_get() to get PMD specific
398 	 * error code if needed.
399 	 */
400 };
401 
402 /** ML operation's input and output buffer representation as a scatter-gather list.
403  */
404 struct rte_ml_buff_seg {
405 	rte_iova_t iova_addr;
406 	/**< IOVA address of segment buffer. */
407 	void *addr;
408 	/**< Virtual address of segment buffer. */
409 	uint32_t length;
410 	/**< Segment length. */
411 	uint32_t reserved;
412 	/**< Reserved for future use. */
413 	struct rte_ml_buff_seg *next;
414 	/**< Points to next segment. Value NULL represents the last segment. */
415 };
416 
417 /**
418  * ML Operation.
419  *
420  * This structure contains data related to performing an ML operation on the buffers using
421  * the model specified through model_id.
422  */
423 struct rte_ml_op {
424 	uint16_t model_id;
425 	/**< Model ID to be used for the operation. */
426 	uint16_t nb_batches;
427 	/**< Number of batches. Minimum value must be one.
428 	 * The input buffer must hold inference data for each batch contiguously.
429 	 */
430 	uint32_t reserved;
431 	/**< Reserved for future use. */
432 	struct rte_mempool *mempool;
433 	/**< Pool from which operation is allocated. */
434 	struct rte_ml_buff_seg **input;
435 	/**< Array of buffer segments to hold the inference input data.
436 	 *
437 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
438 	 * the array is 1.
439 	 *
440 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
441 	 * the array is rte_ml_model_info::nb_inputs.
442 	 *
443 	 * @see struct rte_ml_model_info::io_layout
444 	 */
445 	struct rte_ml_buff_seg **output;
446 	/**< Array of buffer segments to hold the inference output data.
447 	 *
448 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
449 	 * the array is 1.
450 	 *
451 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
452 	 * the array is rte_ml_model_info::nb_outputs.
453 	 *
454 	 * @see struct rte_ml_model_info::io_layout
455 	 */
456 	union {
457 		uint64_t user_u64;
458 		/**< User data as uint64_t.*/
459 		void *user_ptr;
460 		/**< User data as void*.*/
461 	};
462 	enum rte_ml_op_status status;
463 	/**< Operation status. */
464 	uint64_t impl_opaque;
465 	/**< Implementation specific opaque value.
466 	 * An implementation may use this field to hold
467 	 * implementation specific value to share between
468 	 * dequeue and enqueue operation.
469 	 * The application should not modify this field.
470 	 */
471 } __rte_cache_aligned;
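/*
 * Illustrative sketch (not part of the API): filling an rte_ml_op for a model
 * that uses RTE_ML_IO_LAYOUT_PACKED, where one unsegmented buffer holds all
 * inputs and another holds all outputs. The op pointer, buffer addresses and
 * lengths, and the previously retrieved rte_ml_model_info structure *info*
 * are hypothetical and assumed to be prepared by the application.
 *
 *    struct rte_ml_buff_seg in_seg = {
 *        .addr = in_buf,
 *        .iova_addr = rte_mem_virt2iova(in_buf),
 *        .length = in_len,
 *        .next = NULL,
 *    };
 *    struct rte_ml_buff_seg out_seg = {
 *        .addr = out_buf,
 *        .iova_addr = rte_mem_virt2iova(out_buf),
 *        .length = out_len,
 *        .next = NULL,
 *    };
 *    struct rte_ml_buff_seg *in_segs[] = {&in_seg};
 *    struct rte_ml_buff_seg *out_segs[] = {&out_seg};
 *
 *    op->model_id = model_id;
 *    op->nb_batches = info.min_batches;
 *    op->input = in_segs;
 *    op->output = out_segs;
 *    op->user_ptr = NULL;
 */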
472 
473 /* Enqueue/Dequeue operations */
474 
475 /**
476  * Enqueue a burst of ML inferences for processing on an ML device.
477  *
478  * The rte_ml_enqueue_burst() function is invoked to place ML inference
479  * operations on the queue *qp_id* of the device designated by its *dev_id*.
480  *
481  * The *nb_ops* parameter is the number of inferences to process which are
482  * supplied in the *ops* array of *rte_ml_op* structures.
483  *
484  * The rte_ml_enqueue_burst() function returns the number of inferences it
485  * actually enqueued for processing. A return value equal to *nb_ops* means that
486  * all operations have been enqueued.
487  *
488  * @param dev_id
489  *   The identifier of the device.
490  * @param qp_id
491  *   The index of the queue pair on which inferences are to be enqueued for processing.
492  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
493  * *rte_ml_dev_configure*.
494  * @param ops
495  *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
496  * ML inferences to be processed.
497  * @param nb_ops
498  *   The number of operations to process.
499  *
500  * @return
501  *   The number of inference operations actually enqueued to the ML device.
502  * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
503  * is full or if invalid parameters are specified in a *rte_ml_op*.
504  */
505 __rte_experimental
506 uint16_t
507 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
508 
509 /**
510  * Dequeue a burst of processed ML inferences operations from a queue on the ML device.
511  * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
512  * in the *ops* array.
513  *
514  * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
515  * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
516  *
517  * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
518  * and this is likely to signify that other processed operations remain in the device's output queue.
519  * An application implementing a "retrieve as many processed operations as possible" policy can check
520  * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
521  * *nb_ops* is returned.
522  *
523  * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
524  * the corresponding overhead.
525  *
526  * @param dev_id
527  *   The identifier of the device.
528  * @param qp_id
529  *   The index of the queue pair from which to retrieve processed operations.
530  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
531  * rte_ml_dev_configure().
532  * @param ops
533  *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
534  * store *nb_ops* pointers in it.
535  * @param nb_ops
536  *   The maximum number of inferences to dequeue.
537  *
538  * @return
539  *   The number of operations actually dequeued, which is the number of pointers
540  * to *rte_ml_op* structures effectively supplied to the *ops* array.
541  */
542 __rte_experimental
543 uint16_t
544 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
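/*
 * Illustrative sketch (not part of the API): a simple synchronous
 * enqueue/poll sequence for a single prepared op on queue pair 0. A burst
 * size of one is used only for clarity; real applications would typically
 * batch operations and overlap enqueue with dequeue.
 *
 *    struct rte_ml_op *deq_op;
 *    uint16_t nb;
 *
 *    nb = rte_ml_enqueue_burst(dev_id, 0, &op, 1);
 *    if (nb != 1)
 *        return -1;    // queue full or invalid op
 *
 *    do {
 *        nb = rte_ml_dequeue_burst(dev_id, 0, &deq_op, 1);
 *    } while (nb == 0);
 *
 *    if (deq_op->status != RTE_ML_OP_STATUS_SUCCESS)
 *        ... handle error ...
 */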
545 
546 /**
547  * Verbose error structure definition.
548  */
549 struct rte_ml_op_error {
550 	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
551 	uint64_t errcode;	      /**< Vendor specific error code. */
552 };
553 
554 /**
555  * Get PMD specific error information for an ML op.
556  *
557  * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
558  * this API allows the application to get PMD specific error details.
559  *
560  * @param[in] dev_id
561  *   Device identifier
562  * @param[in] op
563  *   Handle of ML operation
564  * @param[out] error
565  *   Address of structure rte_ml_op_error to be filled
566  *
567  * @return
568  *   - Returns 0 on success
569  *   - Returns negative value on failure
570  */
571 __rte_experimental
572 int
573 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
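/*
 * Illustrative sketch (not part of the API): retrieving PMD specific error
 * details for an op that completed with RTE_ML_OP_STATUS_ERROR.
 *
 *    struct rte_ml_op_error err;
 *
 *    if (op->status == RTE_ML_OP_STATUS_ERROR &&
 *        rte_ml_op_error_get(dev_id, op, &err) == 0)
 *        RTE_MLDEV_LOG(ERR, "ml op failed: %s (0x%" PRIx64 ")",
 *                      err.message, err.errcode);
 */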
574 
575 /* Statistics operations */
576 
577 /** Device statistics. */
578 struct rte_ml_dev_stats {
579 	uint64_t enqueued_count;
580 	/**< Count of all operations enqueued */
581 	uint64_t dequeued_count;
582 	/**< Count of all operations dequeued */
583 	uint64_t enqueue_err_count;
584 	/**< Total error count on operations enqueued */
585 	uint64_t dequeue_err_count;
586 	/**< Total error count on operations dequeued */
587 };
588 
589 /**
590  * Retrieve the general I/O statistics of a device.
591  *
592  * @param dev_id
593  *   The identifier of the device.
594  * @param stats
595  *   Pointer to the structure where statistics will be copied.
596  * On error, this location may or may not have been modified.
597  * @return
598  *   - 0 on success
599  *   - -EINVAL: If invalid parameter pointer is provided.
600  */
601 __rte_experimental
602 int
603 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
604 
605 /**
606  * Reset the statistics of a device.
607  *
608  * @param dev_id
609  *   The identifier of the device.
610  */
611 __rte_experimental
612 void
613 rte_ml_dev_stats_reset(int16_t dev_id);
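/*
 * Illustrative sketch (not part of the API): sampling and then resetting the
 * basic device statistics.
 *
 *    struct rte_ml_dev_stats stats;
 *
 *    if (rte_ml_dev_stats_get(dev_id, &stats) == 0)
 *        RTE_MLDEV_LOG(INFO, "enq %" PRIu64 " deq %" PRIu64,
 *                      stats.enqueued_count, stats.dequeued_count);
 *    rte_ml_dev_stats_reset(dev_id);
 */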
614 
615 /**
616  * Selects the component of the mldev to retrieve statistics from.
617  */
618 enum rte_ml_dev_xstats_mode {
619 	RTE_ML_DEV_XSTATS_DEVICE,
620 	/**< Device xstats */
621 	RTE_ML_DEV_XSTATS_MODEL,
622 	/**< Model xstats */
623 };
624 
625 /**
626  * A name-key lookup element for extended statistics.
627  *
628  * This structure is used to map between names and ID numbers for extended ML device statistics.
629  */
630 struct rte_ml_dev_xstats_map {
631 	uint16_t id;
632 	/**< xstat identifier */
633 	char name[RTE_ML_STR_MAX];
634 	/**< xstat name */
635 };
636 
637 /**
638  * Retrieve names of extended statistics of an ML device.
639  *
640  * @param dev_id
641  *   The identifier of the device.
642  * @param mode
643  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
644  * @param model_id
645  *   Used to specify the model number in model mode, and is ignored in device mode.
646  * @param[out] xstats_map
647  *   Block of memory to insert names and ids into. Must have a capacity of at least *size* entries.
648  * If set to NULL, the function returns the required capacity. The id values returned can be passed
649  * to *rte_ml_dev_xstats_get* to select statistics.
650  * @param size
651  *   Capacity of xstats_map (number of entries).
652  * @return
653  *   - Positive value lower or equal to size: success. The return value is the number of entries
654  * filled in the stats table.
655  *   - Positive value higher than size: error, the given statistics table is too small. The return
656  * value corresponds to the size that should be given to succeed. The entries in the table are not
657  * valid and shall not be used by the caller.
658  *   - Negative value on error:
659  *        -ENODEV for invalid *dev_id*.
660  *        -EINVAL for invalid mode, model parameters.
661  *        -ENOTSUP if the device doesn't support this function.
662  */
663 __rte_experimental
664 int
665 rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
666 			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
667 
668 /**
669  * Retrieve the value of a single stat by requesting it by name.
670  *
671  * @param dev_id
672  *   The identifier of the device.
673  * @param name
674  *   Name of the stat to retrieve.
675  * @param[out] stat_id
676  *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
677  * stat can be made using rte_ml_dev_xstats_get(), which will be faster as it does not need to
678  * scan a list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1.
679  * @param[out] value
680  *   Value of the stat to be returned.
681  * @return
682  *   - Zero: No error.
683  *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
684  */
685 __rte_experimental
686 int
687 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
688 
689 /**
690  * Retrieve extended statistics of an ML device.
691  *
692  * @param dev_id
693  *   The identifier of the device.
694  * @param mode
695  *  Mode of statistics to retrieve. Choices include the device statistics and model statistics.
696  * @param model_id
697  *   Used to specify the model id in model mode, and is ignored in device mode.
698  * @param stat_ids
699  *   ID numbers of the stats to get. The ids can be got from the stat position in the stat list from
700  * rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
701  * @param[out] values
702  *   Values for each stat requested by ID.
703  * @param nb_ids
704  *   Number of stats requested.
705  * @return
706  *   - Positive value: number of stat entries filled into the values array
707  *   - Negative value on error:
708  *        -ENODEV for invalid *dev_id*.
709  *        -EINVAL for invalid mode, model id or stat id parameters.
710  *        -ENOTSUP if the device doesn't support this function.
711  */
712 __rte_experimental
713 int
714 rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
715 		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
716 
717 /**
718  * Reset the values of the xstats of the selected component in the device.
719  *
720  * @param dev_id
721  *   The identifier of the device.
722  * @param mode
723  *   Mode of the statistics to reset. Choose from device or model.
724  * @param model_id
725  *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
726  * @param stat_ids
727  *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
728  * reset. If non-NULL, must point to array of at least *nb_ids* size.
729  * @param nb_ids
730  *   The number of ids available from the *stat_ids* array. Ignored when stat_ids is NULL.
731  * @return
732  *   - Zero: successfully reset the statistics.
733  *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
734  */
735 __rte_experimental
736 int
737 rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
738 			const uint16_t stat_ids[], uint16_t nb_ids);
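/*
 * Illustrative sketch (not part of the API): dumping all device-level
 * extended statistics. The first names_get() call with a NULL map returns the
 * required number of entries; allocation failures and other errors are not
 * handled here.
 *
 *    struct rte_ml_dev_xstats_map *map;
 *    uint64_t *values;
 *    uint16_t *ids;
 *    int n, i;
 *
 *    n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, NULL, 0);
 *    map = calloc(n, sizeof(*map));
 *    values = calloc(n, sizeof(*values));
 *    ids = calloc(n, sizeof(*ids));
 *    rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, n);
 *    for (i = 0; i < n; i++)
 *        ids[i] = map[i].id;
 *    rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n);
 *    for (i = 0; i < n; i++)
 *        RTE_MLDEV_LOG(INFO, "%s: %" PRIu64, map[i].name, values[i]);
 */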
739 
740 /**
741  * Dump internal information about *dev_id* to the FILE* provided in *fd*.
742  *
743  * @param dev_id
744  *   The identifier of the device.
745  * @param fd
746  *   A pointer to a file for output.
747  * @return
748  *   - 0: on success.
749  *   - <0: on failure.
750  */
751 __rte_experimental
752 int
753 rte_ml_dev_dump(int16_t dev_id, FILE *fd);
754 
755 /**
756  * Trigger the ML device self test.
757  *
758  * @param dev_id
759  *   The identifier of the device.
760  * @return
761  *   - 0: Selftest successful.
762  *   - -ENOTSUP: if the device doesn't support selftest.
763  *   - other values < 0 on failure.
764  */
765 __rte_experimental
766 int
767 rte_ml_dev_selftest(int16_t dev_id);
768 
769 /* Model operations */
770 
771 /** ML model load parameters
772  *
773  * Parameters required to load an ML model.
774  */
775 struct rte_ml_model_params {
776 	void *addr;
777 	/**< Address of model buffer */
778 	size_t size;
779 	/**< Size of model buffer */
780 };
781 
782 /**
783  * Load an ML model to the device.
784  *
785  * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
786  *
787  * @param[in] dev_id
788  *   The identifier of the device.
789  * @param[in] params
790  *   Parameters for the model to be loaded.
791  * @param[out] model_id
792  *   Identifier of the model loaded.
793  *
794  * @return
795  *   - 0: Success, Model loaded.
796  *   - < 0: Failure, Error code of the model load driver function.
797  */
798 __rte_experimental
799 int
800 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
801 
802 /**
803  * Unload an ML model from the device.
804  *
805  * @param[in] dev_id
806  *   The identifier of the device.
807  * @param[in] model_id
808  *   Identifier of the model to be unloaded.
809  *
810  * @return
811  *   - 0: Success, Model unloaded.
812  *   - < 0: Failure, Error code of the model unload driver function.
813  */
814 __rte_experimental
815 int
816 rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
817 
818 /**
819  * Start an ML model for the given device ID.
820  *
821  * Start an ML model to accept inference requests.
822  *
823  * @param[in] dev_id
824  *   The identifier of the device.
825  * @param[in] model_id
826  *   Identifier of the model to be started.
827  *
828  * @return
829  *   - 0: Success, Model started.
830  *   - < 0: Failure, Error code of the model start driver function.
831  */
832 __rte_experimental
833 int
834 rte_ml_model_start(int16_t dev_id, uint16_t model_id);
835 
836 /**
837  * Stop an ML model for the given device ID.
838  *
839  * Stopping a model disables it from accepting further inference jobs.
840  * All inference jobs must be completed before a model stop is attempted.
841  *
842  * @param[in] dev_id
843  *   The identifier of the device.
844  * @param[in] model_id
845  *   Identifier of the model to be stopped.
846  *
847  * @return
848  *   - 0: Success, Model stopped.
849  *   - < 0: Failure, Error code of the model stop driver function.
850  */
851 __rte_experimental
852 int
853 rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
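/*
 * Illustrative sketch (not part of the API): loading and starting a model
 * from a buffer already read into memory, and the matching teardown. The
 * buffer variables are hypothetical, the device is assumed to be configured
 * and started, and error handling is abbreviated.
 *
 *    uint16_t model_id;
 *    struct rte_ml_model_params params = {
 *        .addr = model_buf,
 *        .size = model_buf_len,
 *    };
 *
 *    if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
 *        return -1;
 *    if (rte_ml_model_start(dev_id, model_id) != 0) {
 *        rte_ml_model_unload(dev_id, model_id);
 *        return -1;
 *    }
 *
 *    ... run inferences ...
 *
 *    rte_ml_model_stop(dev_id, model_id);
 *    rte_ml_model_unload(dev_id, model_id);
 */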
854 
855 /**
856  * Input and output data types. ML models can operate on reduced precision
857  * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
858  * This enum is used to represent the lower precision integer and floating point types used
859  * by ML models.
860  */
861 enum rte_ml_io_type {
862 	RTE_ML_IO_TYPE_UNKNOWN = 0,
863 	/**< Invalid or unknown type */
864 	RTE_ML_IO_TYPE_INT8,
865 	/**< 8-bit integer */
866 	RTE_ML_IO_TYPE_UINT8,
867 	/**< 8-bit unsigned integer */
868 	RTE_ML_IO_TYPE_INT16,
869 	/**< 16-bit integer */
870 	RTE_ML_IO_TYPE_UINT16,
871 	/**< 16-bit unsigned integer */
872 	RTE_ML_IO_TYPE_INT32,
873 	/**< 32-bit integer */
874 	RTE_ML_IO_TYPE_UINT32,
875 	/**< 32-bit unsigned integer */
876 	RTE_ML_IO_TYPE_FP8,
877 	/**< 8-bit floating point number */
878 	RTE_ML_IO_TYPE_FP16,
879 	/**< IEEE 754 16-bit floating point number */
880 	RTE_ML_IO_TYPE_FP32,
881 	/**< IEEE 754 32-bit floating point number */
882 	RTE_ML_IO_TYPE_BFLOAT16
883 	/**< 16-bit brain floating point number. */
884 };
885 
886 /** ML I/O buffer layout */
887 enum rte_ml_io_layout {
888 	RTE_ML_IO_LAYOUT_PACKED,
889 	/**< All inputs for the model should be packed in a single buffer with
890 	 * no padding between individual inputs. The buffer is expected to
891 	 * be aligned to rte_ml_dev_info::align_size.
892 	 *
893 	 * When I/O segmentation is supported by the device, the packed
894 	 * data can be split into multiple segments. In this case, each
895 	 * segment is expected to be aligned to rte_ml_dev_info::align_size
896 	 *
897 	 * Same applies to output.
898 	 *
899 	 * @see struct rte_ml_dev_info::max_segments
900 	 */
901 	RTE_ML_IO_LAYOUT_SPLIT
902 	/**< Each input for the model should be stored in a separate buffer
903 	 * and each input should be aligned to rte_ml_dev_info::align_size.
904 	 *
905 	 * When I/O segmentation is supported, each input can be split into
906 	 * multiple segments. In this case, each segment is expected to be
907 	 * aligned to rte_ml_dev_info::align_size
908 	 *
909 	 * Same applies to output.
910 	 *
911 	 * @see struct rte_ml_dev_info::max_segments
912 	 */
913 };
914 
915 /**
916  * Input and output data information structure
917  *
918  * Specifies the type and shape of input and output data.
919  */
920 struct rte_ml_io_info {
921 	char name[RTE_ML_STR_MAX];
922 	/**< Name of data */
923 	uint32_t nb_dims;
924 	/**< Number of dimensions in shape */
925 	uint32_t *shape;
926 	/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
927 	enum rte_ml_io_type type;
928 	/**< Type of data
929 	 * @see enum rte_ml_io_type
930 	 */
931 	uint64_t nb_elements;
932 	/**< Number of elements in tensor */
933 	uint64_t size;
934 	/**< Size of tensor in bytes */
935 };
936 
937 /** Model information structure */
938 struct rte_ml_model_info {
939 	char name[RTE_ML_STR_MAX];
940 	/**< Model name. */
941 	char version[RTE_ML_STR_MAX];
942 	/**< Model version */
943 	uint16_t model_id;
944 	/**< Model ID */
945 	uint16_t device_id;
946 	/**< Device ID */
947 	enum rte_ml_io_layout io_layout;
948 	/**< I/O buffer layout for the model */
949 	uint16_t min_batches;
950 	/**< Minimum number of batches that the model can process
951 	 * in one inference request
952 	 */
953 	uint16_t max_batches;
954 	/**< Maximum number of batches that the model can process
955 	 * in one inference request
956 	 */
957 	uint32_t nb_inputs;
958 	/**< Number of inputs */
959 	const struct rte_ml_io_info *input_info;
960 	/**< Input info array. Array size is equal to nb_inputs */
961 	uint32_t nb_outputs;
962 	/**< Number of outputs */
963 	const struct rte_ml_io_info *output_info;
964 	/**< Output info array. Array size is equal to nb_outputs */
965 	uint64_t wb_size;
966 	/**< Size of model weights and bias */
967 };
968 
969 /**
970  * Get ML model information.
971  *
972  * @param[in] dev_id
973  *   The identifier of the device.
974  * @param[in] model_id
975  *   Identifier for the model created
976  * @param[out] model_info
977  *   Pointer to a model info structure
978  *
979  * @return
980  *   - Returns 0 on success
981  *   - Returns negative value on failure
982  */
983 __rte_experimental
984 int
985 rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
986 
987 /**
988  * Update the model parameters without unloading the model.
989  *
990  * Update model parameters such as weights and bias without unloading the model.
991  * rte_ml_model_stop() must be called before invoking this API.
992  *
993  * @param[in] dev_id
994  *   The identifier of the device.
995  * @param[in] model_id
996  *   Identifier for the model created
997  * @param[in] buffer
998  *   Pointer to the model weights and bias buffer.
999  * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
1000  *
1001  * @return
1002  *   - Returns 0 on success
1003  *   - Returns negative value on failure
1004  */
1005 __rte_experimental
1006 int
1007 rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
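/*
 * Illustrative sketch (not part of the API): updating model weights and bias
 * in place. The new_wb buffer is hypothetical; it must hold wb_size bytes as
 * reported by rte_ml_model_info_get(), and error handling is abbreviated.
 *
 *    struct rte_ml_model_info info;
 *
 *    rte_ml_model_info_get(dev_id, model_id, &info);
 *    // new_wb points to info.wb_size bytes of updated weights and bias
 *    rte_ml_model_stop(dev_id, model_id);
 *    rte_ml_model_params_update(dev_id, model_id, new_wb);
 *    rte_ml_model_start(dev_id, model_id);
 */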
1008 
1009 /* IO operations */
1010 
1011 /**
1012  * Quantize input data.
1013  *
1014  * Quantization converts data from a higher precision type to a lower precision type to improve
1015  * the throughput and efficiency of the model execution with minimal loss of accuracy.
1016  * Types of dequantized data and quantized data are specified by the model.
1017  *
1018  * @param[in] dev_id
1019  *   The identifier of the device.
1020  * @param[in] model_id
1021  *   Identifier for the model
1022  * @param[in] dbuffer
1023  *   Address of dequantized input data
1024  * @param[in] qbuffer
1025  *   Address of quantized input data
1026  *
1027  * @return
1028  *   - Returns 0 on success
1029  *   - Returns negative value on failure
1030  */
1031 __rte_experimental
1032 int
1033 rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
1034 		   struct rte_ml_buff_seg **qbuffer);
1035 
1036 /**
1037  * Dequantize output data.
1038  *
1039  * Dequantization converts data from a lower precision type to a higher precision type.
1040  * The types of the quantized and dequantized data are specified by the model.
1041  *
1042  * @param[in] dev_id
1043  *   The identifier of the device.
1044  * @param[in] model_id
1045  *   Identifier for the model
1046  * @param[in] qbuffer
1047  *   Address of quantized output data
1048  * @param[in] dbuffer
1049  *   Address of dequantized output data
1050  *
1051  * @return
1052  *   - Returns 0 on success
1053  *   - Returns negative value on failure
1054  */
1055 __rte_experimental
1056 int
1057 rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
1058 		     struct rte_ml_buff_seg **dbuffer);
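/*
 * Illustrative sketch (not part of the API): converting application data to
 * and from the model's native precision around an inference. The d_input,
 * q_input, q_output and d_output names are hypothetical arrays of
 * struct rte_ml_buff_seg pointers prepared by the application and sized
 * according to the model's I/O layout (see struct rte_ml_op).
 *
 *    rte_ml_io_quantize(dev_id, model_id, d_input, q_input);
 *
 *    ... enqueue an op using q_input; after dequeue, results are in q_output ...
 *
 *    rte_ml_io_dequantize(dev_id, model_id, q_output, d_output);
 */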
1059 
1060 /* ML op pool operations */
1061 
1062 /**
1063  * Create an ML operation pool
1064  *
1065  * @param name
1066  *   ML operations pool name
1067  * @param nb_elts
1068  *   Number of elements in pool
1069  * @param cache_size
1070  *   Number of elements to cache on lcore, see
1071  *   *rte_mempool_create* for further details about cache size
1072  * @param user_size
1073  *   Size of private data to allocate for user with each operation
1074  * @param socket_id
1075  *   Socket identifier to allocate memory on
1076  * @return
1077  *  - On success pointer to mempool
1078  *  - On failure NULL
1079  */
1080 __rte_experimental
1081 struct rte_mempool *
1082 rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
1083 		      uint16_t user_size, int socket_id);
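/*
 * Illustrative sketch (not part of the API): creating an op pool and
 * allocating an op from it. The pool name, size and cache size are
 * hypothetical; error handling is abbreviated.
 *
 *    struct rte_mempool *op_pool;
 *    struct rte_ml_op *op;
 *
 *    op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, rte_socket_id());
 *    if (op_pool == NULL)
 *        return -1;
 *    if (rte_mempool_get(op_pool, (void **)&op) != 0)
 *        return -1;
 *
 *    ... fill and enqueue the op; after dequeue and completion ...
 *
 *    rte_mempool_put(op_pool, op);
 *    rte_ml_op_pool_free(op_pool);
 */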
1084 
1085 /**
1086  * Free an ML operation pool
1087  *
1088  * @param mempool
1089  *   A pointer to the mempool structure.
1090  *   If NULL then, the function does nothing.
1091  */
1092 __rte_experimental
1093 void
1094 rte_ml_op_pool_free(struct rte_mempool *mempool);
1095 
1096 #ifdef __cplusplus
1097 }
1098 #endif
1099 
1100 #endif /* RTE_MLDEV_H */
1101