1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_H
6 #define RTE_MLDEV_H
7 
8 /**
9  * @file rte_mldev.h
10  *
11  * @warning
12  * @b EXPERIMENTAL:
13  * All functions in this file may be changed or removed without prior notice.
14  *
15  * ML (Machine Learning) device API.
16  *
17  * The ML framework is built on the following model:
18  *
19  *
20  *     +-----------------+               rte_ml_[en|de]queue_burst()
21  *     |                 |                          |
22  *     |     Machine     o------+     +--------+    |
23  *     |     Learning    |      |     | queue  |    |    +------+
24  *     |     Inference   o------+-----o        |<===o===>|Core 0|
25  *     |     Engine      |      |     | pair 0 |         +------+
26  *     |                 o----+ |     +--------+
27  *     |                 |    | |
28  *     +-----------------+    | |     +--------+
29  *              ^             | |     | queue  |         +------+
30  *              |             | +-----o        |<=======>|Core 1|
31  *              |             |       | pair 1 |         +------+
32  *              |             |       +--------+
33  *     +--------+--------+    |
34  *     | +-------------+ |    |       +--------+
35  *     | |   Model 0   | |    |       | queue  |         +------+
36  *     | +-------------+ |    +-------o        |<=======>|Core N|
37  *     | +-------------+ |            | pair N |         +------+
38  *     | |   Model 1   | |            +--------+
39  *     | +-------------+ |
40  *     | +-------------+ |<------> rte_ml_model_load()
41  *     | |   Model ..  | |-------> rte_ml_model_info_get()
42  *     | +-------------+ |<------- rte_ml_model_start()
43  *     | +-------------+ |<------- rte_ml_model_stop()
44  *     | |   Model N   | |<------- rte_ml_model_params_update()
45  *     | +-------------+ |<------- rte_ml_model_unload()
46  *     +-----------------+
47  *
48  * ML Device: A hardware or software-based implementation of ML device API for
49  * running inferences using a pre-trained ML model.
50  *
51  * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
52  * a procedure/algorithm and the data/pattern required to make predictions on live data.
53  * Once the model is created and trained outside of the DPDK scope, the model can be loaded
54  * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
55  * The rte_ml_model_params_update() API can be used to update model parameters such as weights
56  * and bias without unloading the model using rte_ml_model_unload().
57  *
58  * ML Inference: ML inference is the process of feeding data to the model via the
59  * rte_ml_enqueue_burst() API and retrieving the calculated outputs/predictions from the
60  * started model via the rte_ml_dequeue_burst() API.
61  *
62  * In all functions of the ML device API, the ML device is designated by an
63  * integer >= 0 named the device identifier *dev_id*.
64  *
65  * The functions exported by the ML device API to setup a device designated by
66  * its device identifier must be invoked in the following order:
67  *
68  *      - rte_ml_dev_configure()
69  *      - rte_ml_dev_queue_pair_setup()
70  *      - rte_ml_dev_start()
71  *
72  * A model is required to run inference operations on the user-specified inputs.
73  * The application needs to invoke the ML model API in the following order before queueing
74  * inference jobs:
75  *
76  *      - rte_ml_model_load()
77  *      - rte_ml_model_start()
78  *
79  * A model can be loaded on a device only after the device has been configured and can be
80  * started or stopped only after a device has been started.
81  *
82  * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
83  * The information includes the shape and type of the inputs and outputs required for inference.
84  *
85  * Data quantization and dequantization are among the main aspects of the ML domain. This involves
86  * conversion of input data from a higher precision to a lower precision data type and vice-versa
87  * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
88  * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
89  * and output buffers holding data for multiple batches.
90  *
91  * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to get the
92  * size of quantized and de-quantized multi-batch input and output buffers.
93  *
94  * User can optionally update the model parameters with rte_ml_model_params_update() after
95  * invoking rte_ml_model_stop() API on a given model ID.
96  *
97  * The application can invoke, in any order, the functions exported by the ML API to enqueue
98  * inference jobs and dequeue inference responses.
99  *
100  * If the application wants to change the device configuration (i.e., call
101  * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
102  * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
103  * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
104  * for the given model. The application does not need to call rte_ml_dev_stop() API for
105  * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
106  *
107  * Once the device is in the started state after invoking the rte_ml_dev_start() API and the model
108  * is in the started state after invoking the rte_ml_model_start() API, the application can call
109  * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() on the intended device and model ID.
110  *
111  * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
112  *
113  * Typical application utilisation of the ML API will follow the programming
114  * flow below; an illustrative code sketch follows this comment block.
115  *
116  * - rte_ml_dev_configure()
117  * - rte_ml_dev_queue_pair_setup()
118  * - rte_ml_model_load()
119  * - rte_ml_dev_start()
120  * - rte_ml_model_start()
121  * - rte_ml_model_info_get()
122  * - rte_ml_enqueue_burst()
123  * - rte_ml_dequeue_burst()
124  * - rte_ml_model_stop()
125  * - rte_ml_model_unload()
126  * - rte_ml_dev_stop()
127  * - rte_ml_dev_close()
128  *
129  * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
130  * are lock-free functions which are assumed not to be invoked in parallel on different logical
131  * cores on the same target object. For instance, the dequeue function of a poll mode driver cannot
132  * be invoked in parallel on two logical cores to operate on the same queue pair. Of course, this
133  * function can be invoked in parallel by different logical cores on different queue pairs.
134  * It is the responsibility of the user application to enforce this rule.
135  */
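
/*
 * Illustrative sketch (not part of this API): a minimal, single queue pair, single model flow
 * following the programming sequence above. Error handling is omitted and model_buf/model_sz
 * (the model binary already read into memory by the application) are assumptions.
 *
 *	int16_t dev_id = 0;
 *	uint16_t model_id;
 *	struct rte_ml_dev_config dev_conf = {
 *		.socket_id = rte_ml_dev_socket_id(dev_id),
 *		.nb_models = 1,
 *		.nb_queue_pairs = 1,
 *	};
 *	struct rte_ml_dev_qp_conf qp_conf = { .nb_desc = 128, .cb = NULL };
 *	struct rte_ml_model_params params = { .addr = model_buf, .size = model_sz };
 *
 *	rte_ml_dev_configure(dev_id, &dev_conf);
 *	rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, dev_conf.socket_id);
 *	rte_ml_model_load(dev_id, &params, &model_id);
 *	rte_ml_dev_start(dev_id);
 *	rte_ml_model_start(dev_id, model_id);
 *
 *	// ... enqueue ops with rte_ml_enqueue_burst() and poll completions with
 *	// ... rte_ml_dequeue_burst() on queue pair 0.
 *
 *	rte_ml_model_stop(dev_id, model_id);
 *	rte_ml_model_unload(dev_id, model_id);
 *	rte_ml_dev_stop(dev_id);
 *	rte_ml_dev_close(dev_id);
 */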
136 
137 #include <rte_common.h>
138 #include <rte_log.h>
139 #include <rte_mempool.h>
140 
141 #ifdef __cplusplus
142 extern "C" {
143 #endif
144 
145 /* Logging Macro */
146 extern int rte_ml_dev_logtype;
147 #define RTE_LOGTYPE_MLDEV rte_ml_dev_logtype
148 
149 #define RTE_MLDEV_LOG(level, fmt, args...) \
150 	RTE_LOG_LINE(level, MLDEV, "%s(): " fmt, __func__, ##args)
151 
152 #define RTE_ML_STR_MAX 128
153 /**< Maximum length of name string */
154 
155 #define RTE_MLDEV_DEFAULT_MAX 32
156 /** Maximum number of devices if rte_ml_dev_init() is not called. */
157 
158 /* Device operations */
159 
160 /**
161  * Initialize the device array before probing devices. If not called, the first device probed would
162  * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
163  *
164  * @param dev_max
165  *   Maximum number of devices.
166  *
167  * @return
168  *   0 on success, -rte_errno otherwise:
169  *   - ENOMEM if out of memory
170  *   - EINVAL if dev_max is 0
171  *   - EBUSY if already initialized
172  */
173 __rte_experimental
174 int
175 rte_ml_dev_init(size_t dev_max);
176 
177 /**
178  * Get the total number of ML devices that have been successfully initialised.
179  *
180  * @return
181  *   - The total number of usable ML devices.
182  */
183 __rte_experimental
184 uint16_t
185 rte_ml_dev_count(void);
186 
187 /**
188  * Check if the device is in ready state.
189  *
190  * @param dev_id
191  *   The identifier of the device.
192  *
193  * @return
194  *   - 0 if the device is not in the ready state.
195  *   - 1 if the device is in the ready state.
196  */
197 __rte_experimental
198 int
199 rte_ml_dev_is_valid_dev(int16_t dev_id);
200 
201 /**
202  * Return the NUMA socket to which a device is connected.
203  *
204  * @param dev_id
205  *   The identifier of the device.
206  *
207  * @return
208  *   - The NUMA socket id to which the device is connected
209  *   - 0 if the socket could not be determined.
210  *   - -EINVAL: if the dev_id value is not valid.
211  */
212 __rte_experimental
213 int
214 rte_ml_dev_socket_id(int16_t dev_id);
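
/*
 * Illustrative sketch (not part of this API): enumerating usable ML devices and their NUMA
 * sockets. The loop assumes device identifiers are dense starting from 0; with sparse numbering
 * a wider range would have to be scanned.
 *
 *	int16_t dev_id;
 *	uint16_t nb_devs = rte_ml_dev_count();
 *
 *	for (dev_id = 0; dev_id < (int16_t)nb_devs; dev_id++) {
 *		if (!rte_ml_dev_is_valid_dev(dev_id))
 *			continue;
 *		printf("mldev %d on socket %d\n", dev_id, rte_ml_dev_socket_id(dev_id));
 *	}
 */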
215 
216 /** ML device information */
217 struct rte_ml_dev_info {
218 	const char *driver_name;
219 	/**< Driver name */
220 	uint16_t max_models;
221 	/**< Maximum number of models supported by the device.
222 	 * @see struct rte_ml_dev_config::nb_models
223 	 */
224 	uint16_t max_queue_pairs;
225 	/**< Maximum number of queue pairs supported by the device.
226 	 * @see struct rte_ml_dev_config::nb_queue_pairs
227 	 */
228 	uint16_t max_desc;
229 	/**< Maximum number of descriptors per queue pair allowed by the device.
230 	 * @see struct rte_ml_dev_qp_conf::nb_desc
231 	 */
232 	uint16_t max_io;
233 	/**< Maximum number of inputs/outputs supported per model. */
234 	uint16_t max_segments;
235 	/**< Maximum number of scatter-gather entries supported by the device.
236 	 * @see struct rte_ml_buff_seg  struct rte_ml_buff_seg::next
237 	 */
238 	uint16_t align_size;
239 	/**< Alignment size of IO buffers used by the device. */
240 };
241 
242 /**
243  * Retrieve the information of the device.
244  *
245  * @param dev_id
246  *   The identifier of the device.
247  * @param dev_info
248  *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
249  *
250  * @return
251  *   - 0: Success, driver updates the information of the ML device
252  *   - < 0: Error code returned by the driver info get function.
253  */
254 __rte_experimental
255 int
256 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
257 
258 /** ML device configuration structure */
259 struct rte_ml_dev_config {
260 	int socket_id;
261 	/**< Socket to allocate resources on. */
262 	uint16_t nb_models;
263 	/**< Number of models to be loaded on the device.
264 	 * This value cannot exceed the maximum number of models previously reported in
265 	 * struct rte_ml_dev_info::max_models
266 	 */
267 	uint16_t nb_queue_pairs;
268 	/**< Number of queue pairs to configure on this device.
269 	 * This value cannot exceed the maximum number of queue pairs previously reported in
270 	 * struct rte_ml_dev_info::max_queue_pairs
271 	 */
272 };
273 
274 /**
275  * Configure an ML device.
276  *
277  * This function must be invoked first before any other function in the API.
278  *
279  * An ML device can be re-configured when in a stopped state. A device cannot be re-configured after
280  * rte_ml_dev_close() is called.
281  *
282  * The caller may use rte_ml_dev_info_get() to get the capabilities of the resources available for
283  * this ML device.
284  *
285  * @param dev_id
286  *   The identifier of the device to configure.
287  * @param config
288  *   The ML device configuration structure.
289  *
290  * @return
291  *   - 0: Success, device configured.
292  *   - < 0: Error code returned by the driver configuration function.
293  */
294 __rte_experimental
295 int
296 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
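
/*
 * Illustrative sketch (not part of this API): querying the device capabilities and configuring
 * the device within those limits. The requested values (1 model, 2 queue pairs) are arbitrary.
 *
 *	struct rte_ml_dev_info info;
 *	struct rte_ml_dev_config conf;
 *
 *	rte_ml_dev_info_get(dev_id, &info);
 *	conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *	conf.nb_models = RTE_MIN((uint16_t)1, info.max_models);
 *	conf.nb_queue_pairs = RTE_MIN((uint16_t)2, info.max_queue_pairs);
 *	rte_ml_dev_configure(dev_id, &conf);
 */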
297 
298 /* Forward declaration */
299 struct rte_ml_op;
300 
301 /** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
302 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
303 
304 /** ML device queue pair configuration structure. */
305 struct rte_ml_dev_qp_conf {
306 	uint32_t nb_desc;
307 	/**< Number of descriptors per queue pair.
308 	 * This value cannot exceed the maximum number of descriptors previously reported in
309 	 * struct rte_ml_dev_info::max_desc
310 	 */
311 	rte_ml_dev_stop_flush_t cb;
312 	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
313 	 * Value NULL is allowed, in which case callback will not be invoked.
314 	 * This function can be used to properly dispose of outstanding ML ops from all
315 	 * queue pairs, for example ops containing memory pointers.
316 	 * @see rte_ml_dev_stop()
317 	 */
318 };
319 
320 /**
321  * Set up a queue pair for a device. This should only be called when the device is stopped.
322  *
323  * @param dev_id
324  *   The identifier of the device.
325  * @param queue_pair_id
326  *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
327  * previously supplied to rte_ml_dev_configure().
328  * @param qp_conf
329  *   The pointer to the configuration data to be used for the queue pair.
330  * @param socket_id
331  *   The *socket_id* argument is the socket identifier in case of NUMA.
332  * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
333  * for the queue pair.
334  *
335  * @return
336  *   - 0: Success, queue pair correctly set up.
337  *   - < 0: Queue pair configuration failed.
338  */
339 __rte_experimental
340 int
341 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
342 			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
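
/*
 * Illustrative sketch (not part of this API): setting up queue pair 0 with a stop-flush callback
 * that returns outstanding ops to their mempool when rte_ml_dev_stop() is called. struct rte_ml_op
 * is defined further below in this file; *info* is assumed to come from rte_ml_dev_info_get().
 *
 *	static void
 *	op_flush_cb(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op)
 *	{
 *		RTE_SET_USED(dev_id);
 *		RTE_SET_USED(qp_id);
 *		rte_mempool_put(op->mempool, op);
 *	}
 *
 *	struct rte_ml_dev_qp_conf qp_conf = {
 *		.nb_desc = RTE_MIN(256u, (uint32_t)info.max_desc),
 *		.cb = op_flush_cb,
 *	};
 *	rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY);
 */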
343 
344 /**
345  * Start an ML device.
346  *
347  * The device start step consists of setting the configured features and enabling the ML device
348  * to accept inference jobs.
349  *
350  * @param dev_id
351  *   The identifier of the device.
352  *
353  * @return
354  *   - 0: Success, device started.
355  *   - <0: Error code of the driver device start function.
356  */
357 __rte_experimental
358 int
359 rte_ml_dev_start(int16_t dev_id);
360 
361 /**
362  * Stop an ML device. A stopped device cannot accept inference jobs.
363  * The device can be restarted with a call to rte_ml_dev_start().
364  *
365  * @param dev_id
366  *   The identifier of the device.
367  *
368  * @return
369  *   - 0: Success, device stopped.
370  *   - <0: Error code of the driver device stop function.
371  */
372 __rte_experimental
373 int
374 rte_ml_dev_stop(int16_t dev_id);
375 
376 /**
377  * Close an ML device. The device cannot be restarted!
378  *
379  * @param dev_id
380  *   The identifier of the device.
381  *
382  * @return
383  *  - 0 on successfully closing device.
384  *  - <0 on failure to close device.
385  */
386 __rte_experimental
387 int
388 rte_ml_dev_close(int16_t dev_id);
389 
390 /** Status of ML operation */
391 enum rte_ml_op_status {
392 	RTE_ML_OP_STATUS_SUCCESS = 0,
393 	/**< Operation completed successfully */
394 	RTE_ML_OP_STATUS_NOT_PROCESSED,
395 	/**< Operation has not yet been processed by the device. */
396 	RTE_ML_OP_STATUS_ERROR,
397 	/**< Operation completed with error.
398 	 * Application can invoke rte_ml_op_error_get() to get PMD specific
399 	 * error code if needed.
400 	 */
401 };
402 
403 /** ML operation's input and output buffer representation as a scatter-gather list
404  */
405 struct rte_ml_buff_seg {
406 	rte_iova_t iova_addr;
407 	/**< IOVA address of segment buffer. */
408 	void *addr;
409 	/**< Virtual address of segment buffer. */
410 	uint32_t length;
411 	/**< Segment length. */
412 	uint32_t reserved;
413 	/**< Reserved for future use. */
414 	struct rte_ml_buff_seg *next;
415 	/**< Points to next segment. Value NULL represents the last segment. */
416 };
417 
418 /**
419  * ML Operation.
420  *
421  * This structure contains data related to performing an ML operation on the buffers using
422  * the model specified through model_id.
423  */
424 struct rte_ml_op {
425 	uint16_t model_id;
426 	/**< Model ID to be used for the operation. */
427 	uint16_t nb_batches;
428 	/**< Number of batches. Minimum value must be one.
429 	 * The input buffer must hold the inference data for each batch contiguously.
430 	 */
431 	uint32_t reserved;
432 	/**< Reserved for future use. */
433 	struct rte_mempool *mempool;
434 	/**< Pool from which operation is allocated. */
435 	struct rte_ml_buff_seg **input;
436 	/**< Array of buffer segments to hold the inference input data.
437 	 *
438 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
439 	 * the array is 1.
440 	 *
441 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
442 	 * the array is rte_ml_model_info::nb_inputs.
443 	 *
444 	 * @see struct rte_ml_dev_info::io_layout
445 	 */
446 	struct rte_ml_buff_seg **output;
447 	/**< Array of buffer segments to hold the inference output data.
448 	 *
449 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of
450 	 * the array is 1.
451 	 *
452 	 * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of
453 	 * the array is rte_ml_model_info::nb_outputs.
454 	 *
455 	 * @see struct rte_ml_dev_info::io_layout
456 	 */
457 	union {
458 		uint64_t user_u64;
459 		/**< User data as uint64_t.*/
460 		void *user_ptr;
461 		/**< User data as void*.*/
462 	};
463 	enum rte_ml_op_status status;
464 	/**< Operation status. */
465 	uint64_t impl_opaque;
466 	/**< Implementation specific opaque value.
467 	 * An implementation may use this field to hold
468 	 * implementation specific value to share between
469 	 * dequeue and enqueue operation.
470 	 * The application should not modify this field.
471 	 */
472 } __rte_cache_aligned;
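
/*
 * Illustrative sketch (not part of this API): populating an op for a model using
 * RTE_ML_IO_LAYOUT_PACKED, where input and output are each an array of one segment pointer.
 * The op is assumed to come from a pool created with rte_ml_op_pool_create(); in_seg, out_seg
 * and app_ctx are application-provided assumptions.
 *
 *	// in_seg and out_seg are of type struct rte_ml_buff_seg *, each describing one
 *	// packed buffer; &in_seg and &out_seg therefore match struct rte_ml_buff_seg **.
 *	op->model_id = model_id;
 *	op->nb_batches = 1;
 *	op->mempool = op_pool;
 *	op->input = &in_seg;
 *	op->output = &out_seg;
 *	op->user_ptr = app_ctx;
 */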
473 
474 /* Enqueue/Dequeue operations */
475 
476 /**
477  * Enqueue a burst of ML inferences for processing on an ML device.
478  *
479  * The rte_ml_enqueue_burst() function is invoked to place ML inference
480  * operations on the queue *qp_id* of the device designated by its *dev_id*.
481  *
482  * The *nb_ops* parameter is the number of inferences to process which are
483  * supplied in the *ops* array of *rte_ml_op* structures.
484  *
485  * The rte_ml_enqueue_burst() function returns the number of inferences it
486  * actually enqueued for processing. A return value equal to *nb_ops* means that
487  * all inferences have been enqueued.
488  *
489  * @param dev_id
490  *   The identifier of the device.
491  * @param qp_id
492  *   The index of the queue pair which inferences are to be enqueued for processing.
493  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
494  * *rte_ml_dev_configure*.
495  * @param ops
496  *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
497  * ML inferences to be processed.
498  * @param nb_ops
499  *   The number of operations to process.
500  *
501  * @return
502  *   The number of inference operations actually enqueued to the ML device.
503  * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
504  * is full or if invalid parameters are specified in a *rte_ml_op*.
505  */
506 __rte_experimental
507 uint16_t
508 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
509 
510 /**
511  * Dequeue a burst of processed ML inferences operations from a queue on the ML device.
512  * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
513  * in the *ops* array.
514  *
515  * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
516  * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
517  *
518  * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
519  * and this is likely to signify that other processed operations remain in the device's output queue.
520  * Applications implementing a "retrieve as many processed operations as possible" policy can check
521  * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
522  * *nb_ops* is returned.
523  *
524  * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
525  * the corresponding overhead.
526  *
527  * @param dev_id
528  *   The identifier of the device.
529  * @param qp_id
530  *   The index of the queue pair from which to retrieve processed operations.
531  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
532  * rte_ml_dev_configure().
533  * @param ops
534  *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
535  * store *nb_ops* pointers in it.
536  * @param nb_ops
537  *   The maximum number of inferences to dequeue.
538  *
539  * @return
540  *   The number of operations actually dequeued, which is the number of pointers
541  * to *rte_ml_op* structures effectively supplied to the *ops* array.
542  */
543 __rte_experimental
544 uint16_t
545 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
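
/*
 * Illustrative sketch (not part of this API): submitting a burst of prepared ops on queue pair 0
 * and polling until all of them complete. BURST_SZ is an assumed application constant and the
 * ops[] array is assumed to be populated beforehand.
 *
 *	uint16_t nb_ops = BURST_SZ, enq = 0, deq = 0;
 *	struct rte_ml_op *ops[BURST_SZ];
 *	struct rte_ml_op *done[BURST_SZ];
 *
 *	while (enq < nb_ops)
 *		enq += rte_ml_enqueue_burst(dev_id, 0, &ops[enq], nb_ops - enq);
 *	while (deq < nb_ops)
 *		deq += rte_ml_dequeue_burst(dev_id, 0, &done[deq], nb_ops - deq);
 */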
546 
547 /**
548  * Verbose error structure definition.
549  */
550 struct rte_ml_op_error {
551 	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
552 	uint64_t errcode;	      /**< Vendor specific error code. */
553 };
554 
555 /**
556  * Get PMD specific error information for an ML op.
557  *
558  * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
559  * this API allows the application to get PMD specific error details.
560  *
561  * @param[in] dev_id
562  *   Device identifier
563  * @param[in] op
564  *   Handle of ML operation
565  * @param[in] error
566  *   Address of structure rte_ml_op_error to be filled
567  *
568  * @return
569  *   - Returns 0 on success
570  *   - Returns negative value on failure
571  */
572 __rte_experimental
573 int
574 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
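
/*
 * Illustrative sketch (not part of this API): inspecting the status of a dequeued op and fetching
 * driver specific error details on failure.
 *
 *	struct rte_ml_op_error err;
 *
 *	if (op->status == RTE_ML_OP_STATUS_ERROR &&
 *	    rte_ml_op_error_get(dev_id, op, &err) == 0)
 *		printf("op failed: %s (0x%" PRIx64 ")\n", err.message, err.errcode);
 */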
575 
576 /* Statistics operations */
577 
578 /** Device statistics. */
579 struct rte_ml_dev_stats {
580 	uint64_t enqueued_count;
581 	/**< Count of all operations enqueued */
582 	uint64_t dequeued_count;
583 	/**< Count of all operations dequeued */
584 	uint64_t enqueue_err_count;
585 	/**< Total error count on operations enqueued */
586 	uint64_t dequeue_err_count;
587 	/**< Total error count on operations dequeued */
588 };
589 
590 /**
591  * Retrieve the general I/O statistics of a device.
592  *
593  * @param dev_id
594  *   The identifier of the device.
595  * @param stats
596  *   Pointer to structure to where statistics will be copied.
597  * On error, this location may or may not have been modified.
598  * @return
599  *   - 0 on success
600  *   - -EINVAL: If invalid parameter pointer is provided.
601  */
602 __rte_experimental
603 int
604 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
605 
606 /**
607  * Reset the statistics of a device.
608  *
609  * @param dev_id
610  *   The identifier of the device.
611  */
612 __rte_experimental
613 void
614 rte_ml_dev_stats_reset(int16_t dev_id);
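
/*
 * Illustrative sketch (not part of this API): reading and then resetting the basic device
 * statistics.
 *
 *	struct rte_ml_dev_stats stats;
 *
 *	if (rte_ml_dev_stats_get(dev_id, &stats) == 0)
 *		printf("enq %" PRIu64 " deq %" PRIu64 " enq_err %" PRIu64 " deq_err %" PRIu64 "\n",
 *		       stats.enqueued_count, stats.dequeued_count,
 *		       stats.enqueue_err_count, stats.dequeue_err_count);
 *	rte_ml_dev_stats_reset(dev_id);
 */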
615 
616 /**
617  * Selects the component of the mldev to retrieve statistics from.
618  */
619 enum rte_ml_dev_xstats_mode {
620 	RTE_ML_DEV_XSTATS_DEVICE,
621 	/**< Device xstats */
622 	RTE_ML_DEV_XSTATS_MODEL,
623 	/**< Model xstats */
624 };
625 
626 /**
627  * A name-key lookup element for extended statistics.
628  *
629  * This structure is used to map between names and ID numbers for extended ML device statistics.
630  */
631 struct rte_ml_dev_xstats_map {
632 	uint16_t id;
633 	/**< xstat identifier */
634 	char name[RTE_ML_STR_MAX];
635 	/**< xstat name */
636 };
637 
638 /**
639  * Retrieve names of extended statistics of an ML device.
640  *
641  * @param dev_id
642  *   The identifier of the device.
643  * @param mode
644  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
645  * @param model_id
646  *   Used to specify the model number in model mode, and is ignored in device mode.
647  * @param[out] xstats_map
648  *   Block of memory to insert names and ids into. Must have a capacity of at least *size* entries.
649  * If set to NULL, the function returns the required capacity. The id values returned can be passed to
650  * *rte_ml_dev_xstats_get* to select statistics.
651  * @param size
652  *   Capacity of xstats_map (number of entries).
653  * @return
654  *   - Positive value lower or equal to size: success. The return value is the number of entries
655  * filled in the stats table.
656  *   - Positive value higher than size: error, the given statistics table is too small. The return
657  * value corresponds to the size that should be given to succeed. The entries in the table are not
658  * valid and shall not be used by the caller.
659  *   - Negative value on error:
660  *        -ENODEV for invalid *dev_id*.
661  *        -EINVAL for invalid mode, model parameters.
662  *        -ENOTSUP if the device doesn't support this function.
663  */
664 __rte_experimental
665 int
666 rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
667 			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
668 
669 /**
670  * Retrieve the value of a single stat by requesting it by name.
671  *
672  * @param dev_id
673  *   The identifier of the device.
674  * @param name
675  *   Name of the stat to retrieve.
676  * @param[out] stat_id
677  *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
678  * stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to scan a
679  * list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1.
680  * @param[out] value
681  *   Value of the stat to be returned.
682  * @return
683  *   - Zero: No error.
684  *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
685  */
686 __rte_experimental
687 int
688 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
689 
690 /**
691  * Retrieve extended statistics of an ML device.
692  *
693  * @param dev_id
694  *   The identifier of the device.
695  * @param mode
696  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
697  * @param model_id
698  *   Used to specify the model id in model mode, and is ignored in device mode.
699  * @param stat_ids
700  *   ID numbers of the stats to get. The ids can be got from the stat position in the stat list from
701  * rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
702  * @param[out] values
703  *   Values for each stat requested by ID.
704  * @param nb_ids
705  *   Number of stats requested.
706  * @return
707  *   - Positive value: number of stat entries filled into the values array
708  *   - Negative value on error:
709  *        -ENODEV for invalid *dev_id*.
710  *        -EINVAL for invalid mode, model id or stat id parameters.
711  *        -ENOTSUP if the device doesn't support this function.
712  */
713 __rte_experimental
714 int
715 rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
716 		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
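
/*
 * Illustrative sketch (not part of this API): retrieving all device-level extended stats by first
 * querying the required capacity, then the names and values. Allocation and error handling are
 * deliberately simplified.
 *
 *	int i, n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, NULL, 0);
 *	struct rte_ml_dev_xstats_map *map = calloc(n, sizeof(*map));
 *	uint16_t *ids = calloc(n, sizeof(*ids));
 *	uint64_t *vals = calloc(n, sizeof(*vals));
 *
 *	rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, n);
 *	for (i = 0; i < n; i++)
 *		ids[i] = map[i].id;
 *	rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, vals, n);
 *	for (i = 0; i < n; i++)
 *		printf("%s: %" PRIu64 "\n", map[i].name, vals[i]);
 */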
717 
718 /**
719  * Reset the values of the xstats of the selected component in the device.
720  *
721  * @param dev_id
722  *   The identifier of the device.
723  * @param mode
724  *   Mode of the statistics to reset. Choose from device or model.
725  * @param model_id
726  *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
727  * @param stat_ids
728  *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
729  * reset. If non-NULL, must point to array of at least *nb_ids* size.
730  * @param nb_ids
731  *   The number of ids available from the *stat_ids* array. Ignored when *stat_ids* is NULL.
732  * @return
733  *   - Zero: successfully reset the statistics.
734  *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
735  */
736 __rte_experimental
737 int
738 rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
739 			const uint16_t stat_ids[], uint16_t nb_ids);
740 
741 /**
742  * Dump internal information about *dev_id* to the FILE* provided in *fd*.
743  *
744  * @param dev_id
745  *   The identifier of the device.
746  * @param fd
747  *   A pointer to a file for output.
748  * @return
749  *   - 0: on success.
750  *   - <0: on failure.
751  */
752 __rte_experimental
753 int
754 rte_ml_dev_dump(int16_t dev_id, FILE *fd);
755 
756 /**
757  * Trigger the ML device self test.
758  *
759  * @param dev_id
760  *   The identifier of the device.
761  * @return
762  *   - 0: Selftest successful.
763  *   - -ENOTSUP: if the device doesn't support selftest.
764  *   - other values < 0 on failure.
765  */
766 __rte_experimental
767 int
768 rte_ml_dev_selftest(int16_t dev_id);
769 
770 /* Model operations */
771 
772 /** ML model load parameters
773  *
774  * Parameters required to load an ML model.
775  */
776 struct rte_ml_model_params {
777 	void *addr;
778 	/**< Address of model buffer */
779 	size_t size;
780 	/**< Size of model buffer */
781 };
782 
783 /**
784  * Load an ML model to the device.
785  *
786  * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
787  *
788  * @param[in] dev_id
789  *   The identifier of the device.
790  * @param[in] params
791  *   Parameters for the model to be loaded.
792  * @param[out] model_id
793  *   Identifier of the model loaded.
794  *
795  * @return
796  *   - 0: Success, Model loaded.
797  *   - < 0: Failure, Error code of the model load driver function.
798  */
799 __rte_experimental
800 int
801 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
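
/*
 * Illustrative sketch (not part of this API): loading a model binary that the application has
 * already read into memory. How model_buf and model_sz are obtained (e.g. from a file produced
 * by an offline model compiler) is outside the scope of this API.
 *
 *	uint16_t model_id;
 *	struct rte_ml_model_params params = {
 *		.addr = model_buf,
 *		.size = model_sz,
 *	};
 *
 *	if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
 *		return -1;	// load failed
 */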
802 
803 /**
804  * Unload an ML model from the device.
805  *
806  * @param[in] dev_id
807  *   The identifier of the device.
808  * @param[in] model_id
809  *   Identifier of the model to be unloaded.
810  *
811  * @return
812  *   - 0: Success, Model unloaded.
813  *   - < 0: Failure, Error code of the model unload driver function.
814  */
815 __rte_experimental
816 int
817 rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
818 
819 /**
820  * Start an ML model for the given device ID.
821  *
822  * Start an ML model to accept inference requests.
823  *
824  * @param[in] dev_id
825  *   The identifier of the device.
826  * @param[in] model_id
827  *   Identifier of the model to be started.
828  *
829  * @return
830  *   - 0: Success, Model started.
831  *   - < 0: Failure, Error code of the model start driver function.
832  */
833 __rte_experimental
834 int
835 rte_ml_model_start(int16_t dev_id, uint16_t model_id);
836 
837 /**
838  * Stop an ML model for the given device ID.
839  *
840  * Stopping a model disables it from accepting further inference jobs.
841  * All inference jobs must have been completed before a model stop is attempted.
842  *
843  * @param[in] dev_id
844  *   The identifier of the device.
845  * @param[in] model_id
846  *   Identifier of the model to be stopped.
847  *
848  * @return
849  *   - 0: Success, Model stopped.
850  *   - < 0: Failure, Error code of the model stop driver function.
851  */
852 __rte_experimental
853 int
854 rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
855 
856 /**
857  * Input and output data types. ML models can operate on reduced precision
858  * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
859  * This enum is used to represent the lower precision integer and floating point types used
860  * by ML models.
861  */
862 enum rte_ml_io_type {
863 	RTE_ML_IO_TYPE_UNKNOWN = 0,
864 	/**< Invalid or unknown type */
865 	RTE_ML_IO_TYPE_INT8,
866 	/**< 8-bit integer */
867 	RTE_ML_IO_TYPE_UINT8,
868 	/**< 8-bit unsigned integer */
869 	RTE_ML_IO_TYPE_INT16,
870 	/**< 16-bit integer */
871 	RTE_ML_IO_TYPE_UINT16,
872 	/**< 16-bit unsigned integer */
873 	RTE_ML_IO_TYPE_INT32,
874 	/**< 32-bit integer */
875 	RTE_ML_IO_TYPE_UINT32,
876 	/**< 32-bit unsigned integer */
877 	RTE_ML_IO_TYPE_INT64,
878 	/**< 64-bit integer */
879 	RTE_ML_IO_TYPE_UINT64,
880 	/**< 64-bit unsigned integer */
881 	RTE_ML_IO_TYPE_FP8,
882 	/**< 8-bit floating point number */
883 	RTE_ML_IO_TYPE_FP16,
884 	/**< IEEE 754 16-bit floating point number */
885 	RTE_ML_IO_TYPE_FP32,
886 	/**< IEEE 754 32-bit floating point number */
887 	RTE_ML_IO_TYPE_BFLOAT16
888 	/**< 16-bit brain floating point number. */
889 };
890 
891 /** ML I/O buffer layout */
892 enum rte_ml_io_layout {
893 	RTE_ML_IO_LAYOUT_PACKED,
894 	/**< All inputs for the model should be packed in a single buffer with
895 	 * no padding between individual inputs. The buffer is expected to
896 	 * be aligned to rte_ml_dev_info::align_size.
897 	 *
898 	 * When I/O segmentation is supported by the device, the packed
899 	 * data can be split into multiple segments. In this case, each
900 	 * segment is expected to be aligned to rte_ml_dev_info::align_size
901 	 *
902 	 * Same applies to output.
903 	 *
904 	 * @see struct rte_ml_dev_info::max_segments
905 	 */
906 	RTE_ML_IO_LAYOUT_SPLIT
907 	/**< Each input for the model should be stored as separate buffers
908 	 * and each input should be aligned to rte_ml_dev_info::align_size.
909 	 *
910 	 * When I/O segmentation is supported, each input can be split into
911 	 * multiple segments. In this case, each segment is expected to be
912 	 * aligned to rte_ml_dev_info::align_size
913 	 *
914 	 * Same applies to output.
915 	 *
916 	 * @see struct rte_ml_dev_info::max_segments
917 	 */
918 };
919 
920 /**
921  * Input and output data information structure
922  *
923  * Specifies the type and shape of input and output data.
924  */
925 struct rte_ml_io_info {
926 	char name[RTE_ML_STR_MAX];
927 	/**< Name of data */
928 	uint32_t nb_dims;
929 	/**< Number of dimensions in shape */
930 	uint32_t *shape;
931 	/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */
932 	enum rte_ml_io_type type;
933 	/**< Type of data
934 	 * @see enum rte_ml_io_type
935 	 */
936 	uint64_t nb_elements;
937 	/**< Number of elements in tensor */
938 	uint64_t size;
939 	/**< Size of tensor in bytes */
940 };
941 
942 /** Model information structure */
943 struct rte_ml_model_info {
944 	char name[RTE_ML_STR_MAX];
945 	/**< Model name. */
946 	char version[RTE_ML_STR_MAX];
947 	/**< Model version */
948 	uint16_t model_id;
949 	/**< Model ID */
950 	uint16_t device_id;
951 	/**< Device ID */
952 	enum rte_ml_io_layout io_layout;
953 	/**< I/O buffer layout for the model */
954 	uint16_t min_batches;
955 	/**< Minimum number of batches that the model can process
956 	 * in one inference request
957 	 */
958 	uint16_t max_batches;
959 	/**< Maximum number of batches that the model can process
960 	 * in one inference request
961 	 */
962 	uint32_t nb_inputs;
963 	/**< Number of inputs */
964 	const struct rte_ml_io_info *input_info;
965 	/**< Input info array. Array size is equal to nb_inputs */
966 	uint32_t nb_outputs;
967 	/**< Number of outputs */
968 	const struct rte_ml_io_info *output_info;
969 	/**< Output info array. Array size is equal to nb_outputs */
970 	uint64_t wb_size;
971 	/**< Size of model weights and bias */
972 };
973 
974 /**
975  * Get ML model information.
976  *
977  * @param[in] dev_id
978  *   The identifier of the device.
979  * @param[in] model_id
980  *   Identifier for the model created
981  * @param[out] model_info
982  *   Pointer to a model info structure
983  *
984  * @return
985  *   - Returns 0 on success
986  *   - Returns negative value on failure
987  */
988 __rte_experimental
989 int
990 rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
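
/*
 * Illustrative sketch (not part of this API): querying a loaded model and walking its input
 * descriptions. The same pattern applies to output_info/nb_outputs.
 *
 *	struct rte_ml_model_info info;
 *	uint32_t i;
 *
 *	rte_ml_model_info_get(dev_id, model_id, &info);
 *	for (i = 0; i < info.nb_inputs; i++)
 *		printf("input %u: %s, %" PRIu64 " elements, %" PRIu64 " bytes\n", i,
 *		       info.input_info[i].name, info.input_info[i].nb_elements,
 *		       info.input_info[i].size);
 */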
991 
992 /**
993  * Update the model parameters without unloading the model.
994  *
995  * Update model parameters such as weights and bias without unloading the model.
996  * rte_ml_model_stop() must be called before invoking this API.
997  *
998  * @param[in] dev_id
999  *   The identifier of the device.
1000  * @param[in] model_id
1001  *   Identifier for the model created
1002  * @param[in] buffer
1003  *   Pointer to the model weights and bias buffer.
1004  * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
1005  *
1006  * @return
1007  *   - Returns 0 on success
1008  *   - Returns negative value on failure
1009  */
1010 __rte_experimental
1011 int
1012 rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
1013 
1014 /* IO operations */
1015 
1016 /**
1017  * Quantize input data.
1018  *
1019  * Quantization converts data from a higher precision type to a lower precision type to improve
1020  * the throughput and efficiency of the model execution with minimal loss of accuracy.
1021  * Types of dequantized data and quantized data are specified by the model.
1022  *
1023  * @param[in] dev_id
1024  *   The identifier of the device.
1025  * @param[in] model_id
1026  *   Identifier for the model
1027  * @param[in] dbuffer
1028  *   Address of dequantized input data
1029  * @param[in] qbuffer
1030  *   Address of quantized input data
1031  *
1032  * @return
1033  *   - Returns 0 on success
1034  *   - Returns negative value on failure
1035  */
1036 __rte_experimental
1037 int
1038 rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
1039 		   struct rte_ml_buff_seg **qbuffer);
1040 
1041 /**
1042  * Dequantize output data.
1043  *
1044  * Dequantization converts data from a lower precision type to a higher precision type.
1045  * Types of quantized and dequantized data are specified by the model.
1046  *
1047  * @param[in] dev_id
1048  *   The identifier of the device.
1049  * @param[in] model_id
1050  *   Identifier for the model
1051  * @param[in] qbuffer
1052  *   Address of quantized output data
1053  * @param[in] dbuffer
1054  *   Address of dequantized output data
1055  *
1056  * @return
1057  *   - Returns 0 on success
1058  *   - Returns negative value on failure
1059  */
1060 __rte_experimental
1061 int
1062 rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
1063 		     struct rte_ml_buff_seg **dbuffer);
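
/*
 * Illustrative sketch (not part of this API): quantizing application data before enqueue and
 * dequantizing the result after dequeue for a model using RTE_ML_IO_LAYOUT_PACKED (one segment
 * per buffer). The four rte_ml_buff_seg objects are assumed to describe buffers sized and
 * aligned according to the device and model information.
 *
 *	struct rte_ml_buff_seg *d_in[1] = { &dseg_in };
 *	struct rte_ml_buff_seg *q_in[1] = { &qseg_in };
 *	struct rte_ml_buff_seg *q_out[1] = { &qseg_out };
 *	struct rte_ml_buff_seg *d_out[1] = { &dseg_out };
 *
 *	rte_ml_io_quantize(dev_id, model_id, d_in, q_in);
 *	// ... run the inference with q_in as op->input and q_out as op->output ...
 *	rte_ml_io_dequantize(dev_id, model_id, q_out, d_out);
 */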
1064 
1065 /* ML op pool operations */
1066 
1067 /**
1068  * Create an ML operation pool
1069  *
1070  * @param name
1071  *   ML operations pool name
1072  * @param nb_elts
1073  *   Number of elements in pool
1074  * @param cache_size
1075  *   Number of elements to cache on lcore, see
1076  *   *rte_mempool_create* for further details about cache size
1077  * @param user_size
1078  *   Size of private data to allocate for user with each operation
1079  * @param socket_id
1080  *   Socket identifier to allocate memory on
1081  * @return
1082  *  - On success pointer to mempool
1083  *  - On failure NULL
1084  */
1085 __rte_experimental
1086 struct rte_mempool *
1087 rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
1088 		      uint16_t user_size, int socket_id);
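
/*
 * Illustrative sketch (not part of this API): creating an op pool and drawing a burst of ops from
 * it with the mempool API. The pool, cache and burst sizes are arbitrary.
 *
 *	struct rte_mempool *op_pool;
 *	struct rte_ml_op *ops[32];
 *
 *	op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, rte_ml_dev_socket_id(dev_id));
 *	if (op_pool == NULL)
 *		return -1;	// pool creation failed
 *	if (rte_mempool_get_bulk(op_pool, (void **)ops, 32) != 0)
 *		return -1;	// not enough free ops in the pool
 */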
1089 
1090 /**
1091  * Free an ML operation pool
1092  *
1093  * @param mempool
1094  *   A pointer to the mempool structure.
1095  *   If NULL then, the function does nothing.
1096  */
1097 __rte_experimental
1098 void
1099 rte_ml_op_pool_free(struct rte_mempool *mempool);
1100 
1101 #ifdef __cplusplus
1102 }
1103 #endif
1104 
1105 #endif /* RTE_MLDEV_H */
1106