xref: /dpdk/lib/mldev/rte_mldev.h (revision 41f6bdc7615ad36b235a0ccd0ad92736832018d0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_H
6 #define RTE_MLDEV_H
7 
8 /**
9  * @file rte_mldev.h
10  *
11  * @warning
12  * @b EXPERIMENTAL:
13  * All functions in this file may be changed or removed without prior notice.
14  *
15  * ML (Machine Learning) device API.
16  *
17  * The ML framework is built on the following model:
18  *
19  *
20  *     +-----------------+               rte_ml_[en|de]queue_burst()
21  *     |                 |                          |
22  *     |     Machine     o------+     +--------+    |
23  *     |     Learning    |      |     | queue  |    |    +------+
24  *     |     Inference   o------+-----o        |<===o===>|Core 0|
25  *     |     Engine      |      |     | pair 0 |         +------+
26  *     |                 o----+ |     +--------+
27  *     |                 |    | |
28  *     +-----------------+    | |     +--------+
29  *              ^             | |     | queue  |         +------+
30  *              |             | +-----o        |<=======>|Core 1|
31  *              |             |       | pair 1 |         +------+
32  *              |             |       +--------+
33  *     +--------+--------+    |
34  *     | +-------------+ |    |       +--------+
35  *     | |   Model 0   | |    |       | queue  |         +------+
36  *     | +-------------+ |    +-------o        |<=======>|Core N|
37  *     | +-------------+ |            | pair N |         +------+
38  *     | |   Model 1   | |            +--------+
39  *     | +-------------+ |
40  *     | +-------------+ |<------> rte_ml_model_load()
41  *     | |   Model ..  | |-------> rte_ml_model_info_get()
42  *     | +-------------+ |<------- rte_ml_model_start()
43  *     | +-------------+ |<------- rte_ml_model_stop()
44  *     | |   Model N   | |<------- rte_ml_model_params_update()
45  *     | +-------------+ |<------- rte_ml_model_unload()
46  *     +-----------------+
47  *
48  * ML Device: A hardware- or software-based implementation of the ML device API for
49  * running inferences using a pre-trained ML model.
50  *
51  * ML Model: An ML model is an algorithm trained over a dataset. A model consists of the
52  * procedure/algorithm and the data/pattern required to make predictions on live data.
53  * Once the model is created and trained outside of the DPDK scope, it can be loaded
54  * via rte_ml_model_load() and then started using the rte_ml_model_start() API.
55  * rte_ml_model_params_update() can be used to update model parameters such as weights
56  * and bias without unloading the model via rte_ml_model_unload().
57  *
58  * ML Inference: ML inference is the process of feeding data to the model via the
59  * rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
60  * outputs/predictions from the started model.
61  *
62  * In all functions of the ML device API, the ML device is designated by an
63  * integer >= 0 named the device identifier *dev_id*.
64  *
65  * The functions exported by the ML device API to set up a device designated by
66  * its device identifier must be invoked in the following order:
67  *
68  *      - rte_ml_dev_configure()
69  *      - rte_ml_dev_queue_pair_setup()
70  *      - rte_ml_dev_start()
71  *
72  * A model is required to run inference operations with the user-specified inputs.
73  * The application needs to invoke the ML model APIs in the following order before enqueuing
74  * inference jobs.
75  *
76  *      - rte_ml_model_load()
77  *      - rte_ml_model_start()
78  *
79  * A model can be loaded on a device only after the device has been configured and can be
80  * started or stopped only after a device has been started.
81  *
82  * The rte_ml_model_info_get() API is provided to retrieve information related to the model.
83  * The information includes the shape and type of the inputs and outputs required for inference.
84  *
85  * Data quantization and dequantization are key aspects of the ML domain. This involves
86  * conversion of input data from a higher precision to a lower precision data type and vice-versa
87  * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
88  * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
89  * and output buffers holding data for multiple batches.
90  *
91  * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to get
92  * the size of quantized and de-quantized multi-batch input and output buffers.
93  *
94  * The user can optionally update the model parameters with rte_ml_model_params_update() after
95  * invoking the rte_ml_model_stop() API on a given model ID.
96  *
97  * The application can invoke, in any order, the functions exported by the ML API to enqueue
98  * inference jobs and dequeue inference responses.
99  *
100  * If the application wants to change the device configuration (i.e., call
101  * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
102  * device using the rte_ml_dev_stop() API. Likewise, if model parameters need to be updated, then
103  * the application must call rte_ml_model_stop() followed by the rte_ml_model_params_update() API
104  * for the given model. The application does not need to call the rte_ml_dev_stop() API for
105  * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload(), etc.
106  *
107  * Once the device is in the started state after invoking the rte_ml_dev_start() API and the model
108  * is in the started state after invoking the rte_ml_model_start() API, the application can call
109  * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() on the intended device and model ID.
110  *
111  * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
112  *
113  * Typical application utilisation of the ML API will follow the programming flow below
114  * (an illustrative sketch of this flow is given in the comment after this block).
115  *
116  * - rte_ml_dev_configure()
117  * - rte_ml_dev_queue_pair_setup()
118  * - rte_ml_model_load()
119  * - rte_ml_dev_start()
120  * - rte_ml_model_start()
121  * - rte_ml_model_info_get()
122  * - rte_ml_enqueue_burst()
123  * - rte_ml_dequeue_burst()
124  * - rte_ml_model_stop()
125  * - rte_ml_model_unload()
126  * - rte_ml_dev_stop()
127  * - rte_ml_dev_close()
128  *
129  * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
130  * are lock-free functions which are assumed not to be invoked in parallel on different logical
131  * cores on the same target object. For instance, the dequeue function of a poll mode driver cannot
132  * be invoked in parallel on two logical cores to operate on the same queue pair. Of course, this
133  * function can be invoked in parallel by different logical cores on different queue pairs.
134  * It is the responsibility of the user application to enforce this rule.
135  */
136 
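/*
 * A minimal, illustrative sketch of the programming flow described above. This is not part of
 * the API: dev_id, model_data and model_len are hypothetical, inference submission is elided,
 * and error handling is reduced to early returns.
 *
 *   int16_t dev_id = 0;
 *   uint16_t model_id;
 *   struct rte_ml_dev_info info;
 *   struct rte_ml_dev_config conf;
 *   struct rte_ml_dev_qp_conf qp_conf;
 *   struct rte_ml_model_params params;
 *
 *   if (rte_ml_dev_info_get(dev_id, &info) != 0)
 *       return -1;
 *
 *   conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *   conf.nb_models = 1;
 *   conf.nb_queue_pairs = 1;
 *   if (rte_ml_dev_configure(dev_id, &conf) != 0)
 *       return -1;
 *
 *   qp_conf.nb_desc = info.max_desc;    // must not exceed rte_ml_dev_info::max_desc
 *   qp_conf.cb = NULL;
 *   if (rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, conf.socket_id) != 0)
 *       return -1;
 *
 *   params.addr = model_data;           // model buffer prepared by the application
 *   params.size = model_len;
 *   if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
 *       return -1;
 *
 *   if (rte_ml_dev_start(dev_id) != 0 || rte_ml_model_start(dev_id, model_id) != 0)
 *       return -1;
 *
 *   // ... enqueue and dequeue inference jobs with rte_ml_enqueue_burst() and
 *   // rte_ml_dequeue_burst() ...
 *
 *   rte_ml_model_stop(dev_id, model_id);
 *   rte_ml_model_unload(dev_id, model_id);
 *   rte_ml_dev_stop(dev_id);
 *   rte_ml_dev_close(dev_id);
 */
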
137 #include <rte_common.h>
138 #include <rte_log.h>
139 #include <rte_mempool.h>
140 
141 #ifdef __cplusplus
142 extern "C" {
143 #endif
144 
145 /* Logging Macro */
146 extern int rte_ml_dev_logtype;
147 
148 #define RTE_MLDEV_LOG(level, fmt, args...)                                                         \
149 	rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)
150 
151 #define RTE_ML_STR_MAX 128
152 /**< Maximum length of name string */
153 
154 #define RTE_MLDEV_DEFAULT_MAX 32
155 /** Maximum number of devices if rte_ml_dev_init() is not called. */
156 
157 /* Device operations */
158 
159 /**
160  * Initialize the device array before probing devices. If not called, the first device probed would
161  * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
162  *
163  * @param dev_max
164  *   Maximum number of devices.
165  *
166  * @return
167  *   0 on success, -rte_errno otherwise:
168  *   - ENOMEM if out of memory
169  *   - EINVAL if 0 size
170  *   - EBUSY if already initialized
171  */
172 __rte_experimental
173 int
174 rte_ml_dev_init(size_t dev_max);
175 
176 /**
177  * Get the total number of ML devices that have been successfully initialised.
178  *
179  * @return
180  *   - The total number of usable ML devices.
181  */
182 __rte_experimental
183 uint16_t
184 rte_ml_dev_count(void);
185 
186 /**
187  * Check if the device is in the ready state.
188  *
189  * @param dev_id
190  *   The identifier of the device.
191  *
192  * @return
193  *   - 0 if the device is not in the ready state.
194  *   - 1 if the device is in the ready state.
195  */
196 __rte_experimental
197 int
198 rte_ml_dev_is_valid_dev(int16_t dev_id);
199 
200 /**
201  * Return the NUMA socket to which a device is connected.
202  *
203  * @param dev_id
204  *   The identifier of the device.
205  *
206  * @return
207  *   - The NUMA socket id to which the device is connected
208  *   - 0 If the socket could not be determined.
209  *   - -EINVAL: if the dev_id value is not valid.
210  */
211 __rte_experimental
212 int
213 rte_ml_dev_socket_id(int16_t dev_id);
214 
215 /**  ML device information */
216 struct rte_ml_dev_info {
217 	const char *driver_name;
218 	/**< Driver name */
219 	uint16_t max_models;
220 	/**< Maximum number of models supported by the device.
221 	 * @see struct rte_ml_dev_config::nb_models
222 	 */
223 	uint16_t max_queue_pairs;
224 	/**< Maximum number of queue pairs supported by the device.
225 	 * @see struct rte_ml_dev_config::nb_queue_pairs
226 	 */
227 	uint16_t max_desc;
228 	/**< Maximum number of descriptors allowed per queue pair by the device.
229 	 * @see struct rte_ml_dev_qp_conf::nb_desc
230 	 */
231 	uint16_t max_segments;
232 	/**< Maximum number of scatter-gather entries supported by the device.
233 	 * @see struct rte_ml_buff_seg  struct rte_ml_buff_seg::next
234 	 */
235 	uint16_t min_align_size;
236 	/**< Minimum alignment size of IO buffers used by the device. */
237 };
238 
239 /**
240  * Retrieve the information of the device.
241  *
242  * @param dev_id
243  *   The identifier of the device.
244  * @param dev_info
245  *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
246  *
247  * @return
248  *   - 0: Success, driver updates the information of the ML device
249  *   - < 0: Error code returned by the driver info get function.
250  */
251 __rte_experimental
252 int
253 rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
254 
255 /** ML device configuration structure */
256 struct rte_ml_dev_config {
257 	int socket_id;
258 	/**< Socket to allocate resources on. */
259 	uint16_t nb_models;
260 	/**< Number of models to be loaded on the device.
261 	 * This value cannot exceed the max_models which was previously provided in
262 	 * struct rte_ml_dev_info::max_models
263 	 */
264 	uint16_t nb_queue_pairs;
265 	/**< Number of queue pairs to configure on this device.
266 	 * This value cannot exceed the max_queue_pairs which was previously provided in
267 	 * struct rte_ml_dev_info::max_queue_pairs
268 	 */
269 };
270 
271 /**
272  * Configure an ML device.
273  *
274  * This function must be invoked first before any other function in the API.
275  *
276  * An ML device can be re-configured when in a stopped state. The device cannot be re-configured
277  * after rte_ml_dev_close() is called.
278  *
279  * The caller may use rte_ml_dev_info_get() to get the capability of each resource available for
280  * this ML device.
281  *
282  * @param dev_id
283  *   The identifier of the device to configure.
284  * @param config
285  *   The ML device configuration structure.
286  *
287  * @return
288  *   - 0: Success, device configured.
289  *   - < 0: Error code returned by the driver configuration function.
290  */
291 __rte_experimental
292 int
293 rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
294 
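/*
 * Illustrative sketch (not part of the API): size the device configuration from the
 * capabilities reported by rte_ml_dev_info_get(). dev_id is assumed to refer to a probed
 * device, the requested counts of 4 models and 2 queue pairs are arbitrary, and error
 * handling is minimal.
 *
 *   struct rte_ml_dev_info info;
 *   struct rte_ml_dev_config conf;
 *
 *   if (!rte_ml_dev_is_valid_dev(dev_id))
 *       return -ENODEV;
 *   if (rte_ml_dev_info_get(dev_id, &info) != 0)
 *       return -EIO;
 *
 *   // Request no more resources than the device reports as supported.
 *   conf.socket_id = rte_ml_dev_socket_id(dev_id);
 *   conf.nb_models = RTE_MIN((uint16_t)4, info.max_models);
 *   conf.nb_queue_pairs = RTE_MIN((uint16_t)2, info.max_queue_pairs);
 *   if (rte_ml_dev_configure(dev_id, &conf) != 0)
 *       return -EIO;
 */
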
295 /* Forward declaration */
296 struct rte_ml_op;
297 
298 /**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
299 typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
300 
301 /** ML device queue pair configuration structure. */
302 struct rte_ml_dev_qp_conf {
303 	uint32_t nb_desc;
304 	/**< Number of descriptors per queue pair.
305 	 * This value cannot exceed the max_desc which was previously provided in
306 	 * struct rte_ml_dev_info::max_desc
307 	 */
308 	rte_ml_dev_stop_flush_t cb;
309 	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
310 	 * Value NULL is allowed, in which case the callback will not be invoked.
311 	 * This function can be used to properly dispose of outstanding ML ops from all
312 	 * queue pairs, for example ops containing memory pointers.
313 	 * @see rte_ml_dev_stop()
314 	 */
315 };
316 
317 /**
318  * Set up a queue pair for a device. This should only be called when the device is stopped.
319  *
320  * @param dev_id
321  *   The identifier of the device.
322  * @param queue_pair_id
323  *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
324  * previously supplied to rte_ml_dev_configure().
325  * @param qp_conf
326  *   The pointer to the configuration data to be used for the queue pair.
327  * @param socket_id
328  *   The *socket_id* argument is the socket identifier in case of NUMA.
329  * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
330  * for the queue pair.
331  *
332  * @return
333  *   - 0: Success, queue pair correctly set up.
334  *   - < 0: Queue pair configuration failed.
335  */
336 __rte_experimental
337 int
338 rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
339 			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
340 
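/*
 * Illustrative sketch (not part of the API): set up every configured queue pair with a flush
 * callback that returns in-flight ops to their mempool at rte_ml_dev_stop() time. The callback
 * name op_flush_cb is hypothetical, nb_desc of 128 is arbitrary and must not exceed
 * rte_ml_dev_info::max_desc, and conf is assumed to be the structure passed to
 * rte_ml_dev_configure().
 *
 *   static void
 *   op_flush_cb(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op)
 *   {
 *       RTE_SET_USED(dev_id);
 *       RTE_SET_USED(qp_id);
 *       rte_mempool_put(op->mempool, op);   // return the flushed op to its pool
 *   }
 *
 *   struct rte_ml_dev_qp_conf qp_conf = { .nb_desc = 128, .cb = op_flush_cb };
 *   uint16_t qp_id;
 *
 *   for (qp_id = 0; qp_id < conf.nb_queue_pairs; qp_id++)
 *       if (rte_ml_dev_queue_pair_setup(dev_id, qp_id, &qp_conf, SOCKET_ID_ANY) != 0)
 *           return -EIO;
 */
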
341 /**
342  * Start an ML device.
343  *
344  * The device start step consists of setting the configured features and enabling the ML device
345  * to accept inference jobs.
346  *
347  * @param dev_id
348  *   The identifier of the device.
349  *
350  * @return
351  *   - 0: Success, device started.
352  *   - <0: Error code of the driver device start function.
353  */
354 __rte_experimental
355 int
356 rte_ml_dev_start(int16_t dev_id);
357 
358 /**
359  * Stop an ML device. A stopped device cannot accept inference jobs.
360  * The device can be restarted with a call to rte_ml_dev_start().
361  *
362  * @param dev_id
363  *   The identifier of the device.
364  *
365  * @return
366  *   - 0: Success, device stopped.
367  *   - <0: Error code of the driver device stop function.
368  */
369 __rte_experimental
370 int
371 rte_ml_dev_stop(int16_t dev_id);
372 
373 /**
374  * Close an ML device. The device cannot be restarted!
375  *
376  * @param dev_id
377  *   The identifier of the device.
378  *
379  * @return
380  *  - 0 on successfully closing device.
381  *  - <0 on failure to close device.
382  */
383 __rte_experimental
384 int
385 rte_ml_dev_close(int16_t dev_id);
386 
387 /** Status of ML operation */
388 enum rte_ml_op_status {
389 	RTE_ML_OP_STATUS_SUCCESS = 0,
390 	/**< Operation completed successfully */
391 	RTE_ML_OP_STATUS_NOT_PROCESSED,
392 	/**< Operation has not yet been processed by the device. */
393 	RTE_ML_OP_STATUS_ERROR,
394 	/**< Operation completed with error.
395 	 * Application can invoke rte_ml_op_error_get() to get PMD specific
396 	 * error code if needed.
397 	 */
398 };
399 
400 /** ML operation's input and output buffer representation as a scatter-gather list.
401  */
402 struct rte_ml_buff_seg {
403 	rte_iova_t iova_addr;
404 	/**< IOVA address of segment buffer. */
405 	void *addr;
406 	/**< Virtual address of segment buffer. */
407 	uint32_t length;
408 	/**< Segment length. */
409 	uint32_t reserved;
410 	/**< Reserved for future use. */
411 	struct rte_ml_buff_seg *next;
412 	/**< Points to next segment. Value NULL represents the last segment. */
413 };
414 
415 /**
416  * ML Operation.
417  *
418  * This structure contains data related to performing an ML operation on the buffers using
419  * the model specified through model_id.
420  */
421 struct rte_ml_op {
422 	uint16_t model_id;
423 	/**< Model ID to be used for the operation. */
424 	uint16_t nb_batches;
425 	/**< Number of batches. Minimum value must be one.
426 	 * The input buffer must hold the inference data for each batch contiguously.
427 	 */
428 	uint32_t reserved;
429 	/**< Reserved for future use. */
430 	struct rte_mempool *mempool;
431 	/**< Pool from which operation is allocated. */
432 	struct rte_ml_buff_seg input;
433 	/**< Input buffer to hold the inference data. */
434 	struct rte_ml_buff_seg output;
435 	/**< Output buffer to hold the inference output by the driver. */
436 	RTE_STD_C11
437 	union {
438 		uint64_t user_u64;
439 		/**< User data as uint64_t.*/
440 		void *user_ptr;
441 		/**< User data as void*.*/
442 	};
443 	enum rte_ml_op_status status;
444 	/**< Operation status. */
445 	uint64_t impl_opaque;
446 	/**< Implementation specific opaque value.
447 	 * An implementation may use this field to hold
448 	 * implementation specific value to share between
449 	 * dequeue and enqueue operation.
450 	 * The application should not modify this field.
451 	 */
452 } __rte_cache_aligned;
453 
454 /* Enqueue/Dequeue operations */
455 
456 /**
457  * Enqueue a burst of ML inferences for processing on an ML device.
458  *
459  * The rte_ml_enqueue_burst() function is invoked to place ML inference
460  * operations on the queue *qp_id* of the device designated by its *dev_id*.
461  *
462  * The *nb_ops* parameter is the number of inferences to process which are
463  * supplied in the *ops* array of *rte_ml_op* structures.
464  *
465  * The rte_ml_enqueue_burst() function returns the number of inferences it
466  * actually enqueued for processing. A return value equal to *nb_ops* means that
467  * all operations have been enqueued.
468  *
469  * @param dev_id
470  *   The identifier of the device.
471  * @param qp_id
472  *   The index of the queue pair on which inferences are to be enqueued for processing.
473  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
474  * *rte_ml_dev_configure*.
475  * @param ops
476  *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
477  * ML inferences to be processed.
478  * @param nb_ops
479  *   The number of operations to process.
480  *
481  * @return
482  *   The number of inference operations actually enqueued to the ML device.
483  * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
484  * is full or if invalid parameters are specified in a *rte_ml_op*.
485  */
486 __rte_experimental
487 uint16_t
488 rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
489 
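/*
 * Illustrative sketch (not part of the API): prepare and enqueue a single inference on queue
 * pair 0. op_pool, model_id, q_input, q_output, q_input_size and q_output_size are hypothetical;
 * the quantized buffers are assumed to have been sized with rte_ml_io_input_size_get() /
 * rte_ml_io_output_size_get() and the input filled via rte_ml_io_quantize().
 *
 *   struct rte_ml_op *op;
 *
 *   if (rte_mempool_get(op_pool, (void **)&op) != 0)
 *       return -ENOMEM;                  // no free ops in the pool
 *
 *   op->model_id = model_id;
 *   op->nb_batches = 1;
 *   op->mempool = op_pool;
 *   op->input.addr = q_input;            // quantized input, single segment
 *   op->input.iova_addr = rte_mem_virt2iova(q_input);
 *   op->input.length = q_input_size;
 *   op->input.next = NULL;
 *   op->output.addr = q_output;          // buffer for the quantized output
 *   op->output.iova_addr = rte_mem_virt2iova(q_output);
 *   op->output.length = q_output_size;
 *   op->output.next = NULL;
 *
 *   while (rte_ml_enqueue_burst(dev_id, 0, &op, 1) != 1)
 *       ;                                // queue pair full, retry
 */
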
490 /**
491  * Dequeue a burst of processed ML inference operations from a queue on the ML device.
492  * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
493  * in the *ops* array.
494  *
495  * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
496  * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
497  *
498  * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
499  * and this is likely to signify that other processed operations remain in the device's output queue.
500  * Applications implementing a "retrieve as many processed operations as possible" policy can check
501  * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
502  * *nb_ops* is returned.
503  *
504  * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
505  * the corresponding overhead.
506  *
507  * @param dev_id
508  *   The identifier of the device.
509  * @param qp_id
510  *   The index of the queue pair from which to retrieve processed operations.
511  * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
512  * rte_ml_dev_configure().
513  * @param ops
514  *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
515  * store *nb_ops* pointers in it.
516  * @param nb_ops
517  *   The maximum number of inferences to dequeue.
518  *
519  * @return
520  *   The number of operations actually dequeued, which is the number of pointers
521  * to *rte_ml_op* structures effectively supplied to the *ops* array.
522  */
523 __rte_experimental
524 uint16_t
525 rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
526 
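/*
 * Illustrative sketch (not part of the API): drain completed inferences from queue pair 0 and
 * return the ops to their pool. The burst size of 32 is arbitrary.
 *
 *   struct rte_ml_op *ops[32];
 *   uint16_t nb_deq, i;
 *
 *   do {
 *       nb_deq = rte_ml_dequeue_burst(dev_id, 0, ops, RTE_DIM(ops));
 *       for (i = 0; i < nb_deq; i++) {
 *           if (ops[i]->status != RTE_ML_OP_STATUS_SUCCESS) {
 *               // see rte_ml_op_error_get() for PMD specific details
 *           }
 *           rte_mempool_put(ops[i]->mempool, ops[i]);
 *       }
 *   } while (nb_deq == RTE_DIM(ops));    // full burst: more completions may be pending
 */
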
527 /**
528  * Verbose error structure definition.
529  */
530 struct rte_ml_op_error {
531 	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
532 	uint64_t errcode;	      /**< Vendor specific error code. */
533 };
534 
535 /**
536  * Get PMD specific error information for an ML op.
537  *
538  * When an ML operation has completed with RTE_ML_OP_STATUS_ERROR as its status,
539  * this API allows the application to get PMD specific error details.
540  *
541  * @param[in] dev_id
542  *   Device identifier
543  * @param[in] op
544  *   Handle of ML operation
545  * @param[in] error
546  *   Address of structure rte_ml_op_error to be filled
547  *
548  * @return
549  *   - Returns 0 on success
550  *   - Returns negative value on failure
551  */
552 __rte_experimental
553 int
554 rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
555 
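/*
 * Illustrative sketch (not part of the API): report details for an op that completed with
 * RTE_ML_OP_STATUS_ERROR. dev_id and op are assumed to be a valid device and a dequeued op.
 *
 *   struct rte_ml_op_error err;
 *
 *   if (op->status == RTE_ML_OP_STATUS_ERROR &&
 *       rte_ml_op_error_get(dev_id, op, &err) == 0)
 *       RTE_MLDEV_LOG(ERR, "inference error: %s", err.message);
 */
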
556 /* Statistics operations */
557 
558 /** Device statistics. */
559 struct rte_ml_dev_stats {
560 	uint64_t enqueued_count;
561 	/**< Count of all operations enqueued */
562 	uint64_t dequeued_count;
563 	/**< Count of all operations dequeued */
564 	uint64_t enqueue_err_count;
565 	/**< Total error count on operations enqueued */
566 	uint64_t dequeue_err_count;
567 	/**< Total error count on operations dequeued */
568 };
569 
570 /**
571  * Retrieve the general I/O statistics of a device.
572  *
573  * @param dev_id
574  *   The identifier of the device.
575  * @param stats
576  *   Pointer to structure to where statistics will be copied.
577  * On error, this location may or may not have been modified.
578  * @return
579  *   - 0 on success
580  *   - -EINVAL: If invalid parameter pointer is provided.
581  */
582 __rte_experimental
583 int
584 rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
585 
586 /**
587  * Reset the statistics of a device.
588  *
589  * @param dev_id
590  *   The identifier of the device.
591  */
592 __rte_experimental
593 void
594 rte_ml_dev_stats_reset(int16_t dev_id);
595 
596 /**
597  * Selects the component of the mldev to retrieve statistics from.
598  */
599 enum rte_ml_dev_xstats_mode {
600 	RTE_ML_DEV_XSTATS_DEVICE,
601 	/**< Device xstats */
602 	RTE_ML_DEV_XSTATS_MODEL,
603 	/**< Model xstats */
604 };
605 
606 /**
607  * A name-key lookup element for extended statistics.
608  *
609  * This structure is used to map between names and ID numbers for extended ML device statistics.
610  */
611 struct rte_ml_dev_xstats_map {
612 	uint16_t id;
613 	/**< xstat identifier */
614 	char name[RTE_ML_STR_MAX];
615 	/**< xstat name */
616 };
617 
618 /**
619  * Retrieve names of extended statistics of an ML device.
620  *
621  * @param dev_id
622  *   The identifier of the device.
623  * @param mode
624  *   Mode of statistics to retrieve. Choices include the device statistics and model statistics.
625  * @param model_id
626  *   Used to specify the model number in model mode, and is ignored in device mode.
627  * @param[out] xstats_map
628  *   Block of memory to insert names and ids into. Must have a capacity of at least *size*. If set
629  * to NULL, the function returns the required capacity. The id values returned can be passed to
630  * *rte_ml_dev_xstats_get* to select statistics.
631  * @param size
632  *   Capacity of xstats_map (number of entries).
633  * @return
634  *   - Positive value lower or equal to size: success. The return value is the number of entries
635  * filled in the stats table.
636  *   - Positive value higher than size: error, the given statistics table is too small. The return
637  * value corresponds to the size that should be given to succeed. The entries in the table are not
638  * valid and shall not be used by the caller.
639  *   - Negative value on error:
640  *        -ENODEV for invalid *dev_id*.
641  *        -EINVAL for invalid mode, model parameters.
642  *        -ENOTSUP if the device doesn't support this function.
643  */
644 __rte_experimental
645 int
646 rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
647 			    struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
648 
649 /**
650  * Retrieve the value of a single stat by requesting it by name.
651  *
652  * @param dev_id
653  *   The identifier of the device.
654  * @param name
655  *   Name of the stat to retrieve.
656  * @param[out] stat_id
657  *   If non-NULL, the numerical id of the stat will be returned, so that further requests for the
658  * stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to scan
659  * the list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1.
660  * @param[out] value
661  *   Value of the stat to be returned.
662  * @return
663  *   - Zero: No error.
664  *   - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
665  */
666 __rte_experimental
667 int
668 rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
669 
670 /**
671  * Retrieve extended statistics of an ML device.
672  *
673  * @param dev_id
674  *   The identifier of the device.
675  * @param mode
676  *  Mode of statistics to retrieve. Choices include the device statistics and model statistics.
677  * @param model_id
678  *   Used to specify the model id in model mode, and is ignored in device mode.
679  * @param stat_ids
680  *   ID numbers of the stats to get. The ids can be obtained from the stat position in the list
681  * returned by rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
682  * @param[out] values
683  *   Values for each stat requested by ID.
684  * @param nb_ids
685  *   Number of stats requested.
686  * @return
687  *   - Positive value: number of stat entries filled into the values array
688  *   - Negative value on error:
689  *        -ENODEV for invalid *dev_id*.
690  *        -EINVAL for invalid mode, model id or stat id parameters.
691  *        -ENOTSUP if the device doesn't support this function.
692  */
693 __rte_experimental
694 int
695 rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
696 		      const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
697 
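/*
 * Illustrative sketch (not part of the API): list the device-level xstats and read their values.
 * The fixed capacity of 64 entries is arbitrary; a robust application would first call
 * rte_ml_dev_xstats_names_get() with a NULL map to query the required capacity.
 *
 *   struct rte_ml_dev_xstats_map map[64];
 *   uint16_t ids[64];
 *   uint64_t values[64];
 *   int n, i;
 *
 *   n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, RTE_DIM(map));
 *   if (n <= 0 || n > (int)RTE_DIM(map))
 *       return -EIO;                     // error, or the capacity of 64 is too small
 *
 *   for (i = 0; i < n; i++)
 *       ids[i] = map[i].id;
 *   if (rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n) != n)
 *       return -EIO;
 *
 *   for (i = 0; i < n; i++)
 *       printf("%s: %lu\n", map[i].name, (unsigned long)values[i]);
 */
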
698 /**
699  * Reset the values of the xstats of the selected component in the device.
700  *
701  * @param dev_id
702  *   The identifier of the device.
703  * @param mode
704  *   Mode of the statistics to reset. Choose from device or model.
705  * @param model_id
706  *   Model stats to reset. 0 and positive values select models, while -1 indicates all models.
707  * @param stat_ids
708  *   Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
709  * reset. If non-NULL, must point to array of at least *nb_ids* size.
710  * @param nb_ids
711  *   The number of ids available in the *stat_ids* array. Ignored when stat_ids is NULL.
712  * @return
713  *   - Zero: successfully reset the statistics.
714  *   - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
715  */
716 __rte_experimental
717 int
718 rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
719 			const uint16_t stat_ids[], uint16_t nb_ids);
720 
721 /**
722  * Dump internal information about *dev_id* to the FILE* provided in *fd*.
723  *
724  * @param dev_id
725  *   The identifier of the device.
726  * @param fd
727  *   A pointer to a file for output.
728  * @return
729  *   - 0: on success.
730  *   - <0: on failure.
731  */
732 __rte_experimental
733 int
734 rte_ml_dev_dump(int16_t dev_id, FILE *fd);
735 
736 /**
737  * Trigger the ML device self test.
738  *
739  * @param dev_id
740  *   The identifier of the device.
741  * @return
742  *   - 0: Selftest successful.
743  *   - -ENOTSUP: if the device doesn't support selftest.
744  *   - other values < 0 on failure.
745  */
746 __rte_experimental
747 int
748 rte_ml_dev_selftest(int16_t dev_id);
749 
750 /* Model operations */
751 
752 /** ML model load parameters
753  *
754  * Parameters required to load an ML model.
755  */
756 struct rte_ml_model_params {
757 	void *addr;
758 	/**< Address of model buffer */
759 	size_t size;
760 	/**< Size of model buffer */
761 };
762 
763 /**
764  * Load an ML model to the device.
765  *
766  * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
767  *
768  * @param[in] dev_id
769  *   The identifier of the device.
770  * @param[in] params
771  *   Parameters for the model to be loaded.
772  * @param[out] model_id
773  *   Identifier of the model loaded.
774  *
775  * @return
776  *   - 0: Success, Model loaded.
777  *   - < 0: Failure, Error code of the model load driver function.
778  */
779 __rte_experimental
780 int
781 rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
782 
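/*
 * Illustrative sketch (not part of the API): load a model from a buffer prepared by the
 * application (for example read from a file) and start it. model_buf and model_size are
 * hypothetical, and rte_ml_dev_start() is assumed to have been called already.
 *
 *   struct rte_ml_model_params params = { .addr = model_buf, .size = model_size };
 *   uint16_t model_id;
 *
 *   if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
 *       return -EIO;
 *   if (rte_ml_model_start(dev_id, model_id) != 0) {
 *       rte_ml_model_unload(dev_id, model_id);
 *       return -EIO;
 *   }
 */
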
783 /**
784  * Unload an ML model from the device.
785  *
786  * @param[in] dev_id
787  *   The identifier of the device.
788  * @param[in] model_id
789  *   Identifier of the model to be unloaded.
790  *
791  * @return
792  *   - 0: Success, Model unloaded.
793  *   - < 0: Failure, Error code of the model unload driver function.
794  */
795 __rte_experimental
796 int
797 rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
798 
799 /**
800  * Start an ML model for the given device ID.
801  *
802  * Start an ML model to accept inference requests.
803  *
804  * @param[in] dev_id
805  *   The identifier of the device.
806  * @param[in] model_id
807  *   Identifier of the model to be started.
808  *
809  * @return
810  *   - 0: Success, Model started.
811  *   - < 0: Failure, Error code of the model start driver function.
812  */
813 __rte_experimental
814 int
815 rte_ml_model_start(int16_t dev_id, uint16_t model_id);
816 
817 /**
818  * Stop an ML model for the given device ID.
819  *
820  * Model stop disables the ML model from being used for inference jobs.
821  * All inference jobs must have been completed before model stop is attempted.
822  *
823  * @param[in] dev_id
824  *   The identifier of the device.
825  * @param[in] model_id
826  *   Identifier of the model to be stopped.
827  *
828  * @return
829  *   - 0: Success, Model stopped.
830  *   - < 0: Failure, Error code of the model stop driver function.
831  */
832 __rte_experimental
833 int
834 rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
835 
836 /**
837  * Input and output data types. ML models can operate on reduced precision
838  * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
839  * This enum is used to represent the lower precision integer and floating point types used
840  * by ML models.
841  */
842 enum rte_ml_io_type {
843 	RTE_ML_IO_TYPE_UNKNOWN = 0,
844 	/**< Invalid or unknown type */
845 	RTE_ML_IO_TYPE_INT8,
846 	/**< 8-bit integer */
847 	RTE_ML_IO_TYPE_UINT8,
848 	/**< 8-bit unsigned integer */
849 	RTE_ML_IO_TYPE_INT16,
850 	/**< 16-bit integer */
851 	RTE_ML_IO_TYPE_UINT16,
852 	/**< 16-bit unsigned integer */
853 	RTE_ML_IO_TYPE_INT32,
854 	/**< 32-bit integer */
855 	RTE_ML_IO_TYPE_UINT32,
856 	/**< 32-bit unsigned integer */
857 	RTE_ML_IO_TYPE_FP8,
858 	/**< 8-bit floating point number */
859 	RTE_ML_IO_TYPE_FP16,
860 	/**< IEEE 754 16-bit floating point number */
861 	RTE_ML_IO_TYPE_FP32,
862 	/**< IEEE 754 32-bit floating point number */
863 	RTE_ML_IO_TYPE_BFLOAT16
864 	/**< 16-bit brain floating point number. */
865 };
866 
867 /**
868  * Input and output format. This is used to represent the encoding type of the multi-dimensional
869  * data used by ML models.
870  */
871 enum rte_ml_io_format {
872 	RTE_ML_IO_FORMAT_NCHW = 1,
873 	/**< Batch size (N) x channels (C) x height (H) x width (W) */
874 	RTE_ML_IO_FORMAT_NHWC,
875 	/**< Batch size (N) x height (H) x width (W) x channels (C) */
876 	RTE_ML_IO_FORMAT_CHWN,
877 	/**< Channels (C) x height (H) x width (W) x batch size (N) */
878 	RTE_ML_IO_FORMAT_3D,
879 	/**< Format to represent a 3 dimensional data */
880 	RTE_ML_IO_FORMAT_2D,
881 	/**< Format to represent matrix data */
882 	RTE_ML_IO_FORMAT_1D,
883 	/**< Format to represent vector data */
884 	RTE_ML_IO_FORMAT_SCALAR,
885 	/**< Format to represent scalar data */
886 };
887 
888 /**
889  * Input and output shape. This structure represents the encoding format and dimensions
890  * of the tensor or vector.
891  *
892  * The data can be a 4D / 3D tensor, matrix, vector or a scalar. The number of dimensions used
893  * for the data depends on the format. Unused dimensions are to be set to 1.
894  */
895 struct rte_ml_io_shape {
896 	enum rte_ml_io_format format;
897 	/**< Format of the data */
898 	uint32_t w;
899 	/**< First dimension */
900 	uint32_t x;
901 	/**< Second dimension */
902 	uint32_t y;
903 	/**< Third dimension */
904 	uint32_t z;
905 	/**< Fourth dimension */
906 };
907 
908 /** Input and output data information structure
909  *
910  * Specifies the type and shape of input and output data.
911  */
912 struct rte_ml_io_info {
913 	char name[RTE_ML_STR_MAX];
914 	/**< Name of data */
915 	struct rte_ml_io_shape shape;
916 	/**< Shape of data */
917 	enum rte_ml_io_type qtype;
918 	/**< Type of quantized data */
919 	enum rte_ml_io_type dtype;
920 	/**< Type of de-quantized data */
921 };
922 
923 /** Model information structure */
924 struct rte_ml_model_info {
925 	char name[RTE_ML_STR_MAX];
926 	/**< Model name. */
927 	char version[RTE_ML_STR_MAX];
928 	/**< Model version */
929 	uint16_t model_id;
930 	/**< Model ID */
931 	uint16_t device_id;
932 	/**< Device ID */
933 	uint16_t batch_size;
934 	/**< Maximum number of batches that the model can process simultaneously */
935 	uint32_t nb_inputs;
936 	/**< Number of inputs */
937 	const struct rte_ml_io_info *input_info;
938 	/**< Input info array. Array size is equal to nb_inputs */
939 	uint32_t nb_outputs;
940 	/**< Number of outputs */
941 	const struct rte_ml_io_info *output_info;
942 	/**< Output info array. Array size is equal to nb_outputs */
943 	uint64_t wb_size;
944 	/**< Size of model weights and bias */
945 };
946 
947 /**
948  * Get ML model information.
949  *
950  * @param[in] dev_id
951  *   The identifier of the device.
952  * @param[in] model_id
953  *   Identifier for the model created
954  * @param[out] model_info
955  *   Pointer to a model info structure
956  *
957  * @return
958  *   - Returns 0 on success
959  *   - Returns negative value on failure
960  */
961 __rte_experimental
962 int
963 rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
964 
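/*
 * Illustrative sketch (not part of the API): print the I/O description of a loaded model.
 * dev_id and model_id are assumed to be valid identifiers.
 *
 *   struct rte_ml_model_info info;
 *   uint32_t i;
 *
 *   if (rte_ml_model_info_get(dev_id, model_id, &info) != 0)
 *       return -EIO;
 *
 *   printf("model %s (version %s), batch_size %u\n", info.name, info.version, info.batch_size);
 *   for (i = 0; i < info.nb_inputs; i++)
 *       printf("input %s: qtype %d, dtype %d\n", info.input_info[i].name,
 *              (int)info.input_info[i].qtype, (int)info.input_info[i].dtype);
 *   for (i = 0; i < info.nb_outputs; i++)
 *       printf("output %s: qtype %d, dtype %d\n", info.output_info[i].name,
 *              (int)info.output_info[i].qtype, (int)info.output_info[i].dtype);
 */
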
965 /**
966  * Update the model parameters without unloading the model.
967  *
968  * Update model parameters such as weights and bias without unloading the model.
969  * rte_ml_model_stop() must be called before invoking this API.
970  *
971  * @param[in] dev_id
972  *   The identifier of the device.
973  * @param[in] model_id
974  *   Identifier for the model created
975  * @param[in] buffer
976  *   Pointer to the model weights and bias buffer.
977  * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
978  *
979  * @return
980  *   - Returns 0 on success
981  *   - Returns negative value on failure
982  */
983 __rte_experimental
984 int
985 rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
986 
987 /* IO operations */
988 
989 /**
990  * Get size of quantized and dequantized input buffers.
991  *
992  * Calculate the size of buffers required for quantized and dequantized input data.
993  * This API returns the buffer sizes for the number of batches provided and considers
994  * the alignment requirements as per the PMD. Input sizes computed by this API can
995  * be used by the application to allocate buffers.
996  *
997  * @param[in] dev_id
998  *   The identifier of the device.
999  * @param[in] model_id
1000  *   Identifier for the model created
1001  * @param[in] nb_batches
1002  *   Number of batches of input to be processed in a single inference job
1003  * @param[out] input_qsize
1004  *   Quantized input size pointer.
1005  * NULL value is allowed, in which case input_qsize is not calculated by the driver.
1006  * @param[out] input_dsize
1007  *   Dequantized input size pointer.
1008  * NULL value is allowed, in which case input_dsize is not calculated by the driver.
1009  *
1010  * @return
1011  *   - Returns 0 on success
1012  *   - Returns negative value on failure
1013  */
1014 __rte_experimental
1015 int
1016 rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
1017 			 uint64_t *input_qsize, uint64_t *input_dsize);
1018 
1019 /**
1020  * Get size of quantized and dequantized output buffers.
1021  *
1022  * Calculate the size of buffers required for quantized and dequantized output data.
1023  * This API returns the buffer sizes for the number of batches provided and considers
1024  * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
1025  * application to allocate buffers.
1026  *
1027  * @param[in] dev_id
1028  *   The identifier of the device.
1029  * @param[in] model_id
1030  *   Identifier for the model created
1031  * @param[in] nb_batches
1032  *   Number of batches of input to be processed in a single inference job
1033  * @param[out] output_qsize
1034  *   Quantized output size pointer.
1035  * NULL value is allowed, in which case output_qsize is not calculated by the driver.
1036  * @param[out] output_dsize
1037  *   Dequantized output size pointer.
1038  * NULL value is allowed, in which case output_dsize is not calculated by the driver.
1039  *
1040  * @return
1041  *   - Returns 0 on success
1042  *   - Returns negative value on failure
1043  */
1044 __rte_experimental
1045 int
1046 rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
1047 			  uint64_t *output_qsize, uint64_t *output_dsize);
1048 
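/*
 * Illustrative sketch (not part of the API): size and allocate the quantized and dequantized
 * I/O buffers for a single-batch inference using rte_malloc() (from rte_malloc.h). dev_info is
 * assumed to have been filled by rte_ml_dev_info_get(); its min_align_size is used as the
 * allocation alignment.
 *
 *   uint64_t in_qsize, in_dsize, out_qsize, out_dsize;
 *   void *q_input, *d_input, *q_output, *d_output;
 *
 *   if (rte_ml_io_input_size_get(dev_id, model_id, 1, &in_qsize, &in_dsize) != 0 ||
 *       rte_ml_io_output_size_get(dev_id, model_id, 1, &out_qsize, &out_dsize) != 0)
 *       return -EIO;
 *
 *   q_input = rte_malloc(NULL, in_qsize, dev_info.min_align_size);
 *   d_input = rte_malloc(NULL, in_dsize, dev_info.min_align_size);
 *   q_output = rte_malloc(NULL, out_qsize, dev_info.min_align_size);
 *   d_output = rte_malloc(NULL, out_dsize, dev_info.min_align_size);
 */
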
1049 /**
1050  * Quantize input data.
1051  *
1052  * Quantization converts data from a higher precision type to a lower precision type to improve
1053  * the throughput and efficiency of the model execution with minimal loss of accuracy.
1054  * Types of dequantized data and quantized data are specified by the model.
1055  *
1056  * @param[in] dev_id
1057  *   The identifier of the device.
1058  * @param[in] model_id
1059  *   Identifier for the model
1060  * @param[in] nb_batches
1061  *   Number of batches in the dequantized input buffer
1062  * @param[in] dbuffer
1063  *   Address of dequantized input data
1064  * @param[in] qbuffer
1065  *   Address of quantized input data
1066  *
1067  * @return
1068  *   - Returns 0 on success
1069  *   - Returns negative value on failure
1070  */
1071 __rte_experimental
1072 int
1073 rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
1074 		   void *qbuffer);
1075 
1076 /**
1077  * Dequantize output data.
1078  *
1079  * Dequantization converts data from a lower precision type to a higher precision type.
1080  * Types of quantized and dequantized data are specified by the model.
1081  *
1082  * @param[in] dev_id
1083  *   The identifier of the device.
1084  * @param[in] model_id
1085  *   Identifier for the model
1086  * @param[in] nb_batches
1087  *   Number of batches in the dequantized output buffer
1088  * @param[in] qbuffer
1089  *   Address of quantized output data
1090  * @param[in] dbuffer
1091  *   Address of dequantized output data
1092  *
1093  * @return
1094  *   - Returns 0 on success
1095  *   - Returns negative value on failure
1096  */
1097 __rte_experimental
1098 int
1099 rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
1100 		     void *dbuffer);
1101 
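/*
 * Illustrative sketch (not part of the API): convert application data to the model's quantized
 * input type before enqueuing, and convert the quantized output back after dequeuing. The d_*
 * and q_* buffers are hypothetical and assumed to have been sized as in the example above.
 *
 *   // d_input holds the application's dequantized input data for one batch.
 *   if (rte_ml_io_quantize(dev_id, model_id, 1, d_input, q_input) != 0)
 *       return -EIO;
 *
 *   // ... run the inference: q_input and q_output are referenced by the enqueued rte_ml_op ...
 *
 *   if (rte_ml_io_dequantize(dev_id, model_id, 1, q_output, d_output) != 0)
 *       return -EIO;
 */
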
1102 /* ML op pool operations */
1103 
1104 /**
1105  * Create an ML operation pool
1106  *
1107  * @param name
1108  *   ML operations pool name
1109  * @param nb_elts
1110  *   Number of elements in pool
1111  * @param cache_size
1112  *   Number of elements to cache on lcore, see
1113  *   *rte_mempool_create* for further details about cache size
1114  * @param user_size
1115  *   Size of private data to allocate for user with each operation
1116  * @param socket_id
1117  *   Socket identifier to allocate memory on
1118  * @return
1119  *  - On success pointer to mempool
1120  *  - On failure NULL
1121  */
1122 __rte_experimental
1123 struct rte_mempool *
1124 rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
1125 		      uint16_t user_size, int socket_id);
1126 
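/*
 * Illustrative sketch (not part of the API): create a pool of ML ops shared by the enqueue and
 * dequeue paths. The pool name, element count and cache size are arbitrary.
 *
 *   struct rte_mempool *op_pool;
 *
 *   op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, rte_ml_dev_socket_id(dev_id));
 *   if (op_pool == NULL)
 *       return -ENOMEM;
 *
 *   // Ops are then obtained with rte_mempool_get() and returned with rte_mempool_put(),
 *   // as shown in the enqueue/dequeue examples above. When no longer needed:
 *   rte_ml_op_pool_free(op_pool);
 */
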
1127 /**
1128  * Free an ML operation pool
1129  *
1130  * @param mempool
1131  *   A pointer to the mempool structure.
1132  *   If NULL then, the function does nothing.
1133  */
1134 __rte_experimental
1135 void
1136 rte_ml_op_pool_free(struct rte_mempool *mempool);
1137 
1138 #ifdef __cplusplus
1139 }
1140 #endif
1141 
1142 #endif /* RTE_MLDEV_H */
1143