xref: /dpdk/lib/mldev/rte_mldev_core.h (revision da7e701151ea8b742d4c38ace3e4fefd1b4507fc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_INTERNAL_H
6 #define RTE_MLDEV_INTERNAL_H
7 
8 /**
9  * @file
10  *
11  * MLDEV internal header
12  *
13  * This file contains MLDEV private data structures and macros.
14  *
15  * @note
16  * These APIs are for MLDEV PMDs and library only.
17  */
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 #include <stdint.h>
24 
25 #include <dev_driver.h>
26 #include <rte_common.h>
27 #include <rte_log.h>
28 #include <rte_mldev.h>
29 
30 /* Device state */
31 #define ML_DEV_DETACHED (0)
32 #define ML_DEV_ATTACHED (1)
33 
34 struct rte_ml_dev;
35 
36 /**
37  * @internal
38  *
39  * Enqueue a burst of inference requests to a queue pair; type of rte_ml_dev::enqueue_burst.
40  *
41  * @param dev
42  *	ML device pointer.
43  * @param qp_id
44  *	Queue-pair ID.
45  * @param ops
46  *	Array of ML ops to be enqueued.
47  * @param nb_ops
48  *	Number of ops to enqueue.
49  *
50  * @return
51  *	- Number of ops enqueued.
52  */
53 typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
54 				    uint16_t nb_ops);
55 
56 /**
57  * @internal
58  *
59  * Dequeue a burst of inference requests from a queue pair; type of rte_ml_dev::dequeue_burst.
60  *
61  * @param dev
62  *	ML device pointer.
63  * @param qp_id
64  *	Queue-pair ID.
65  * @param ops
66  *	Array of ML ops to be dequeued.
67  * @param nb_ops
68  *	Number of ops to dequeue.
69  *
70  * @return
71  *	- Number of ops dequeued.
72  */
73 typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
74 				    uint16_t nb_ops);
75 
76 /**
77  * @internal
78  *
79  * Get error information for an Op; type of rte_ml_dev::op_error_get.
80  *
81  * @param dev
82  *	ML device pointer.
83  * @param op
84  *	ML Op handle.
85  * @param error
86  *	Pointer to error structure.
87  *
88  * @return
89  *	- 0 on success.
90  *	- < 0, error on failure.
91  */
92 typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev, struct rte_ml_op *op,
93 				    struct rte_ml_op_error *error);
94 
95 /**
96  * Definitions of all functions exported by a driver through the generic structure of type
97  * *rte_ml_dev_ops* supplied in the *rte_ml_dev* structure associated with a device.
98  */
99 
100 /**
101  * @internal
102  *
103  * Function used to get device information; type of rte_ml_dev_ops::dev_info_get.
104  *
105  * @param dev
106  *	ML device pointer.
107  * @param dev_info
108  *	Pointer to info structure.
109  *
110  * @return
111  *	- 0 on success.
112  *	- < 0, error code on failure.
113  */
114 typedef int (*mldev_info_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info);
115 
116 /**
117  * @internal
118  *
119  * Function used to configure device; type of rte_ml_dev_ops::dev_configure.
120  *
121  * @param dev
122  *	ML device pointer.
123  * @param config
124  *	ML device configurations.
125  *
126  * @return
127  *	- 0 on success.
128  *	- < 0, error code on failure.
129  */
130 typedef int (*mldev_configure_t)(struct rte_ml_dev *dev, const struct rte_ml_dev_config *config);
131 
132 /**
133  * @internal
134  *
135  * Function used to close a configured device; type of rte_ml_dev_ops::dev_close.
136  *
137  * @param dev
138  *	ML device pointer.
139  *
140  * @return
141  *	- 0 on success.
142  *	- -EAGAIN, if the device can't be closed because it is busy.
143  *	- < 0, error code on failure, other than busy.
144  */
145 typedef int (*mldev_close_t)(struct rte_ml_dev *dev);
146 
147 /**
148  * @internal
149  *
150  * Function used to start a configured device; type of rte_ml_dev_ops::dev_start.
151  *
152  * @param dev
153  *	ML device pointer.
154  *
155  * @return
156  *	- 0 on success.
157  *	- < 0, error code on failure.
158  */
159 typedef int (*mldev_start_t)(struct rte_ml_dev *dev);
160 
161 /**
162  * @internal
163  *
164  * Function used to stop a configured device; type of rte_ml_dev_ops::dev_stop.
165  *
166  * @param dev
167  *	ML device pointer.
168  *
169  * @return
170  *	- 0 on success.
171  *	- < 0, error code on failure.
172  */
173 typedef int (*mldev_stop_t)(struct rte_ml_dev *dev);
174 
175 /**
176  * @internal
177  *
178  * Setup a queue pair for a device; type of rte_ml_dev_ops::dev_queue_pair_setup.
179  *
180  * @param dev
181  *	ML device pointer.
182  * @param queue_pair_id
183  *	Queue pair index.
184  * @param queue_pair_conf
185  *	Queue pair configuration structure.
186  * @param socket_id
187  *	Socket index.
188  *
189  * @return
190  *	- 0 on success.
191  *	- < 0, error on failure.
192  */
193 typedef int (*mldev_queue_pair_setup_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id,
194 					const struct rte_ml_dev_qp_conf *queue_pair_conf,
195 					int socket_id);
196 
197 /**
198  * @internal
199  *
200  * Release memory resources of a queue pair; type of rte_ml_dev_ops::dev_queue_pair_release.
201  *
202  * @param dev
203  *	ML device pointer.
204  * @param queue_pair_id
205  *	Queue pair index.
206  *
207  * @return
208  *	- 0 on success.
209  *	- -EAGAIN, if the queue pair can't be released because the device is busy.
210  */
211 typedef int (*mldev_queue_pair_release_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id);
212 
213 /**
214  * @internal
215  *
216  * Function used to get device statistics; type of rte_ml_dev_ops::dev_stats_get.
217  *
218  * @param dev
219  *	ML device pointer.
220  * @param stats
221  *	Pointer to ML device stats structure to update.
222  *
223  * @return
224  *	- 0 on success.
225  *	- < 0, error on failure.
226  */
227 typedef int (*mldev_stats_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats);
228 
229 /**
230  * @internal
231  *
232  * Function used to reset device statistics; type of rte_ml_dev_ops::dev_stats_reset.
233  *
234  * @param dev
235  *	ML device pointer.
236  */
237 typedef void (*mldev_stats_reset_t)(struct rte_ml_dev *dev);
238 
239 /**
240  * @internal
241  *
242  * Function used to get names of extended stats; type of rte_ml_dev_ops::dev_xstats_names_get.
243  *
244  * @param dev
245  *	ML device pointer.
246  * @param mode
247  *	Mode of stats to retrieve.
248  * @param model_id
249  *	Used to specify model id in model mode. Ignored in device mode.
250  * @param xstats_map
251  *	Array to insert id and names into.
252  * @param size
253  *	Size of xstats_map array.
254  *
255  * @return
256  *	- >= 0 and <= size on success.
257  *	- > size, error. Returns the size of xstats_map array required.
258  *	- < 0, error code on failure.
259  */
260 typedef int (*mldev_xstats_names_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
261 					int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
262 					uint32_t size);
263 
264 /**
265  * @internal
266  *
267  * Get a single extended stat by name; type of rte_ml_dev_ops::dev_xstats_by_name_get.
268  *
269  * @param dev
270  *	ML device pointer.
271  * @param name
272  *	Name of the stat to retrieve.
273  * @param stat_id
274  *	ID of the stat to be returned.
275  * @param value
276  *	Value of the stat to be returned.
277  *
278  * @return
279  *	- 0 on success.
280  *	- < 0, error code on failure.
281  */
282 typedef int (*mldev_xstats_by_name_get_t)(struct rte_ml_dev *dev, const char *name,
283 					  uint16_t *stat_id, uint64_t *value);
284 
285 /**
286  * @internal
287  *
288  * Function used to retrieve extended stats of a device; type of rte_ml_dev_ops::dev_xstats_get.
289  *
290  * @param dev
291  *	ML device pointer.
292  * @param mode
293  *	Mode of stats to retrieve.
294  * @param model_id
295  *	Used to specify model id in model mode. Ignored in device mode.
296  * @param stat_ids
297  *	Array of ID numbers of the stats to be retrieved.
298  * @param values
299  *	Values of the stats requested by the ID.
300  * @param nb_ids
301  *	Number of stats requested.
302  *
303  * @return
304  *	- >= 0, number of entries filled into the values array.
305  *	- < 0, error code on failure.
306  */
307 typedef int (*mldev_xstats_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
308 				  int32_t model_id, const uint16_t stat_ids[], uint64_t values[],
309 				  uint16_t nb_ids);
310 
311 /**
312  * @internal
313  *
314  * Function used to reset extended stats; type of rte_ml_dev_ops::dev_xstats_reset.
315  *
316  * @param dev
317  *	ML device pointer.
318  * @param mode
319  *	Mode of stats to reset.
320  * @param model_id
321  *	Used to specify model id in model mode. Ignored in device mode.
322  * @param stat_ids
323  *	Array of stats IDs to be reset.
324  * @param nb_ids
325  *	Number of IDs in the stat_ids array.
326  *
327  * @return
328  *	- 0 on success.
329  *	- < 0, error code on failure.
330  */
331 typedef int (*mldev_xstats_reset_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
332 				    int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids);
333 
334 /**
335  * @internal
336  *
337  * Function used to dump ML device debug info; type of rte_ml_dev_ops::dev_dump.
338  *
339  * @param dev
340  *	ML device pointer.
341  * @param fd
342  *	File stream (FILE pointer, not a numeric descriptor) to dump the debug info to.
343  *
344  * @return
345  *	- 0 on success.
346  *	- < 0, error code on failure.
347  */
348 
349 typedef int (*mldev_dump_t)(struct rte_ml_dev *dev, FILE *fd);
350 
351 /**
352  * @internal
353  *
354  * Function used for selftest of ML device; type of rte_ml_dev_ops::dev_selftest.
355  *
356  * @param dev
357  *	ML device pointer.
358  *
359  * @return
360  *	- 0 on success.
361  *	- < 0, error on failure.
362  */
363 typedef int (*mldev_selftest_t)(struct rte_ml_dev *dev);
364 
365 /**
366  * @internal
367  *
368  * Function used to load an ML model; type of rte_ml_dev_ops::model_load.
369  *
370  * @param dev
371  *	ML device pointer.
372  * @param params
373  *	Model load params.
374  * @param model_id
375  *	Model ID returned by the library.
376  *
377  * @return
378  *	- 0 on success.
379  *	- < 0, error on failure.
380  */
381 typedef int (*mldev_model_load_t)(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
382 				  uint16_t *model_id);
383 
384 /**
385  * @internal
386  *
387  * Function used to unload an ML model; type of rte_ml_dev_ops::model_unload.
388  *
389  * @param dev
390  *	ML device pointer.
391  * @param model_id
392  *	Model ID to use.
393  *
394  * @return
395  *	- 0 on success.
396  *	- < 0, error on failure.
397  */
398 typedef int (*mldev_model_unload_t)(struct rte_ml_dev *dev, uint16_t model_id);
399 
400 /**
401  * @internal
402  *
403  * Function used to start an ML model; type of rte_ml_dev_ops::model_start.
404  *
405  * @param dev
406  *	ML device pointer.
407  * @param model_id
408  *	Model ID to use.
409  *
410  * @return
411  *	- 0 on success.
412  *	- < 0, error on failure.
413  */
414 typedef int (*mldev_model_start_t)(struct rte_ml_dev *dev, uint16_t model_id);
415 
416 /**
417  * @internal
418  *
419  * Function used to stop an ML model; type of rte_ml_dev_ops::model_stop.
420  *
421  * @param dev
422  *	ML device pointer.
423  * @param model_id
424  *	Model ID to use.
425  *
426  * @return
427  *	- 0 on success.
428  *	- < 0, error on failure.
429  */
430 typedef int (*mldev_model_stop_t)(struct rte_ml_dev *dev, uint16_t model_id);
431 
432 /**
433  * @internal
434  *
435  * Get info about a model; type of rte_ml_dev_ops::model_info_get.
436  *
437  * @param dev
438  *	ML device pointer.
439  * @param model_id
440  *	Model ID to use.
441  * @param model_info
442  *	Pointer to model info structure.
443  *
444  * @return
445  *	- 0 on success.
446  *	- < 0, error on failure.
447  */
448 typedef int (*mldev_model_info_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
449 				      struct rte_ml_model_info *model_info);
450 
451 /**
452  * @internal
453  *
454  * Update model params; type of rte_ml_dev_ops::model_params_update.
455  *
456  * @param dev
457  *	ML device pointer.
458  * @param model_id
459  *	Model ID to use.
460  * @param buffer
461  *	Pointer to model params.
462  *
463  * @return
464  *	- 0 on success.
465  *	- < 0, error on failure.
466  */
467 typedef int (*mldev_model_params_update_t)(struct rte_ml_dev *dev, uint16_t model_id, void *buffer);
468 
469 /**
470  * @internal
471  *
472  * Quantize model data; type of rte_ml_dev_ops::io_quantize.
473  *
474  * @param dev
475  *	ML device pointer.
476  * @param model_id
477  *	Model ID to use.
478  * @param dbuffer
479  *	Pointer to de-quantized data buffer.
480  * @param qbuffer
481  *	Pointer to quantized data buffer.
482  *
483  * @return
484  *	- 0 on success.
485  *	- < 0, error on failure.
486  */
487 typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id,
488 				   struct rte_ml_buff_seg **dbuffer,
489 				   struct rte_ml_buff_seg **qbuffer);
490 
491 /**
492  * @internal
493  *
494  * De-quantize model data; type of rte_ml_dev_ops::io_dequantize.
495  *
496  * @param dev
497  *	ML device pointer.
498  * @param model_id
499  *	Model ID to use.
500  * @param qbuffer
501  *	Pointer to quantized data buffer.
502  * @param dbuffer
503  *	Pointer to de-quantized data buffer.
504  *
505  * @return
506  *	- 0 on success.
507  *	- < 0, error on failure.
508  */
509 typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id,
510 				     struct rte_ml_buff_seg **qbuffer,
511 				     struct rte_ml_buff_seg **dbuffer);
512 
513 /**
514  * @internal
515  *
516  * ML device operations function pointer table.
517  */
518 struct rte_ml_dev_ops {
519 	/** Get device information. */
520 	mldev_info_get_t dev_info_get;
521 
522 	/** Configure device. */
523 	mldev_configure_t dev_configure;
524 
525 	/** Close device. */
526 	mldev_close_t dev_close;
527 
528 	/** Start device. */
529 	mldev_start_t dev_start;
530 
531 	/** Stop device. */
532 	mldev_stop_t dev_stop;
533 
534 	/** Set up a device queue pair. */
535 	mldev_queue_pair_setup_t dev_queue_pair_setup;
536 
537 	/** Release a device queue pair. */
538 	mldev_queue_pair_release_t dev_queue_pair_release;
539 
540 	/** Get device statistics. */
541 	mldev_stats_get_t dev_stats_get;
542 
543 	/** Reset device statistics. */
544 	mldev_stats_reset_t dev_stats_reset;
545 
546 	/** Get names of extended stats. */
547 	mldev_xstats_names_get_t dev_xstats_names_get;
548 
549 	/** Get value of a single extended stat. */
550 	mldev_xstats_by_name_get_t dev_xstats_by_name_get;
551 
552 	/** Get extended stats of a device. */
553 	mldev_xstats_get_t dev_xstats_get;
554 
555 	/** Reset extended stats of the device. */
556 	mldev_xstats_reset_t dev_xstats_reset;
557 
558 	/** Dump ML device debug info. */
559 	mldev_dump_t dev_dump;
560 
561 	/** Run selftest of ML device. */
562 	mldev_selftest_t dev_selftest;
563 
564 	/** Load an ML model. */
565 	mldev_model_load_t model_load;
566 
567 	/** Unload an ML model. */
568 	mldev_model_unload_t model_unload;
569 
570 	/** Start an ML model. */
571 	mldev_model_start_t model_start;
572 
573 	/** Stop an ML model. */
574 	mldev_model_stop_t model_stop;
575 
576 	/** Get model information. */
577 	mldev_model_info_get_t model_info_get;
578 
579 	/** Update model params. */
580 	mldev_model_params_update_t model_params_update;
581 
582 	/** Quantize data. */
583 	mldev_io_quantize_t io_quantize;
584 
585 	/** De-quantize data. */
586 	mldev_io_dequantize_t io_dequantize;
587 };
588 
589 /**
590  * @internal
591  *
592  * The data part, with no function pointers, associated with each device. This structure is safe to
593  * place in shared memory to be common among different processes in a multi-process configuration.
594  */
595 struct rte_ml_dev_data {
596 	/** Device ID for this instance. */
597 	int16_t dev_id;
598 
599 	/** Socket ID where memory is allocated. */
600 	int16_t socket_id;
601 
602 	/** Device state: STOPPED(0) / STARTED(1). */
603 	__extension__ uint8_t dev_started : 1;
604 
605 	/** Number of device queue pairs. */
606 	uint16_t nb_queue_pairs;
607 
608 	/** Number of ML models. */
609 	uint16_t nb_models;
610 
611 	/** Array of pointers to queue pairs. */
612 	void **queue_pairs;
613 
614 	/** Array of pointers to ML models. */
615 	void **models;
616 
617 	/** PMD-specific private data. */
618 	void *dev_private;
619 
620 	/** Unique identifier name. */
621 	char name[RTE_ML_STR_MAX];
622 };
623 
624 /**
625  * @internal
626  *
627  * The data structure associated with each ML device.
628  */
629 struct rte_ml_dev {
630 	/** Pointer to PMD enqueue function. */
631 	mldev_enqueue_t enqueue_burst;
632 
633 	/** Pointer to PMD dequeue function. */
634 	mldev_dequeue_t dequeue_burst;
635 
636 	/** Pointer to PMD Op error get function. */
637 	mldev_op_error_get_t op_error_get;
638 
639 	/** Pointer to device data. */
640 	struct rte_ml_dev_data *data;
641 
642 	/** Table of functions exported by PMD. */
643 	struct rte_ml_dev_ops *dev_ops;
644 
645 	/** Backing RTE device. */
646 	struct rte_device *device;
647 
648 	/** Attached flag; presumably ML_DEV_DETACHED / ML_DEV_ATTACHED - TODO confirm. */
649 	__extension__ uint8_t attached : 1;
650 } __rte_cache_aligned;
651 
652 /**
653  * @internal
654  *
655  * Global structure used for maintaining state of allocated ML devices.
656  */
657 struct rte_ml_dev_global {
658 	/** Array of ML device structures. */
659 	struct rte_ml_dev *devs;
660 
661 	/** Array of pointers to per-device data structures. */
662 	struct rte_ml_dev_data **data;
663 
664 	/** Number of devices found. */
665 	uint8_t nb_devs;
666 
667 	/** Maximum number of devices. */
668 	uint8_t max_devs;
669 };
670 
671 #ifdef __cplusplus
672 }
673 #endif
674 
675 #endif /* RTE_MLDEV_INTERNAL_H */
676