xref: /dpdk/lib/mldev/rte_mldev_core.h (revision 62774b78a84e9fa5df56d04cffed69bef8c901f1)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_INTERNAL_H
6 #define RTE_MLDEV_INTERNAL_H
7 
8 /**
9  * @file
10  *
11  * MLDEV internal header
12  *
13  * This file contains MLDEV private data structures and macros.
14  *
15  * @note
16  * These APIs are for MLDEV PMDs and library only.
17  */
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 #include <stdint.h>
24 
25 #include <dev_driver.h>
26 #include <rte_common.h>
27 #include <rte_log.h>
28 #include <rte_mldev.h>
29 
30 /* Device attach state values. NOTE(review): presumably stored in rte_ml_dev::attached - confirm. */
31 #define ML_DEV_DETACHED (0)
32 #define ML_DEV_ATTACHED (1)
33 
34 struct rte_ml_dev; /* Forward declaration; the full definition appears further down in this header. */
35 
36 /**
37  * @internal
38  *
39  * Enqueue a burst of inference requests to a queue pair (type of rte_ml_dev::enqueue_burst).
40  *
41  * @param dev
42  *	ML device pointer.
43  * @param qp_id
44  *	Queue-pair ID.
45  * @param ops
46  *	Array of pointers to the ML ops to be enqueued.
47  * @param nb_ops
48  *	Number of ops to enqueue.
49  *
50  * @return
51  *	- Number of ops enqueued.
52  */
53 typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
54 				    uint16_t nb_ops);
55 
56 /**
57  * @internal
58  *
59  * Dequeue a burst of inference requests from a queue pair (type of rte_ml_dev::dequeue_burst).
60  *
61  * @param dev
62  *	ML device pointer.
63  * @param qp_id
64  *	Queue-pair ID.
65  * @param ops
66  *	Array of ML ops to be dequeued.
67  * @param nb_ops
68  *	Number of ops to dequeue.
69  *
70  * @return
71  *	- Number of ops dequeued.
72  */
73 typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
74 				    uint16_t nb_ops);
75 
76 /**
77  * @internal
78  *
79  * Get error information for an Op (type of rte_ml_dev::op_error_get).
80  *
81  * @param dev
82  *	ML device pointer.
83  * @param op
84  *	ML Op handle.
85  * @param error
86  *	Pointer to the error structure to fill in.
87  *
88  * @return
89  *	- 0 on success.
90  *	- < 0, error code on failure.
91  */
92 typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev, struct rte_ml_op *op,
93 				    struct rte_ml_op_error *error);
94 
95 /**
96  * Definitions of all functions exported by a driver through the generic structure of type
97  * *rte_ml_dev_ops* supplied in the *rte_ml_dev* structure associated with a device.
98  */
99 
100 /**
101  * @internal
102  *
103  * Function used to get device information (type of rte_ml_dev_ops::dev_info_get).
104  *
105  * @param dev
106  *	ML device pointer.
107  * @param dev_info
108  *	Pointer to the device info structure to fill in.
109  *
110  * @return
111  *	- 0 on success.
112  *	- < 0, error code on failure.
113  */
114 typedef int (*mldev_info_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info);
115 
116 /**
117  * @internal
118  *
119  * Function used to configure device (type of rte_ml_dev_ops::dev_configure).
120  *
121  * @param dev
122  *	ML device pointer.
123  * @param config
124  *	ML device configuration (read-only for the callback).
125  *
126  * @return
127  *	- 0 on success.
128  *	- < 0, error code on failure.
129  */
130 typedef int (*mldev_configure_t)(struct rte_ml_dev *dev, const struct rte_ml_dev_config *config);
131 
132 /**
133  * @internal
134  *
135  * Function used to close a configured device (type of rte_ml_dev_ops::dev_close).
136  *
137  * @param dev
138  *	ML device pointer.
139  *
140  * @return
141  *	- 0 on success.
142  *	- -EAGAIN if the device can't be closed because it is busy.
143  *	- < 0, error code on failure, other than busy.
144  */
145 typedef int (*mldev_close_t)(struct rte_ml_dev *dev);
146 
147 /**
148  * @internal
149  *
150  * Function used to start a configured device (type of rte_ml_dev_ops::dev_start).
151  *
152  * @param dev
153  *	ML device pointer.
154  *
155  * @return
156  *	- 0 on success.
157  *	- < 0, error code on failure.
158  */
159 typedef int (*mldev_start_t)(struct rte_ml_dev *dev);
160 
161 /**
162  * @internal
163  *
164  * Function used to stop a configured device (type of rte_ml_dev_ops::dev_stop).
165  *
166  * @param dev
167  *	ML device pointer.
168  *
169  * @return
170  *	- 0 on success.
171  *	- < 0, error code on failure.
172  */
173 typedef int (*mldev_stop_t)(struct rte_ml_dev *dev);
174 
175 /**
176  * @internal
177  *
178  * Set up a queue pair for a device (type of rte_ml_dev_ops::dev_queue_pair_setup).
179  *
180  * @param dev
181  *	ML device pointer.
182  * @param queue_pair_id
183  *	Queue pair index.
184  * @param queue_pair_conf
185  *	Queue pair configuration structure (read-only for the callback).
186  * @param socket_id
187  *	Socket index.
188  *
189  * @return
190  *	- 0 on success.
191  *	- < 0, error code on failure.
192  */
193 typedef int (*mldev_queue_pair_setup_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id,
194 					const struct rte_ml_dev_qp_conf *queue_pair_conf,
195 					int socket_id);
196 
197 /**
198  * @internal
199  *
200  * Release memory resources allocated by given queue pair (rte_ml_dev_ops::dev_queue_pair_release).
201  *
202  * @param dev
203  *	ML device pointer.
204  * @param queue_pair_id
205  *	Queue pair index.
206  *
207  * @return
208  *	- 0 on success.
209  *	- -EAGAIN, if the queue pair can't be released as device is busy.
210  */
211 typedef int (*mldev_queue_pair_release_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id);
212 
213 /**
214  * @internal
215  *
216  * Function used to get device statistics (type of rte_ml_dev_ops::dev_stats_get).
217  *
218  * @param dev
219  *	ML device pointer.
220  * @param stats
221  *	Pointer to ML device stats structure to update.
222  *
223  * @return
224  *	- 0 on success.
225  *	- < 0, error code on failure.
226  */
227 typedef int (*mldev_stats_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats);
228 
229 /**
230  * @internal
231  *
232  * Function used to reset device statistics (rte_ml_dev_ops::dev_stats_reset); returns no status.
233  *
234  * @param dev
235  *	ML device pointer.
236  */
237 typedef void (*mldev_stats_reset_t)(struct rte_ml_dev *dev);
238 
239 /**
240  * @internal
241  *
242  * Function used to get names of extended stats (type of rte_ml_dev_ops::dev_xstats_names_get).
243  *
244  * @param dev
245  *	ML device pointer.
246  * @param mode
247  *	Mode of stats to retrieve.
248  * @param model_id
249  *	Used to specify model id in model mode. Ignored in device mode.
250  * @param xstats_map
251  *	Array to insert id and names into.
252  * @param size
253  *	Number of entries available in the xstats_map array.
254  *
255  * @return
256  *	- >= 0 and <= size on success.
257  *	- > size, error. Returns the size of xstats_map array required.
258  *	- < 0, error code on failure.
259  */
260 typedef int (*mldev_xstats_names_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
261 					int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
262 					uint32_t size);
263 
264 /**
265  * @internal
266  *
267  * Function used to get a single extended stat by name (rte_ml_dev_ops::dev_xstats_by_name_get).
268  *
269  * @param dev
270  *	ML device pointer.
271  * @param name
272  *	Name of the stat to retrieve.
273  * @param stat_id
274  *	Pointer through which the ID of the stat is returned.
275  * @param value
276  *	Pointer through which the value of the stat is returned.
277  *
278  * @return
279  *	- 0 on success.
280  *	- < 0, error code on failure.
281  */
282 typedef int (*mldev_xstats_by_name_get_t)(struct rte_ml_dev *dev, const char *name,
283 					  uint16_t *stat_id, uint64_t *value);
284 
285 /**
286  * @internal
287  *
288  * Function used to retrieve extended stats of a device (type of rte_ml_dev_ops::dev_xstats_get).
289  *
290  * @param dev
291  *	ML device pointer.
292  * @param mode
293  *	Mode of stats to retrieve.
294  * @param model_id
295  *	Used to specify model id in model mode. Ignored in device mode.
296  * @param stat_ids
297  *	Array of ID numbers of the stats to be retrieved (read-only for the callback).
298  * @param values
299  *	Array receiving the values of the stats requested by ID.
300  * @param nb_ids
301  *	Number of stats requested.
302  *
303  * @return
304  *	- >= 0, number of entries filled into the values array.
305  *	- < 0, error code on failure.
306  */
307 typedef int (*mldev_xstats_get_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
308 				  int32_t model_id, const uint16_t stat_ids[], uint64_t values[],
309 				  uint16_t nb_ids);
310 
311 /**
312  * @internal
313  *
314  * Function used to reset extended stats (type of rte_ml_dev_ops::dev_xstats_reset).
315  *
316  * @param dev
317  *	ML device pointer.
318  * @param mode
319  *	Mode of stats to reset.
320  * @param model_id
321  *	Used to specify model id in model mode. Ignored in device mode.
322  * @param stat_ids
323  *	Array of stats IDs to be reset.
324  * @param nb_ids
325  *	Number of IDs in the stat_ids array.
326  *
327  * @return
328  *	- 0 on success.
329  *	- < 0, error code on failure.
330  */
331 typedef int (*mldev_xstats_reset_t)(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
332 				    int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids);
333 
334 /**
335  * @internal
336  *
337  * Function used to dump ML device debug info (type of rte_ml_dev_ops::dev_dump).
338  *
339  * @param dev
340  *	ML device pointer.
341  * @param fd
342  *	Output stream for the debug info (FILE needs <stdio.h>, not included directly here).
343  *
344  * @return
345  *	- 0 on success.
346  *	- < 0, error code on failure.
347  */
348 
349 typedef int (*mldev_dump_t)(struct rte_ml_dev *dev, FILE *fd);
350 
351 /**
352  * @internal
353  *
354  * Function used for selftest of ML device (type of rte_ml_dev_ops::dev_selftest).
355  *
356  * @param dev
357  *	ML device pointer.
358  *
359  * @return
360  *	- 0 on success.
361  *	- < 0, error code on failure.
362  */
363 typedef int (*mldev_selftest_t)(struct rte_ml_dev *dev);
364 
365 /**
366  * @internal
367  *
368  * Function used to load an ML model (type of rte_ml_dev_ops::model_load).
369  *
370  * @param dev
371  *	ML device pointer.
372  * @param params
373  *	Model load params.
374  * @param model_id
375  *	Pointer through which the ID of the loaded model is returned.
376  *
377  * @return
378  *	- 0 on success.
379  *	- < 0, error code on failure.
380  */
381 typedef int (*mldev_model_load_t)(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
382 				  uint16_t *model_id);
383 
384 /**
385  * @internal
386  *
387  * Function used to unload an ML model (type of rte_ml_dev_ops::model_unload).
388  *
389  * @param dev
390  *	ML device pointer.
391  * @param model_id
392  *	ID of the model to unload.
393  *
394  * @return
395  *	- 0 on success.
396  *	- < 0, error code on failure.
397  */
398 typedef int (*mldev_model_unload_t)(struct rte_ml_dev *dev, uint16_t model_id);
399 
400 /**
401  * @internal
402  *
403  * Function used to start an ML model (type of rte_ml_dev_ops::model_start).
404  *
405  * @param dev
406  *	ML device pointer.
407  * @param model_id
408  *	ID of the model to start.
409  *
410  * @return
411  *	- 0 on success.
412  *	- < 0, error code on failure.
413  */
414 typedef int (*mldev_model_start_t)(struct rte_ml_dev *dev, uint16_t model_id);
415 
416 /**
417  * @internal
418  *
419  * Function used to stop an ML model (type of rte_ml_dev_ops::model_stop).
420  *
421  * @param dev
422  *	ML device pointer.
423  * @param model_id
424  *	ID of the model to stop.
425  *
426  * @return
427  *	- 0 on success.
428  *	- < 0, error code on failure.
429  */
430 typedef int (*mldev_model_stop_t)(struct rte_ml_dev *dev, uint16_t model_id);
431 
432 /**
433  * @internal
434  *
435  * Get info about a model (type of rte_ml_dev_ops::model_info_get).
436  *
437  * @param dev
438  *	ML device pointer.
439  * @param model_id
440  *	ID of the model to query.
441  * @param model_info
442  *	Pointer to the model info structure to fill in.
443  *
444  * @return
445  *	- 0 on success.
446  *	- < 0, error code on failure.
447  */
448 typedef int (*mldev_model_info_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
449 				      struct rte_ml_model_info *model_info);
450 
451 /**
452  * @internal
453  *
454  * Update model params (type of rte_ml_dev_ops::model_params_update).
455  *
456  * @param dev
457  *	ML device pointer.
458  * @param model_id
459  *	ID of the model to update.
460  * @param buffer
461  *	Pointer to model params (untyped; NOTE(review): layout is presumably PMD-defined - confirm).
462  *
463  * @return
464  *	- 0 on success.
465  *	- < 0, error code on failure.
466  */
467 typedef int (*mldev_model_params_update_t)(struct rte_ml_dev *dev, uint16_t model_id, void *buffer);
468 
469 /**
470  * @internal
471  *
472  * Get size of input buffers (type of rte_ml_dev_ops::io_input_size_get).
473  *
474  * @param dev
475  *	ML device pointer.
476  * @param model_id
477  *	Model ID to use.
478  * @param nb_batches
479  *	Number of batches.
480  * @param input_qsize
481  *	Pointer through which the size of quantized input is returned.
482  * @param input_dsize
483  *	Pointer through which the size of dequantized input is returned.
484  *
485  * @return
486  *	- 0 on success.
487  *	- < 0, error code on failure.
488  */
489 typedef int (*mldev_io_input_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
490 					 uint32_t nb_batches, uint64_t *input_qsize,
491 					 uint64_t *input_dsize);
492 
493 /**
494  * @internal
495  *
496  * Get size of output buffers (type of rte_ml_dev_ops::io_output_size_get).
497  *
498  * @param dev
499  *	ML device pointer.
500  * @param model_id
501  *	Model ID to use.
502  * @param nb_batches
503  *	Number of batches.
504  * @param output_qsize
505  *	Pointer through which the size of quantized output is returned.
506  * @param output_dsize
507  *	Pointer through which the size of dequantized output is returned.
508  *
509  * @return
510  *	- 0 on success.
511  *	- < 0, error code on failure.
512  */
513 typedef int (*mldev_io_output_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
514 					  uint32_t nb_batches, uint64_t *output_qsize,
515 					  uint64_t *output_dsize);
516 
517 /**
518  * @internal
519  *
520  * Quantize model data (type of rte_ml_dev_ops::io_quantize).
521  *
522  * @param dev
523  *	ML device pointer.
524  * @param model_id
525  *	Model ID to use.
526  * @param nb_batches
527  *	Number of batches (uint16_t here, whereas the io_*_size_get callbacks take uint32_t).
528  * @param dbuffer
529  *	Pointer to de-quantized data buffer.
530  * @param qbuffer
531  *	Pointer to quantized data buffer.
532  *
533  * @return
534  *	- 0 on success.
535  *	- < 0, error code on failure.
536  */
537 typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,
538 				   void *dbuffer, void *qbuffer);
539 
540 /**
541  * @internal
542  *
543  * De-quantize model data (type of rte_ml_dev_ops::io_dequantize).
544  *
545  * @param dev
546  *	ML device pointer.
547  * @param model_id
548  *	Model ID to use.
549  * @param nb_batches
550  *	Number of batches (uint16_t here, whereas the io_*_size_get callbacks take uint32_t).
551  * @param qbuffer
552  *	Pointer to quantized data buffer.
553  * @param dbuffer
554  *	Pointer to de-quantized data buffer.
555  *
556  * @return
557  *	- 0 on success.
558  *	- < 0, error code on failure.
559  */
560 typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,
561 				     void *qbuffer, void *dbuffer);
562 
563 /**
564  * @internal
565  *
566  * ML device operations function pointer table, referenced from rte_ml_dev::dev_ops.
567  */
568 struct rte_ml_dev_ops {
569 	/** Get device information. */
570 	mldev_info_get_t dev_info_get;
571 
572 	/** Configure device. */
573 	mldev_configure_t dev_configure;
574 
575 	/** Close device. */
576 	mldev_close_t dev_close;
577 
578 	/** Start device. */
579 	mldev_start_t dev_start;
580 
581 	/** Stop device. */
582 	mldev_stop_t dev_stop;
583 
584 	/** Set up a device queue pair. */
585 	mldev_queue_pair_setup_t dev_queue_pair_setup;
586 
587 	/** Release a device queue pair. */
588 	mldev_queue_pair_release_t dev_queue_pair_release;
589 
590 	/** Get device statistics. */
591 	mldev_stats_get_t dev_stats_get;
592 
593 	/** Reset device statistics. */
594 	mldev_stats_reset_t dev_stats_reset;
595 
596 	/** Get names of extended stats. */
597 	mldev_xstats_names_get_t dev_xstats_names_get;
598 
599 	/** Get value of a single extended stat. */
600 	mldev_xstats_by_name_get_t dev_xstats_by_name_get;
601 
602 	/** Get extended stats of a device. */
603 	mldev_xstats_get_t dev_xstats_get;
604 
605 	/** Reset extended stats of the device. */
606 	mldev_xstats_reset_t dev_xstats_reset;
607 
608 	/** Dump ML device debug info. */
609 	mldev_dump_t dev_dump;
610 
611 	/** Run selftest of the ML device. */
612 	mldev_selftest_t dev_selftest;
613 
614 	/** Load an ML model. */
615 	mldev_model_load_t model_load;
616 
617 	/** Unload an ML model. */
618 	mldev_model_unload_t model_unload;
619 
620 	/** Start an ML model. */
621 	mldev_model_start_t model_start;
622 
623 	/** Stop an ML model. */
624 	mldev_model_stop_t model_stop;
625 
626 	/** Get model information. */
627 	mldev_model_info_get_t model_info_get;
628 
629 	/** Update model params. */
630 	mldev_model_params_update_t model_params_update;
631 
632 	/** Get input buffer size. */
633 	mldev_io_input_size_get_t io_input_size_get;
634 
635 	/** Get output buffer size. */
636 	mldev_io_output_size_get_t io_output_size_get;
637 
638 	/** Quantize data. */
639 	mldev_io_quantize_t io_quantize;
640 
641 	/** De-quantize data. */
642 	mldev_io_dequantize_t io_dequantize;
643 };
644 
645 /**
646  * @internal
647  *
648  * The data part, with no function pointers, associated with each device. This structure is safe to
649  * place in shared memory to be common among different processes in a multi-process configuration.
650  */
651 struct rte_ml_dev_data {
652 	/** Device ID for this instance. */
653 	int16_t dev_id;
654 
655 	/** Socket ID where memory is allocated. */
656 	int16_t socket_id;
657 
658 	/** Device state, single-bit flag: STOPPED(0) / STARTED(1). */
659 	__extension__ uint8_t dev_started : 1;
660 
661 	/** Number of device queue pairs. */
662 	uint16_t nb_queue_pairs;
663 
664 	/** Number of ML models. */
665 	uint16_t nb_models;
666 
667 	/** Array of pointers to queue pairs (opaque to this header). */
668 	void **queue_pairs;
669 
670 	/** Array of pointers to ML models (opaque to this header). */
671 	void **models;
672 
673 	/** PMD-specific private data. */
674 	void *dev_private;
675 
676 	/** Unique identifier name, stored in a buffer of RTE_ML_STR_MAX bytes. */
677 	char name[RTE_ML_STR_MAX];
678 };
679 
680 /**
681  * @internal
682  *
683  * The data structure associated with each ML device; declared cache-line aligned.
684  */
685 struct rte_ml_dev {
686 	/** Pointer to PMD enqueue function. */
687 	mldev_enqueue_t enqueue_burst;
688 
689 	/** Pointer to PMD dequeue function. */
690 	mldev_dequeue_t dequeue_burst;
691 
692 	/** Pointer to PMD Op error get function. */
693 	mldev_op_error_get_t op_error_get;
694 
695 	/** Pointer to device data (struct rte_ml_dev_data). */
696 	struct rte_ml_dev_data *data;
697 
698 	/** Table of functions exported by PMD (struct rte_ml_dev_ops). */
699 	struct rte_ml_dev_ops *dev_ops;
700 
701 	/** Backing RTE device. */
702 	struct rte_device *device;
703 
704 	/** Single-bit flag indicating the device is attached. */
705 	__extension__ uint8_t attached : 1;
706 } __rte_cache_aligned;
707 
708 /**
709  * @internal
710  *
711  * Global structure used for maintaining state of allocated ML devices.
712  */
713 struct rte_ml_dev_global {
714 	/** Device information array (struct rte_ml_dev entries). */
715 	struct rte_ml_dev *devs;
716 
717 	/** Array of pointers to device data structures (struct rte_ml_dev_data). */
718 	struct rte_ml_dev_data **data;
719 
720 	/** Number of devices found. */
721 	uint8_t nb_devs;
722 
723 	/** Maximum number of devices. */
724 	uint8_t max_devs;
725 };
726 
727 #ifdef __cplusplus
728 }
729 #endif
730 
731 #endif /* RTE_MLDEV_INTERNAL_H */
732