xref: /dpdk/lib/mldev/rte_mldev_core.h (revision 2bf48044dca1892e571fd4964eecaacf6cb0c1c2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_INTERNAL_H
6 #define RTE_MLDEV_INTERNAL_H
7 
8 /**
9  * @file
10  *
11  * MLDEV internal header
12  *
13  * This file contains MLDEV private data structures and macros.
14  *
15  * @note
16  * These APIs are for MLDEV PMDs and library only.
17  */
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 #include <stdint.h>
24 
25 #include <dev_driver.h>
26 #include <rte_common.h>
27 #include <rte_log.h>
28 #include <rte_mldev.h>
29 
30 /* Device attach state; presumably the values of rte_ml_dev::attached (TODO confirm). */
31 #define ML_DEV_DETACHED (0)
32 #define ML_DEV_ATTACHED (1)
33 
34 struct rte_ml_dev; /* Forward declaration; the structure is defined later in this header. */
35 
36 /**
37  * @internal
38  *
39  * Type of rte_ml_dev::enqueue_burst, the PMD function that enqueues a burst of inference
40  * requests to a queue on an ML device.
41  * @param dev
42  *	ML device pointer.
43  * @param qp_id
44  *	Queue-pair ID.
45  * @param ops
46  *	Array of pointers to the ML ops to be enqueued.
47  * @param nb_ops
48  *	Number of ops to enqueue.
49  *
50  * @return
51  *	- Number of ops enqueued.
52  */
53 typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
54 				    uint16_t nb_ops);
55 
56 /**
57  * @internal
58  *
59  * Type of rte_ml_dev::dequeue_burst, the PMD function that dequeues a burst of inference
60  * requests from a queue on an ML device.
61  * @param dev
62  *	ML device pointer.
63  * @param qp_id
64  *	Queue-pair ID.
65  * @param ops
66  *	Array of ML ops to be dequeued.
67  * @param nb_ops
68  *	Number of ops to dequeue.
69  *
70  * @return
71  *	- Number of ops dequeued.
72  */
73 typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
74 				    uint16_t nb_ops);
75 
76 /**
77  * @internal
78  *
79  * Type of rte_ml_dev::op_error_get, the PMD function that gets error information for an op.
80  *
81  * @param dev
82  *	ML device pointer.
83  * @param op
84  *	ML op handle.
85  * @param error
86  *	Pointer to the op error structure.
87  *
88  * @return
89  *	- 0 on success.
90  *	- < 0, error on failure.
91  */
92 typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev, struct rte_ml_op *op,
93 				    struct rte_ml_op_error *error);
94 
95 /**
96  * Definitions of all functions exported by a driver through the generic structure of type
97  * *rte_ml_dev_ops* supplied in the *rte_ml_dev* structure associated with a device.
98  */
99 
100 /**
101  * @internal
102  *
103  * Type of rte_ml_dev_ops::dev_info_get, the function used to get device information.
104  *
105  * @param dev
106  *	ML device pointer.
107  * @param dev_info
108  *	Pointer to the device info structure.
109  *
110  * @return
111  *	- 0 on success.
112  *	- < 0, error code on failure.
113  */
114 typedef int (*mldev_info_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info);
115 
116 /**
117  * @internal
118  *
119  * Type of rte_ml_dev_ops::dev_configure, the function used to configure a device.
120  *
121  * @param dev
122  *	ML device pointer.
123  * @param config
124  *	ML device configuration (read-only for the callee).
125  *
126  * @return
127  *	- 0 on success.
128  *	- < 0, error code on failure.
129  */
130 typedef int (*mldev_configure_t)(struct rte_ml_dev *dev, const struct rte_ml_dev_config *config);
131 
132 /**
133  * @internal
134  *
135  * Type of rte_ml_dev_ops::dev_close, the function used to close a configured device.
136  *
137  * @param dev
138  *	ML device pointer.
139  *
140  * @return
141  *	- 0 on success.
142  *	- -EAGAIN, if the device can't be closed as it is busy.
143  *	- < 0, error code on failure, other than busy.
144  */
145 typedef int (*mldev_close_t)(struct rte_ml_dev *dev);
146 
147 /**
148  * @internal
149  *
150  * Type of rte_ml_dev_ops::dev_start, the function used to start a configured device.
151  *
152  * @param dev
153  *	ML device pointer.
154  *
155  * @return
156  *	- 0 on success.
157  *	- < 0, error code on failure.
158  */
159 typedef int (*mldev_start_t)(struct rte_ml_dev *dev);
160 
161 /**
162  * @internal
163  *
164  * Type of rte_ml_dev_ops::dev_stop, the function used to stop a configured device.
165  *
166  * @param dev
167  *	ML device pointer.
168  *
169  * @return
170  *	- 0 on success.
171  *	- < 0, error code on failure.
172  */
173 typedef int (*mldev_stop_t)(struct rte_ml_dev *dev);
174 
175 /**
176  * @internal
177  *
178  * Type of rte_ml_dev_ops::dev_queue_pair_setup, used to set up a queue pair for a device.
179  *
180  * @param dev
181  *	ML device pointer.
182  * @param queue_pair_id
183  *	Queue pair index.
184  * @param queue_pair_conf
185  *	Queue pair configuration structure (read-only for the callee).
186  * @param socket_id
187  *	Socket index.
188  *
189  * @return
190  *	- 0 on success.
191  *	- < 0, error on failure.
192  */
193 typedef int (*mldev_queue_pair_setup_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id,
194 					const struct rte_ml_dev_qp_conf *queue_pair_conf,
195 					int socket_id);
196 
197 /**
198  * @internal
199  *
200  * Type of rte_ml_dev_ops::dev_queue_pair_release, used to release memory resources allocated
201  * by a given queue pair.
202  * @param dev
203  *	ML device pointer.
204  * @param queue_pair_id
205  *	Queue pair index.
206  *
207  * @return
208  *	- 0 on success.
209  *	- -EAGAIN, if the queue pair can't be released as the device is busy.
210  */
211 typedef int (*mldev_queue_pair_release_t)(struct rte_ml_dev *dev, uint16_t queue_pair_id);
212 
213 /**
214  * @internal
215  *
216  * Type of rte_ml_dev_ops::dev_stats_get, the function used to get device statistics.
217  *
218  * @param dev
219  *	ML device pointer.
220  * @param stats
221  *	Pointer to the ML device stats structure to update.
222  *
223  * @return
224  *	- 0 on success.
225  *	- < 0, error on failure.
226  */
227 typedef int (*mldev_stats_get_t)(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats);
228 
229 /**
230  * @internal
231  *
232  * Type of rte_ml_dev_ops::dev_stats_reset, used to reset device statistics (no return value).
233  *
234  * @param dev
235  *	ML device pointer.
236  */
237 typedef void (*mldev_stats_reset_t)(struct rte_ml_dev *dev);
238 
239 /**
240  * @internal
241  *
242  * Type of rte_ml_dev_ops::dev_xstats_names_get, used to get the names of extended stats.
243  *
244  * @param dev
245  *	ML device pointer.
246  * @param xstats_map
247  *	Array to insert the stat IDs and names into.
248  * @param size
249  *	Size of the xstats_map array.
250  *
251  * @return
252  *	- >= 0 and <= size on success.
253  *	- > size, error. Returns the size of the xstats_map array required.
254  *	- < 0, error code on failure.
255  */
256 typedef int (*mldev_xstats_names_get_t)(struct rte_ml_dev *dev,
257 					struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
258 
259 /**
260  * @internal
261  *
262  * Type of rte_ml_dev_ops::dev_xstats_by_name_get, used to get a single extended stat by name.
263  *
264  * @param dev
265  *	ML device pointer.
266  * @param name
267  *	Name of the stat to retrieve.
268  * @param stat_id
269  *	Pointer through which the ID of the stat is returned.
270  * @param value
271  *	Pointer through which the value of the stat is returned.
272  *
273  * @return
274  *	- >= 0 stat value. NOTE(review): value is also an output argument - confirm with PMDs.
275  *	- < 0, error code on failure.
276  */
277 typedef int (*mldev_xstats_by_name_get_t)(struct rte_ml_dev *dev, const char *name,
278 					  uint16_t *stat_id, uint64_t *value);
279 
280 /**
281  * @internal
282  *
283  * Type of rte_ml_dev_ops::dev_xstats_get, used to retrieve extended stats of a device.
284  *
285  * @param dev
286  *	ML device pointer.
287  * @param stat_ids
288  *	Array of ID numbers of the stats to be retrieved.
289  * @param values
290  *	Array for the values of the stats requested by ID.
291  * @param nb_ids
292  *	Number of stats requested.
293  *
294  * @return
295  *	- >= 0, number of entries filled into the values array.
296  *	- < 0, error code on failure.
297  */
298 typedef int (*mldev_xstats_get_t)(struct rte_ml_dev *dev, const uint16_t *stat_ids,
299 				  uint64_t *values, uint16_t nb_ids);
300 
301 /**
302  * @internal
303  *
304  * Type of rte_ml_dev_ops::dev_xstats_reset, the function used to reset extended stats.
305  *
306  * @param dev
307  *	ML device pointer.
308  * @param stat_ids
309  *	Array of IDs of the stats to be reset.
310  * @param nb_ids
311  *	Number of IDs in the stat_ids array.
312  *
313  * @return
314  *	- 0 on success.
315  *	- < 0, error code on failure.
316  */
317 typedef int (*mldev_xstats_reset_t)(struct rte_ml_dev *dev, const uint16_t *stat_ids,
318 				    uint16_t nb_ids);
319 
320 /**
321  * @internal
322  *
323  * Type of rte_ml_dev_ops::dev_dump, the function used to dump ML device debug info.
324  *
325  * @param dev
326  *	ML device pointer.
327  * @param fd
328  *	File stream (FILE pointer) to dump the debug info to.
329  *
330  * @return
331  *	- 0 on success.
332  *	- < 0, error code on failure.
333  */
334 
335 typedef int (*mldev_dump_t)(struct rte_ml_dev *dev, FILE *fd);
336 
337 /**
338  * @internal
339  *
340  * Type of rte_ml_dev_ops::dev_selftest, the function used for selftest of an ML device.
341  *
342  * @param dev
343  *	ML device pointer.
344  *
345  * @return
346  *	- 0 on success.
347  *	- < 0, error on failure.
348  */
349 typedef int (*mldev_selftest_t)(struct rte_ml_dev *dev);
350 
351 /**
352  * @internal
353  *
354  * Type of rte_ml_dev_ops::model_load, the function used to load an ML model.
355  *
356  * @param dev
357  *	ML device pointer.
358  * @param params
359  *	Model load params.
360  * @param model_id
361  *	Pointer through which the model ID is returned.
362  *
363  * @return
364  *	- 0 on success.
365  *	- < 0, error on failure.
366  */
367 typedef int (*mldev_model_load_t)(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
368 				  uint16_t *model_id);
369 
370 /**
371  * @internal
372  *
373  * Type of rte_ml_dev_ops::model_unload, the function used to unload an ML model.
374  *
375  * @param dev
376  *	ML device pointer.
377  * @param model_id
378  *	ID of the model to unload.
379  *
380  * @return
381  *	- 0 on success.
382  *	- < 0, error on failure.
383  */
384 typedef int (*mldev_model_unload_t)(struct rte_ml_dev *dev, uint16_t model_id);
385 
386 /**
387  * @internal
388  *
389  * Type of rte_ml_dev_ops::model_start, the function used to start an ML model.
390  *
391  * @param dev
392  *	ML device pointer.
393  * @param model_id
394  *	ID of the model to start.
395  *
396  * @return
397  *	- 0 on success.
398  *	- < 0, error on failure.
399  */
400 typedef int (*mldev_model_start_t)(struct rte_ml_dev *dev, uint16_t model_id);
401 
402 /**
403  * @internal
404  *
405  * Type of rte_ml_dev_ops::model_stop, the function used to stop an ML model.
406  *
407  * @param dev
408  *	ML device pointer.
409  * @param model_id
410  *	ID of the model to stop.
411  *
412  * @return
413  *	- 0 on success.
414  *	- < 0, error on failure.
415  */
416 typedef int (*mldev_model_stop_t)(struct rte_ml_dev *dev, uint16_t model_id);
417 
418 /**
419  * @internal
420  *
421  * Type of rte_ml_dev_ops::model_info_get, the function used to get info about a model.
422  *
423  * @param dev
424  *	ML device pointer.
425  * @param model_id
426  *	ID of the model to query.
427  * @param model_info
428  *	Pointer to the model info structure.
429  *
430  * @return
431  *	- 0 on success.
432  *	- < 0, error on failure.
433  */
434 typedef int (*mldev_model_info_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
435 				      struct rte_ml_model_info *model_info);
436 
437 /**
438  * @internal
439  *
440  * Type of rte_ml_dev_ops::model_params_update, the function used to update model params.
441  *
442  * @param dev
443  *	ML device pointer.
444  * @param model_id
445  *	ID of the model to update.
446  * @param buffer
447  *	Pointer to the model params.
448  *
449  * @return
450  *	- 0 on success.
451  *	- < 0, error on failure.
452  */
453 typedef int (*mldev_model_params_update_t)(struct rte_ml_dev *dev, uint16_t model_id, void *buffer);
454 
455 /**
456  * @internal
457  *
458  * Type of rte_ml_dev_ops::io_input_size_get, used to get the size of input buffers.
459  *
460  * @param dev
461  *	ML device pointer.
462  * @param model_id
463  *	Model ID to use.
464  * @param nb_batches
465  *	Number of batches.
466  * @param input_qsize
467  *	Pointer through which the size of quantized input is returned.
468  * @param input_dsize
469  *	Pointer through which the size of dequantized input is returned.
470  *
471  * @return
472  *	- 0 on success.
473  *	- < 0, error on failure.
474  */
475 typedef int (*mldev_io_input_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
476 					 uint32_t nb_batches, uint64_t *input_qsize,
477 					 uint64_t *input_dsize);
478 
479 /**
480  * @internal
481  *
482  * Type of rte_ml_dev_ops::io_output_size_get, used to get the size of output buffers.
483  *
484  * @param dev
485  *	ML device pointer.
486  * @param model_id
487  *	Model ID to use.
488  * @param nb_batches
489  *	Number of batches.
490  * @param output_qsize
491  *	Pointer through which the size of quantized output is returned.
492  * @param output_dsize
493  *	Pointer through which the size of dequantized output is returned.
494  *
495  * @return
496  *	- 0 on success.
497  *	- < 0, error on failure.
498  */
499 typedef int (*mldev_io_output_size_get_t)(struct rte_ml_dev *dev, uint16_t model_id,
500 					  uint32_t nb_batches, uint64_t *output_qsize,
501 					  uint64_t *output_dsize);
502 
503 /**
504  * @internal
505  *
506  * Type of rte_ml_dev_ops::io_quantize, the function used to quantize model data.
507  *
508  * @param dev
509  *	ML device pointer.
510  * @param model_id
511  *	Model ID to use.
512  * @param nb_batches
513  *	Number of batches.
514  * @param dbuffer
515  *	Pointer to de-quantized data buffer.
516  * @param qbuffer
517  *	Pointer to quantized data buffer.
518  *
519  * @return
520  *	- 0 on success.
521  *	- < 0, error on failure.
522  */
523 typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,
524 				   void *dbuffer, void *qbuffer);
525 
526 /**
527  * @internal
528  *
529  * Type of rte_ml_dev_ops::io_dequantize, the function used to de-quantize model data.
530  *
531  * @param dev
532  *	ML device pointer.
533  * @param model_id
534  *	Model ID to use.
535  * @param nb_batches
536  *	Number of batches.
537  * @param qbuffer
538  *	Pointer to quantized data buffer.
539  * @param dbuffer
540  *	Pointer to de-quantized data buffer.
541  *
542  * @return
543  *	- 0 on success.
544  *	- < 0, error on failure.
545  */
546 typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,
547 				     void *qbuffer, void *dbuffer);
548 
549 /**
550  * @internal
551  *
552  * ML device operations function pointer table; referenced through rte_ml_dev::dev_ops.
553  */
554 struct rte_ml_dev_ops {
555 	/** Get device information. */
556 	mldev_info_get_t dev_info_get;
557 
558 	/** Configure device. */
559 	mldev_configure_t dev_configure;
560 
561 	/** Close a configured device. */
562 	mldev_close_t dev_close;
563 
564 	/** Start a configured device. */
565 	mldev_start_t dev_start;
566 
567 	/** Stop a configured device. */
568 	mldev_stop_t dev_stop;
569 
570 	/** Set up a device queue pair. */
571 	mldev_queue_pair_setup_t dev_queue_pair_setup;
572 
573 	/** Release a device queue pair. */
574 	mldev_queue_pair_release_t dev_queue_pair_release;
575 
576 	/** Get device statistics. */
577 	mldev_stats_get_t dev_stats_get;
578 
579 	/** Reset device statistics. */
580 	mldev_stats_reset_t dev_stats_reset;
581 
582 	/** Get names of extended stats. */
583 	mldev_xstats_names_get_t dev_xstats_names_get;
584 
585 	/** Get a single extended stat by name. */
586 	mldev_xstats_by_name_get_t dev_xstats_by_name_get;
587 
588 	/** Get extended stats of a device. */
589 	mldev_xstats_get_t dev_xstats_get;
590 
591 	/** Reset extended stats of the device. */
592 	mldev_xstats_reset_t dev_xstats_reset;
593 
594 	/** Dump ML device debug info. */
595 	mldev_dump_t dev_dump;
596 
597 	/** Run selftest of the ML device. */
598 	mldev_selftest_t dev_selftest;
599 
600 	/** Load an ML model. */
601 	mldev_model_load_t model_load;
602 
603 	/** Unload an ML model. */
604 	mldev_model_unload_t model_unload;
605 
606 	/** Start an ML model. */
607 	mldev_model_start_t model_start;
608 
609 	/** Stop an ML model. */
610 	mldev_model_stop_t model_stop;
611 
612 	/** Get model information. */
613 	mldev_model_info_get_t model_info_get;
614 
615 	/** Update model params. */
616 	mldev_model_params_update_t model_params_update;
617 
618 	/** Get input buffer size. */
619 	mldev_io_input_size_get_t io_input_size_get;
620 
621 	/** Get output buffer size. */
622 	mldev_io_output_size_get_t io_output_size_get;
623 
624 	/** Quantize data. */
625 	mldev_io_quantize_t io_quantize;
626 
627 	/** De-quantize data. */
628 	mldev_io_dequantize_t io_dequantize;
629 };
630 
631 /**
632  * @internal
633  *
634  * The data part, with no function pointers, associated with each device. This structure is safe to
635  * place in shared memory to be common among different processes in a multi-process configuration.
636  */
637 struct rte_ml_dev_data {
638 	/** Device ID for this instance. */
639 	int16_t dev_id;
640 
641 	/** Socket ID where memory is allocated. */
642 	int16_t socket_id;
643 
644 	/** Device state (single-bit flag): STOPPED(0) / STARTED(1). */
645 	__extension__ uint8_t dev_started : 1;
646 
647 	/** Number of device queue pairs. */
648 	uint16_t nb_queue_pairs;
649 
650 	/** Number of ML models. */
651 	uint16_t nb_models;
652 
653 	/** Array of pointers to queue pairs (opaque to this header). */
654 	void **queue_pairs;
655 
656 	/** Array of pointers to ML models (opaque to this header). */
657 	void **models;
658 
659 	/** PMD-specific private data. */
660 	void *dev_private;
661 
662 	/** Unique identifier name, at most RTE_ML_STR_MAX bytes. */
663 	char name[RTE_ML_STR_MAX];
664 };
665 
666 /**
667  * @internal
668  *
669  * The data structure associated with each ML device; declared cache-line aligned.
670  */
671 struct rte_ml_dev {
672 	/** Pointer to PMD enqueue function. */
673 	mldev_enqueue_t enqueue_burst;
674 
675 	/** Pointer to PMD dequeue function. */
676 	mldev_dequeue_t dequeue_burst;
677 
678 	/** Pointer to PMD op error get function. */
679 	mldev_op_error_get_t op_error_get;
680 
681 	/** Pointer to device data (the function-pointer-free part of the device). */
682 	struct rte_ml_dev_data *data;
683 
684 	/** Table of functions exported by the PMD. */
685 	struct rte_ml_dev_ops *dev_ops;
686 
687 	/** Backing RTE device. */
688 	struct rte_device *device;
689 
690 	/** Single-bit flag indicating the device is attached. */
691 	__extension__ uint8_t attached : 1;
692 } __rte_cache_aligned;
693 
694 /**
695  * @internal
696  *
697  * Global structure used for maintaining state of allocated ML devices.
698  */
699 struct rte_ml_dev_global {
700 	/** Device information array. */
701 	struct rte_ml_dev *devs;
702 
703 	/** Array of pointers to per-device data. */
704 	struct rte_ml_dev_data **data;
705 
706 	/** Number of devices found. */
707 	uint8_t nb_devs;
708 
709 	/** Maximum number of devices. */
710 	uint8_t max_devs;
711 };
712 
713 #ifdef __cplusplus
714 }
715 #endif
716 
717 #endif /* RTE_MLDEV_INTERNAL_H */
718