xref: /dpdk/lib/mldev/rte_mldev_core.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #ifndef RTE_MLDEV_INTERNAL_H
6 #define RTE_MLDEV_INTERNAL_H
7 
8 /**
9  * @file
10  *
11  * MLDEV internal header
12  *
13  * This file contains MLDEV private data structures and macros.
14  *
15  * @note
16  * These APIs are for MLDEV PMDs and library only.
17  */
18 
19 #include <stdint.h>
20 
21 #include <dev_driver.h>
22 #include <rte_common.h>
23 #include <rte_log.h>
24 #include <rte_mldev.h>
25 
/*
 * Device state values.
 * NOTE(review): presumably the values held by the 'attached' flag of
 * struct rte_ml_dev - confirm against the library sources.
 */
#define ML_DEV_DETACHED (0)
#define ML_DEV_ATTACHED (1)

/* Forward declaration; the structure itself is defined further down in this header. */
struct rte_ml_dev;
31 
/**
 * @internal
 *
 * PMD callback: enqueue a burst of inference requests on a queue pair of an ML device.
 *
 * @param dev
 *	ML device pointer.
 * @param qp_id
 *	ID of the queue pair to enqueue on.
 * @param ops
 *	Array of ML ops to be enqueued.
 * @param nb_ops
 *	Number of ops to enqueue.
 *
 * @return
 *	- Number of ops enqueued.
 */
typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev,
				    uint16_t qp_id,
				    struct rte_ml_op **ops,
				    uint16_t nb_ops);
51 
/**
 * @internal
 *
 * PMD callback: dequeue a burst of inference requests from a queue pair of an ML device.
 *
 * @param dev
 *	ML device pointer.
 * @param qp_id
 *	ID of the queue pair to dequeue from.
 * @param ops
 *	Array of ML ops to be dequeued.
 * @param nb_ops
 *	Number of ops to dequeue.
 *
 * @return
 *	- Number of ops dequeued.
 */
typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev,
				    uint16_t qp_id,
				    struct rte_ml_op **ops,
				    uint16_t nb_ops);
71 
/**
 * @internal
 *
 * PMD callback: fetch the error information associated with an ML op.
 *
 * @param dev
 *	ML device pointer.
 * @param op
 *	Handle of the ML op to query.
 * @param error
 *	Pointer to the error structure.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev,
				    struct rte_ml_op *op,
				    struct rte_ml_op_error *error);
90 
91 /**
92  * Definitions of all functions exported by a driver through the generic *rte_ml_dev_ops* structure,
93  * which the *rte_ml_dev* structure associated with a device references through its *dev_ops* field.
94  */
95 
/**
 * @internal
 *
 * Driver callback: retrieve device information.
 *
 * @param dev
 *	ML device pointer.
 * @param dev_info
 *	Pointer to the info structure.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_info_get_t)(struct rte_ml_dev *dev,
				struct rte_ml_dev_info *dev_info);
111 
/**
 * @internal
 *
 * Driver callback: configure a device.
 *
 * @param dev
 *	ML device pointer.
 * @param config
 *	ML device configuration (read-only).
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_configure_t)(struct rte_ml_dev *dev,
				 const struct rte_ml_dev_config *config);
127 
/**
 * @internal
 *
 * Driver callback: close a configured device.
 *
 * @param dev
 *	ML device pointer.
 *
 * @return
 *	- 0 on success.
 *	- -EAGAIN if the device is busy and cannot be closed.
 *	- < 0, error code on any failure other than busy.
 */
typedef int (*mldev_close_t)(struct rte_ml_dev *dev);
142 
/**
 * @internal
 *
 * Driver callback: start a configured device.
 *
 * @param dev
 *	ML device pointer.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_start_t)(struct rte_ml_dev *dev);
156 
/**
 * @internal
 *
 * Driver callback: stop a configured device.
 *
 * @param dev
 *	ML device pointer.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_stop_t)(struct rte_ml_dev *dev);
170 
/**
 * @internal
 *
 * Driver callback: set up a queue pair of a device.
 *
 * @param dev
 *	ML device pointer.
 * @param queue_pair_id
 *	Index of the queue pair.
 * @param queue_pair_conf
 *	Queue pair configuration structure (read-only).
 * @param socket_id
 *	Socket index.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_queue_pair_setup_t)(struct rte_ml_dev *dev,
					uint16_t queue_pair_id,
					const struct rte_ml_dev_qp_conf *queue_pair_conf,
					int socket_id);
192 
/**
 * @internal
 *
 * Driver callback: release the memory resources allocated by a queue pair.
 *
 * @param dev
 *	ML device pointer.
 * @param queue_pair_id
 *	Index of the queue pair.
 *
 * @return
 *	- 0 on success.
 *	- -EAGAIN if the device is busy and the queue pair cannot be closed.
 */
typedef int (*mldev_queue_pair_release_t)(struct rte_ml_dev *dev,
					  uint16_t queue_pair_id);
208 
/**
 * @internal
 *
 * Driver callback: retrieve device statistics.
 *
 * @param dev
 *	ML device pointer.
 * @param stats
 *	Pointer to the ML device stats structure to update.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_stats_get_t)(struct rte_ml_dev *dev,
				 struct rte_ml_dev_stats *stats);
224 
/**
 * @internal
 *
 * Driver callback: reset device statistics. No status is reported back to the caller.
 *
 * @param dev
 *	ML device pointer.
 */
typedef void (*mldev_stats_reset_t)(struct rte_ml_dev *dev);
234 
/**
 * @internal
 *
 * Driver callback: retrieve the names of the extended stats.
 *
 * @param dev
 *	ML device pointer.
 * @param mode
 *	Mode of the stats to retrieve.
 * @param model_id
 *	Model ID, used in model mode only. Ignored in device mode.
 * @param xstats_map
 *	Array that receives the stat IDs and names.
 * @param size
 *	Size of the xstats_map array.
 *
 * @return
 *	- >= 0 and <= size on success.
 *	- > size, error. The value returned is the xstats_map array size required.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_xstats_names_get_t)(struct rte_ml_dev *dev,
					enum rte_ml_dev_xstats_mode mode,
					int32_t model_id,
					struct rte_ml_dev_xstats_map *xstats_map,
					uint32_t size);
259 
/**
 * @internal
 *
 * Driver callback: look up a single extended stat by its name.
 *
 * @param dev
 *	ML device pointer.
 * @param name
 *	Name of the stat to retrieve.
 * @param stat_id
 *	ID of the stat to be returned.
 * @param value
 *	Value of the stat to be returned.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_xstats_by_name_get_t)(struct rte_ml_dev *dev,
					  const char *name,
					  uint16_t *stat_id,
					  uint64_t *value);
280 
/**
 * @internal
 *
 * Driver callback: retrieve extended stats of a device.
 *
 * @param dev
 *	ML device pointer.
 * @param mode
 *	Mode of the stats to retrieve.
 * @param model_id
 *	Model ID, used in model mode only. Ignored in device mode.
 * @param stat_ids
 *	Array of IDs of the stats to be retrieved.
 * @param values
 *	Array receiving the values of the stats requested by ID.
 * @param nb_ids
 *	Number of stats requested.
 *
 * @return
 *	- >= 0, number of entries filled into the values array.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_xstats_get_t)(struct rte_ml_dev *dev,
				  enum rte_ml_dev_xstats_mode mode,
				  int32_t model_id,
				  const uint16_t stat_ids[],
				  uint64_t values[],
				  uint16_t nb_ids);
306 
/**
 * @internal
 *
 * Driver callback: reset extended stats.
 *
 * @param dev
 *	ML device pointer.
 * @param mode
 *	Stats mode (device or model) the reset applies to.
 * @param model_id
 *	Model ID, used in model mode only. Ignored in device mode.
 * @param stat_ids
 *	Array of IDs of the stats to be reset.
 * @param nb_ids
 *	Number of IDs in the stat_ids array.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_xstats_reset_t)(struct rte_ml_dev *dev,
				    enum rte_ml_dev_xstats_mode mode,
				    int32_t model_id,
				    const uint16_t stat_ids[],
				    uint16_t nb_ids);
329 
/**
 * @internal
 *
 * Driver callback: dump ML device debug info.
 *
 * @param dev
 *	ML device pointer.
 * @param fd
 *	File stream (FILE *) the debug info is dumped to.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error code on failure.
 */
typedef int (*mldev_dump_t)(struct rte_ml_dev *dev,
			    FILE *fd);
346 
/**
 * @internal
 *
 * Driver callback: run the selftest of an ML device.
 *
 * @param dev
 *	ML device pointer.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_selftest_t)(struct rte_ml_dev *dev);
360 
/**
 * @internal
 *
 * Driver callback: load an ML model.
 *
 * @param dev
 *	ML device pointer.
 * @param params
 *	Model load parameters.
 * @param model_id
 *	Location receiving the model ID returned by the library.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_load_t)(struct rte_ml_dev *dev,
				  struct rte_ml_model_params *params,
				  uint16_t *model_id);
379 
/**
 * @internal
 *
 * Driver callback: unload an ML model.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	ID of the model to unload.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_unload_t)(struct rte_ml_dev *dev,
				    uint16_t model_id);
395 
/**
 * @internal
 *
 * Driver callback: start an ML model.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	ID of the model to start.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_start_t)(struct rte_ml_dev *dev,
				   uint16_t model_id);
411 
/**
 * @internal
 *
 * Driver callback: stop an ML model.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	ID of the model to stop.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_stop_t)(struct rte_ml_dev *dev,
				  uint16_t model_id);
427 
/**
 * @internal
 *
 * Driver callback: get information about a model.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	ID of the model to query.
 * @param model_info
 *	Pointer to the model info structure.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_info_get_t)(struct rte_ml_dev *dev,
				      uint16_t model_id,
				      struct rte_ml_model_info *model_info);
446 
/**
 * @internal
 *
 * Driver callback: update the params of a model.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	ID of the model to update.
 * @param buffer
 *	Pointer to the model params.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_model_params_update_t)(struct rte_ml_dev *dev,
					   uint16_t model_id,
					   void *buffer);
464 
/**
 * @internal
 *
 * Driver callback: quantize model data.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	Model ID to use.
 * @param dbuffer
 *	Pointer to de-quantized data buffer.
 * @param qbuffer
 *	Pointer to quantized data buffer.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id,
				   struct rte_ml_buff_seg **dbuffer,
				   struct rte_ml_buff_seg **qbuffer);
486 
/**
 * @internal
 *
 * Driver callback: de-quantize model data.
 *
 * @param dev
 *	ML device pointer.
 * @param model_id
 *	Model ID to use.
 * @param qbuffer
 *	Pointer to quantized data buffer.
 * @param dbuffer
 *	Pointer to de-quantized data buffer.
 *
 * @return
 *	- 0 on success.
 *	- < 0, error on failure.
 */
typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id,
				     struct rte_ml_buff_seg **qbuffer,
				     struct rte_ml_buff_seg **dbuffer);
508 
509 /**
510  * @internal
511  *
512  * ML device operations function pointer table.
513  */
514 struct rte_ml_dev_ops {
515 	/** Get device information. */
516 	mldev_info_get_t dev_info_get;
517 
518 	/** Configure device. */
519 	mldev_configure_t dev_configure;
520 
521 	/** Close device. */
522 	mldev_close_t dev_close;
523 
524 	/** Start device. */
525 	mldev_start_t dev_start;
526 
527 	/** Stop device. */
528 	mldev_stop_t dev_stop;
529 
530 	/** Set up a device queue pair. */
531 	mldev_queue_pair_setup_t dev_queue_pair_setup;
532 
533 	/** Release a device queue pair. */
534 	mldev_queue_pair_release_t dev_queue_pair_release;
535 
536 	/** Get device statistics. */
537 	mldev_stats_get_t dev_stats_get;
538 
539 	/** Reset device statistics. */
540 	mldev_stats_reset_t dev_stats_reset;
541 
542 	/** Get names of extended stats. */
543 	mldev_xstats_names_get_t dev_xstats_names_get;
544 
545 	/** Get value of a single extended stat. */
546 	mldev_xstats_by_name_get_t dev_xstats_by_name_get;
547 
548 	/** Get extended stats of a device. */
549 	mldev_xstats_get_t dev_xstats_get;
550 
551 	/** Reset extended stats of the device. */
552 	mldev_xstats_reset_t dev_xstats_reset;
553 
554 	/** Dump ML device debug info. */
555 	mldev_dump_t dev_dump;
556 
557 	/** Dump ML device debug info. */
558 	mldev_selftest_t dev_selftest;
559 
560 	/** Load an ML model. */
561 	mldev_model_load_t model_load;
562 
563 	/** Unload an ML model. */
564 	mldev_model_unload_t model_unload;
565 
566 	/** Start an ML model. */
567 	mldev_model_start_t model_start;
568 
569 	/** Stop an ML model. */
570 	mldev_model_stop_t model_stop;
571 
572 	/** Get model information. */
573 	mldev_model_info_get_t model_info_get;
574 
575 	/** Update model params. */
576 	mldev_model_params_update_t model_params_update;
577 
578 	/** Quantize data */
579 	mldev_io_quantize_t io_quantize;
580 
581 	/** De-quantize data */
582 	mldev_io_dequantize_t io_dequantize;
583 };
584 
585 /**
586  * @internal
587  *
588  * The data part, with no function pointers, associated with each device. This structure is safe to
589  * place in shared memory to be common among different processes in a multi-process configuration.
590  */
591 struct rte_ml_dev_data {
592 	/** Device ID for this instance. */
593 	int16_t dev_id;
594 
595 	/** Socket ID where memory is allocated. */
596 	int16_t socket_id;
597 
598 	/** Device state: STOPPED(0) / STARTED(1) */
599 	__extension__ uint8_t dev_started : 1;
600 
601 	/** Number of device queue pairs. */
602 	uint16_t nb_queue_pairs;
603 
604 	/** Number of ML models. */
605 	uint16_t nb_models;
606 
607 	/** Array of pointers to queue pairs. */
608 	void **queue_pairs;
609 
610 	/** Array of pointers to ML models. */
611 	void **models;
612 
613 	/** PMD-specific private data. */
614 	void *dev_private;
615 
616 	/** Unique identifier name. */
617 	char name[RTE_ML_STR_MAX];
618 };
619 
620 /**
621  * @internal
622  *
623  * The data structure associated with each ML device.
624  */
625 struct __rte_cache_aligned rte_ml_dev {
626 	/** Pointer to PMD enqueue function. */
627 	mldev_enqueue_t enqueue_burst;
628 
629 	/** Pointer to PMD dequeue function. */
630 	mldev_dequeue_t dequeue_burst;
631 
632 	/** Pointer to PMD Op error get function. */
633 	mldev_op_error_get_t op_error_get;
634 
635 	/** Pointer to device data. */
636 	struct rte_ml_dev_data *data;
637 
638 	/** Functions exported by PMD. */
639 	struct rte_ml_dev_ops *dev_ops;
640 
641 	/** Backing RTE device. */
642 	struct rte_device *device;
643 
644 	/** Flag indicating the device is attached. */
645 	__extension__ uint8_t attached : 1;
646 };
647 
/**
 * @internal
 *
 * Global bookkeeping structure that maintains the state of the allocated ML devices.
 */
struct rte_ml_dev_global {
	struct rte_ml_dev *devs; /**< Device information array. */
	struct rte_ml_dev_data **data; /**< Device private data array. */

	uint8_t nb_devs; /**< Number of devices found. */
	uint8_t max_devs; /**< Maximum number of devices. */
};
666 
667 #endif /* RTE_MLDEV_INTERNAL_H */
668