/* xref: /dpdk/lib/eal/common/eal_private.h (revision 5dc68f2be8adea7aad24dc56dad57ec9a3bfe413) */
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation
3  */
4 
5 #ifndef _EAL_PRIVATE_H_
6 #define _EAL_PRIVATE_H_
7 
8 #include <stdbool.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <sys/queue.h>
12 
13 #include <dev_driver.h>
14 #include <rte_lcore.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 
18 #include "eal_internal_cfg.h"
19 
/**
 * Structure storing internal configuration (per-lcore).
 */
struct lcore_config {
	rte_thread_t thread_id;    /**< thread identifier */
	int pipe_main2worker[2];   /**< communication pipe with main */
	int pipe_worker2main[2];   /**< communication pipe with main */

	RTE_ATOMIC(lcore_function_t *) volatile f; /**< function to call */
	void * volatile arg;       /**< argument of function */
	volatile int ret;          /**< return value of function */

	volatile RTE_ATOMIC(enum rte_lcore_state_t) state; /**< lcore state */
	unsigned int socket_id;    /**< physical socket id for this lcore */
	unsigned int core_id;      /**< core number on socket for this lcore */
	int core_index;            /**< relative index, starting from 0 */
	uint8_t core_role;         /**< role of core eg: OFF, RTE, SERVICE */

	rte_cpuset_t cpuset;       /**< CPU set to which the lcore has affinity */
};

/** Internal per-lcore configuration, indexed by lcore id. */
extern struct lcore_config lcore_config[RTE_MAX_LCORE];
42 
/**
 * The global RTE configuration structure.
 */
struct rte_config {
	uint32_t main_lcore;         /**< Id of the main lcore */
	uint32_t lcore_count;        /**< Number of available logical cores. */
	uint32_t numa_node_count;    /**< Number of detected NUMA nodes. */
	uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
	uint32_t service_lcore_count;/**< Number of available service cores. */
	enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */

	/** Primary or secondary configuration */
	enum rte_proc_type_t process_type;

	/** PA or VA mapping mode */
	enum rte_iova_mode iova_mode;

	/**
	 * Pointer to memory configuration, which may be shared across multiple
	 * DPDK instances.
	 */
	struct rte_mem_config *mem_config;
};

/**
 * Get the global configuration structure.
 *
 * @return
 *   A pointer to the global configuration structure.
 */
struct rte_config *rte_eal_get_configuration(void);
74 
/**
 * Initialize the memzone subsystem (private to eal).
 *
 * @return
 *   - 0 on success
 *   - Negative on error
 */
int rte_eal_memzone_init(void);

/**
 * Fill configuration with number of physical and logical processors.
 *
 * This function is private to EAL.
 *
 * Parse /proc/cpuinfo to get the number of physical and logical
 * processors on the machine.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_cpu_init(void);

/**
 * Check for architecture supported MMU.
 *
 * This function is private to EAL.
 */
bool eal_mmu_supported(void);

/**
 * Create memseg lists.
 *
 * This function is private to EAL.
 *
 * Preallocate virtual memory.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_memseg_init(void);

/**
 * Map memory.
 *
 * This function is private to EAL.
 *
 * Fill configuration structure with these infos, and return 0 on success.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_memory_init(void)
	__rte_shared_locks_required(rte_mcfg_mem_get_lock());

/**
 * Configure timers.
 *
 * This function is private to EAL.
 *
 * Mmap memory areas used by HPET (high precision event timer) that will
 * provide our time reference, and configure the TSC frequency also for it
 * to be used as a reference.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_timer_init(void);

/**
 * Init tail queues for non-EAL library structures. This is to allow
 * the rings, mempools, etc. lists to be shared among multiple processes.
 *
 * This function is private to EAL.
 *
 * @return
 *    0 on success, negative on error
 */
int rte_eal_tailqs_init(void);

/**
 * Init interrupt handling.
 *
 * This function is private to EAL.
 *
 * @return
 *  0 on success, negative on error
 */
int rte_eal_intr_init(void);

/**
 * Init alarm mechanism. This is to allow a callback be called after
 * a specific time.
 *
 * This function is private to EAL.
 *
 * @return
 *  0 on success, negative on error
 */
int rte_eal_alarm_init(void);

/**
 * Alarm mechanism cleanup.
 *
 * This function is private to EAL.
 */
void rte_eal_alarm_cleanup(void);

/**
 * Check whether a kernel module (e.g. vfio, vfio_iommu_type1)
 * is loaded.
 *
 * @param module_name
 *	The name of the module to check.
 *
 * @return
 *	-1 means some error happened (NULL pointer or open failure)
 *	0  means the module is not loaded
 *	1  means the module is loaded
 */
int rte_eal_check_module(const char *module_name);
198 
/**
 * Memory reservation flags.
 */
enum eal_mem_reserve_flags {
	/**
	 * Reserve hugepages. May be unsupported by some platforms.
	 */
	EAL_RESERVE_HUGEPAGES = 1 << 0,
	/**
	 * Force reserving memory at the requested address.
	 * This can be a destructive action depending on the implementation.
	 *
	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
	 *      (although implementations are not required to use it).
	 */
	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
};

/**
 * Get virtual area of specified size from the OS.
 *
 * This function is private to the EAL.
 *
 * @param requested_addr
 *   Address where to request address space.
 * @param size
 *   Size of requested area.
 * @param page_sz
 *   Page size on which to align requested virtual area.
 * @param flags
 *   EAL_VIRTUAL_AREA_* flags (defined below).
 * @param reserve_flags
 *   Extra flags passed directly to eal_mem_reserve().
 *
 * @return
 *   Virtual area address if successful.
 *   NULL if unsuccessful.
 */

#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
/**< don't fail if cannot get exact requested address. */
#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
/**< try getting smaller sized (decrement by page size) virtual areas if cannot
 * get area of requested size.
 */
#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
/**< immediately unmap reserved virtual area. */
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int reserve_flags);
249 
/**
 * Initialize a memory segment list and create its backing storage.
 *
 * @param msl
 *  Memory segment list to be filled.
 * @param name
 *  Name for the backing storage.
 * @param page_sz
 *  Size of segment pages in the MSL.
 * @param n_segs
 *  Number of segments.
 * @param socket_id
 *  Socket ID. Must not be SOCKET_ID_ANY.
 * @param heap
 *  Mark MSL as pointing to a heap.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
	uint64_t page_sz, int n_segs, int socket_id, bool heap);

/**
 * Initialize a memory segment list and create its backing storage
 * with a name corresponding to MSL parameters.
 *
 * @param type_msl_idx
 *  Index of the MSL among other MSLs of the same socket and page size.
 *
 * @see eal_memseg_list_init_named for remaining parameters description.
 */
int
eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
	int n_segs, int socket_id, int type_msl_idx, bool heap);

/**
 * Reserve VA space for a memory segment list
 * previously initialized with eal_memseg_list_init().
 *
 * @param msl
 *  Initialized memory segment list with page size defined.
 * @param reserve_flags
 *  Extra memory reservation flags. Can be 0 if unnecessary.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags);

/**
 * Populate MSL, each segment is one page long.
 *
 * @param msl
 *  Initialized memory segment list with page size defined.
 * @param addr
 *  Starting address of list segments.
 * @param n_segs
 *  Number of segments to populate.
 */
void
eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs);

/**
 * Distribute available memory between MSLs.
 *
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_memseg_lists_init(void);

/**
 * Preallocate hugepages for dynamic allocation.
 *
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_hugepage_init(void);

/**
 * Given the list of hugepage sizes and the number of pages thereof,
 * calculate the best number of pages of each size to fulfill the request
 * for RAM on each NUMA node.
 *
 * @param memory
 *  Amounts of memory requested for each NUMA node of RTE_MAX_NUMA_NODES.
 * @param hp_info
 *  Information about hugepages of different size.
 * @param hp_used
 *  Receives information about used hugepages of each size.
 * @param num_hp_info
 *  Number of elements in hp_info and hp_used.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_calc_num_pages_per_socket(
		uint64_t *memory, struct hugepage_info *hp_info,
		struct hugepage_info *hp_used, unsigned int num_hp_info);
350 
/**
 * Get cpu core_id.
 *
 * This function is private to the EAL.
 *
 * @param lcore_id
 *   The lcore to query.
 * @return
 *   Core id corresponding to *lcore_id*.
 */
unsigned eal_cpu_core_id(unsigned lcore_id);

/**
 * Check if cpu is present.
 *
 * This function is private to the EAL.
 *
 * @param lcore_id
 *   The lcore to check.
 * @return
 *   Non-zero when the CPU is detected, 0 otherwise.
 *   NOTE(review): inferred from the naming convention — confirm in the
 *   per-OS implementations.
 */
int eal_cpu_detected(unsigned lcore_id);

/**
 * Set TSC frequency from precise value or estimation.
 *
 * This function is private to the EAL.
 */
void set_tsc_freq(void);

/**
 * Get precise TSC frequency from system.
 *
 * This function is private to the EAL.
 *
 * @param arch_hz
 *   TSC frequency reported by the architecture, in Hz.
 *   NOTE(review): exact semantics (e.g. meaning of 0) not visible here —
 *   confirm against callers.
 * @return
 *   The TSC frequency in Hz.
 */
uint64_t get_tsc_freq(uint64_t arch_hz);

/**
 * Get TSC frequency if the architecture supports it.
 *
 * This function is private to the EAL.
 *
 * @return
 *   The number of TSC cycles in one second.
 *   Returns zero if the architecture support is not available.
 */
uint64_t get_tsc_freq_arch(void);

/**
 * Allocate a free lcore to associate to a non-EAL thread.
 *
 * @return
 *   - the id of a lcore with role ROLE_NON_EAL on success.
 *   - RTE_MAX_LCORE if none was available or initializing was refused (see
 *     rte_lcore_callback_register).
 */
unsigned int eal_lcore_non_eal_allocate(void);

/**
 * Release the lcore used by a non-EAL thread.
 * Counterpart of eal_lcore_non_eal_allocate().
 *
 * @param lcore_id
 *   The lcore with role ROLE_NON_EAL to release.
 */
void eal_lcore_non_eal_release(unsigned int lcore_id);
408 
/**
 * Prepare physical memory mapping,
 * i.e. hugepages on Linux and
 *      contigmem on BSD.
 *
 * This function is private to the EAL.
 */
int rte_eal_hugepage_init(void);

/**
 * Creates memory mapping in secondary process,
 * i.e. hugepages on Linux and
 *      contigmem on BSD.
 *
 * This function is private to the EAL.
 */
int rte_eal_hugepage_attach(void);

/**
 * Detaches all memory mappings from a process.
 *
 * This function is private to the EAL.
 */
int rte_eal_memory_detach(void);

/**
 * Find a bus capable of identifying a device.
 *
 * @param str
 *   A device identifier (PCI address, virtual PMD name, ...).
 *
 * @return
 *   A valid bus handle if found.
 *   NULL if no bus is able to parse this device.
 */
struct rte_bus *rte_bus_find_by_device_name(const char *str);

/**
 * For each device on the buses, call the driver-specific function for
 * device cleanup.
 *
 * @return
 * 0 for successful cleanup
 * !0 otherwise
 */
int eal_bus_cleanup(void);

/**
 * Create the unix channel for primary/secondary communication.
 *
 * @return
 *   0 on success;
 *   (<0) on failure.
 */
int rte_mp_channel_init(void);

/**
 * Primary/secondary communication cleanup.
 */
void rte_mp_channel_cleanup(void);
469 
/**
 * @internal
 * Parse a device string and store its information in an
 * rte_devargs structure.
 *
 * A device description is split by layers of abstraction of the device:
 * bus, class and driver. Each layer will offer a set of properties that
 * can be applied either to configure or recognize a device.
 *
 * This function will parse those properties and prepare the rte_devargs
 * to be given to each layers for processing.
 *
 * Note: if the "data" field of the devargs points to devstr,
 * then no dynamic allocation is performed and the rte_devargs
 * can be safely discarded.
 *
 * Otherwise ``data`` will hold a workable copy of devstr, that will be
 * used by layers descriptors within rte_devargs. In this case,
 * any rte_devargs should be cleaned-up before being freed.
 *
 * @param devargs
 *   rte_devargs structure to fill.
 *
 * @param devstr
 *   Device string.
 *
 * @return
 *   0 on success.
 *   Negative errno values on error (rte_errno is set).
 */
int
rte_devargs_layers_parse(struct rte_devargs *devargs,
			 const char *devstr);

/**
 * Probe a device at the local process.
 *
 * @param devargs
 *   Device arguments including bus, class and driver properties.
 * @param new_dev
 *   The newly probed device, returned as output.
 * @return
 *   0 on success, negative on error.
 */
int local_dev_probe(const char *devargs, struct rte_device **new_dev);

/**
 * Hotplug remove a given device from a specific bus at the local process.
 *
 * @param dev
 *   Data structure of the device to remove.
 * @return
 *   0 on success, negative on error.
 */
int local_dev_remove(struct rte_device *dev);

/**
 * Iterate over all buses to find the corresponding bus to handle the sigbus
 * error.
 * @param failure_addr
 *	Pointer of the fault address of the sigbus error.
 *
 * @return
 *	 0 success to handle the sigbus.
 *	-1 failed to handle the sigbus.
 *	 1 no bus can handle the sigbus.
 */
int rte_bus_sigbus_handler(const void *failure_addr);

/**
 * Get OS-specific EAL mapping base address.
 */
uint64_t
eal_get_baseaddr(void);
544 
/**
 * Allocate memory without emitting trace events.
 * NOTE(review): inferred from the name — presumably used where tracing
 * would recurse (e.g. by the trace subsystem itself); confirm with callers.
 *
 * @param type
 *   Allocation type/tag string — presumably the same semantics as the
 *   rte_malloc() "type" argument; confirm.
 * @param size
 *   Number of bytes to allocate.
 * @param align
 *   Requested alignment of the returned pointer.
 * @return
 *   Pointer to the allocated memory, or NULL on failure — TODO confirm.
 */
void *
eal_malloc_no_trace(const char *type, size_t size, unsigned int align);

/**
 * Free memory obtained from eal_malloc_no_trace(),
 * without emitting trace events.
 *
 * @param addr
 *   Address of the memory to free.
 */
void eal_free_no_trace(void *addr);
549 
/** Options for eal_file_open(). */
enum eal_open_flags {
	/** Open file for reading. */
	EAL_OPEN_READONLY = 0x00,
	/** Open file for reading and writing. */
	EAL_OPEN_READWRITE = 0x02,
	/**
	 * Create the file if it doesn't exist.
	 * New files are only accessible to the owner (0600 equivalent).
	 */
	EAL_OPEN_CREATE = 0x04
};

/**
 * Open or create a file.
 *
 * @param path
 *  Path to the file.
 * @param flags
 *  A combination of eal_open_flags controlling operation and FD behavior.
 * @return
 *  Open file descriptor on success, (-1) on failure and rte_errno is set.
 */
int
eal_file_open(const char *path, int flags);
575 
/** File locking operation. */
enum eal_flock_op {
	EAL_FLOCK_SHARED,    /**< Acquire a shared lock. */
	EAL_FLOCK_EXCLUSIVE, /**< Acquire an exclusive lock. */
	EAL_FLOCK_UNLOCK     /**< Release a previously taken lock. */
};

/** Behavior on file locking conflict. */
enum eal_flock_mode {
	EAL_FLOCK_WAIT,  /**< Wait until the file gets unlocked to lock it. */
	EAL_FLOCK_RETURN /**< Return immediately if the file is locked. */
};

/**
 * Lock or unlock the file.
 *
 * On failure @code rte_errno @endcode is set to the error code
 * specified by the flock(2) description.
 * (flock() is a BSD/Linux interface, not part of POSIX.)
 *
 * @param fd
 *  Opened file descriptor.
 * @param op
 *  Operation to perform.
 * @param mode
 *  Behavior on conflict.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_file_lock(int fd, enum eal_flock_op op, enum eal_flock_mode mode);

/**
 * Truncate or extend the file to the specified size.
 *
 * On failure @code rte_errno @endcode is set to the error code
 * specified by the POSIX ftruncate(3) description.
 *
 * @param fd
 *  Opened file descriptor.
 * @param size
 *  Desired file size.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_file_truncate(int fd, ssize_t size);
622 
/**
 * Reserve a region of virtual memory.
 *
 * Use eal_mem_free() to free reserved memory.
 *
 * @param requested_addr
 *  A desired reservation address which must be page-aligned.
 *  The system might not respect it.
 *  NULL means the address will be chosen by the system.
 * @param size
 *  Reservation size. Must be a multiple of system page size.
 * @param flags
 *  Reservation options, a combination of eal_mem_reserve_flags.
 * @return
 *  Starting address of the reserved area on success, NULL on failure.
 *  Callers must not access this memory until remapping it.
 */
void *
eal_mem_reserve(void *requested_addr, size_t size, int flags);

/**
 * Free memory obtained by eal_mem_reserve() and possibly allocated.
 *
 * If *virt* and *size* describe a part of the reserved region,
 * only this part of the region is freed (accurately up to the system
 * page size). If *virt* points to allocated memory, *size* must match
 * the one specified on allocation. The behavior is undefined
 * if the memory pointed by *virt* is obtained from another source
 * than listed above.
 *
 * @param virt
 *  A virtual address in a region previously reserved.
 * @param size
 *  Number of bytes to unreserve.
 */
void
eal_mem_free(void *virt, size_t size);

/**
 * Configure memory region inclusion into dumps.
 *
 * @param virt
 *  Starting address of the region.
 * @param size
 *  Size of the region.
 * @param dump
 *  True to include memory into dumps, false to exclude.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_mem_set_dump(void *virt, size_t size, bool dump);
675 
/**
 * Sets the runtime directory of DPDK.
 *
 * @param run_dir
 *   The new runtime directory path of DPDK.
 * @return
 *   0 on success, (-1) on failure.
 */
int
eal_set_runtime_dir(const char *run_dir);

/**
 * Get the internal configuration structure.
 *
 * @return
 *   A pointer to the internal configuration structure.
 */
struct internal_config *
eal_get_internal_configuration(void);

/**
 * Get the current value of the rte_application_usage pointer.
 *
 * @return
 *   Pointer to the current value of rte_application_usage.
 */
rte_usage_hook_t
eal_get_application_usage_hook(void);

/**
 * Instruct primary process that a secondary process wants to attach.
 */
bool __rte_mp_enable(void);

/**
 * Init per-lcore info in current thread.
 *
 * @param lcore_id
 *   identifier of lcore.
 * @param cpuset
 *   CPU affinity for this thread.
 */
void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);

/**
 * Uninitialize per-lcore info for current thread.
 */
void __rte_thread_uninit(void);
724 
/**
 * asprintf(3) replacement for Windows.
 */
#ifdef RTE_EXEC_ENV_WINDOWS
__rte_format_printf(2, 3)
int eal_asprintf(char **buffer, const char *format, ...);

#define asprintf(buffer, format, ...) \
		eal_asprintf(buffer, format, ##__VA_ARGS__)
#endif

/*
 * EAL-scoped logging helper. The leading "" is string-concatenated with
 * the first variadic argument, which forces the format to be a string
 * literal (a compile error otherwise) and keeps the expansion valid when
 * only a format string is given.
 */
#define EAL_LOG(level, ...) \
	RTE_LOG_LINE(level, EAL, "" __VA_ARGS__)
738 
739 #endif /* _EAL_PRIVATE_H_ */
740