xref: /dpdk/lib/eal/linux/eal.c (revision 515cd4a488b6a0c6e40d20e6b10d8e89657dc23f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation.
3  * Copyright(c) 2012-2014 6WIND S.A.
4  */
5 
6 #include <ctype.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <pthread.h>
13 #include <getopt.h>
14 #include <sys/file.h>
15 #include <dirent.h>
16 #include <fcntl.h>
17 #include <fnmatch.h>
18 #include <stddef.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23 #if defined(RTE_ARCH_X86)
24 #include <sys/io.h>
25 #endif
26 #include <linux/version.h>
27 
28 #include <rte_common.h>
29 #include <rte_debug.h>
30 #include <rte_memory.h>
31 #include <rte_launch.h>
32 #include <rte_eal.h>
33 #include <rte_errno.h>
34 #include <rte_lcore.h>
35 #include <rte_service_component.h>
36 #include <rte_log.h>
37 #include <rte_string_fns.h>
38 #include <rte_cpuflags.h>
39 #include <rte_bus.h>
40 #include <rte_version.h>
41 #include <malloc_heap.h>
42 #include <rte_vfio.h>
43 
44 #include <telemetry_internal.h>
45 #include "eal_private.h"
46 #include "eal_thread.h"
47 #include "eal_internal_cfg.h"
48 #include "eal_filesystem.h"
49 #include "eal_hugepages.h"
50 #include "eal_memcfg.h"
51 #include "eal_trace.h"
52 #include "eal_log.h"
53 #include "eal_options.h"
54 #include "eal_vfio.h"
55 #include "hotplug_mp.h"
56 
/* Amount of memory (64 MB) used as the default when hugetlbfs is disabled. */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* Upper bound on the length of a per-socket argument string accepted by
 * eal_parse_socket_arg(): 10 characters per possible NUMA node. */
#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)

/* sysfs directory inspected by is_iommu_enabled() */
#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;

/* Write lock description passed to fcntl(F_SETLK); it covers only the
 * 'memsegs' field of struct rte_mem_config, not the whole file. */
static struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = offsetof(struct rte_mem_config, memsegs),
		.l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs),
};

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
79 
80 
81 int
82 eal_clean_runtime_dir(void)
83 {
84 	const char *runtime_dir = rte_eal_get_runtime_dir();
85 	DIR *dir;
86 	struct dirent *dirent;
87 	int dir_fd, fd, lck_result;
88 	static const char * const filters[] = {
89 		"fbarray_*",
90 		"mp_socket_*"
91 	};
92 
93 	/* open directory */
94 	dir = opendir(runtime_dir);
95 	if (!dir) {
96 		RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
97 				runtime_dir);
98 		goto error;
99 	}
100 	dir_fd = dirfd(dir);
101 
102 	/* lock the directory before doing anything, to avoid races */
103 	if (flock(dir_fd, LOCK_EX) < 0) {
104 		RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
105 			runtime_dir);
106 		goto error;
107 	}
108 
109 	dirent = readdir(dir);
110 	if (!dirent) {
111 		RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
112 				runtime_dir);
113 		goto error;
114 	}
115 
116 	while (dirent != NULL) {
117 		unsigned int f_idx;
118 		bool skip = true;
119 
120 		/* skip files that don't match the patterns */
121 		for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
122 			const char *filter = filters[f_idx];
123 
124 			if (fnmatch(filter, dirent->d_name, 0) == 0) {
125 				skip = false;
126 				break;
127 			}
128 		}
129 		if (skip) {
130 			dirent = readdir(dir);
131 			continue;
132 		}
133 
134 		/* try and lock the file */
135 		fd = openat(dir_fd, dirent->d_name, O_RDONLY);
136 
137 		/* skip to next file */
138 		if (fd == -1) {
139 			dirent = readdir(dir);
140 			continue;
141 		}
142 
143 		/* non-blocking lock */
144 		lck_result = flock(fd, LOCK_EX | LOCK_NB);
145 
146 		/* if lock succeeds, remove the file */
147 		if (lck_result != -1)
148 			unlinkat(dir_fd, dirent->d_name, 0);
149 		close(fd);
150 		dirent = readdir(dir);
151 	}
152 
153 	/* closedir closes dir_fd and drops the lock */
154 	closedir(dir);
155 	return 0;
156 
157 error:
158 	if (dir)
159 		closedir(dir);
160 
161 	RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
162 		strerror(errno));
163 
164 	return -1;
165 }
166 
167 
168 /* create memory configuration in shared/mmap memory. Take out
169  * a write lock on the memsegs, so we can auto-detect primary/secondary.
170  * This means we never close the file while running (auto-close on exit).
171  * We also don't lock the whole file, so that in future we can use read-locks
172  * on other parts, e.g. memzones, to detect if there are running secondary
173  * processes. */
174 static int
175 rte_eal_config_create(void)
176 {
177 	struct rte_config *config = rte_eal_get_configuration();
178 	size_t page_sz = sysconf(_SC_PAGE_SIZE);
179 	size_t cfg_len = sizeof(*config->mem_config);
180 	size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
181 	void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
182 	int retval;
183 	const struct internal_config *internal_conf =
184 		eal_get_internal_configuration();
185 
186 	const char *pathname = eal_runtime_config_path();
187 
188 	if (internal_conf->no_shconf)
189 		return 0;
190 
191 	/* map the config before hugepage address so that we don't waste a page */
192 	if (internal_conf->base_virtaddr != 0)
193 		rte_mem_cfg_addr = (void *)
194 			RTE_ALIGN_FLOOR(internal_conf->base_virtaddr -
195 			sizeof(struct rte_mem_config), page_sz);
196 	else
197 		rte_mem_cfg_addr = NULL;
198 
199 	if (mem_cfg_fd < 0){
200 		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
201 		if (mem_cfg_fd < 0) {
202 			RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
203 				pathname);
204 			return -1;
205 		}
206 	}
207 
208 	retval = ftruncate(mem_cfg_fd, cfg_len);
209 	if (retval < 0){
210 		close(mem_cfg_fd);
211 		mem_cfg_fd = -1;
212 		RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
213 			pathname);
214 		return -1;
215 	}
216 
217 	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
218 	if (retval < 0){
219 		close(mem_cfg_fd);
220 		mem_cfg_fd = -1;
221 		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
222 			"process running?\n", pathname);
223 		return -1;
224 	}
225 
226 	/* reserve space for config */
227 	rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
228 			&cfg_len_aligned, page_sz, 0, 0);
229 	if (rte_mem_cfg_addr == NULL) {
230 		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
231 		close(mem_cfg_fd);
232 		mem_cfg_fd = -1;
233 		return -1;
234 	}
235 
236 	/* remap the actual file into the space we've just reserved */
237 	mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
238 			cfg_len_aligned, PROT_READ | PROT_WRITE,
239 			MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
240 	if (mapped_mem_cfg_addr == MAP_FAILED) {
241 		munmap(rte_mem_cfg_addr, cfg_len);
242 		close(mem_cfg_fd);
243 		mem_cfg_fd = -1;
244 		RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
245 		return -1;
246 	}
247 
248 	memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config));
249 	config->mem_config = rte_mem_cfg_addr;
250 
251 	/* store address of the config in the config itself so that secondary
252 	 * processes could later map the config into this exact location
253 	 */
254 	config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
255 	config->mem_config->dma_maskbits = 0;
256 
257 	return 0;
258 }
259 
260 /* attach to an existing shared memory config */
261 static int
262 rte_eal_config_attach(void)
263 {
264 	struct rte_config *config = rte_eal_get_configuration();
265 	struct rte_mem_config *mem_config;
266 	const struct internal_config *internal_conf =
267 		eal_get_internal_configuration();
268 
269 	const char *pathname = eal_runtime_config_path();
270 
271 	if (internal_conf->no_shconf)
272 		return 0;
273 
274 	if (mem_cfg_fd < 0){
275 		mem_cfg_fd = open(pathname, O_RDWR);
276 		if (mem_cfg_fd < 0) {
277 			RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
278 				pathname);
279 			return -1;
280 		}
281 	}
282 
283 	/* map it as read-only first */
284 	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
285 			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
286 	if (mem_config == MAP_FAILED) {
287 		close(mem_cfg_fd);
288 		mem_cfg_fd = -1;
289 		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
290 			errno, strerror(errno));
291 		return -1;
292 	}
293 
294 	config->mem_config = mem_config;
295 
296 	return 0;
297 }
298 
299 /* reattach the shared config at exact memory location primary process has it */
300 static int
301 rte_eal_config_reattach(void)
302 {
303 	struct rte_config *config = rte_eal_get_configuration();
304 	struct rte_mem_config *mem_config;
305 	void *rte_mem_cfg_addr;
306 	const struct internal_config *internal_conf =
307 		eal_get_internal_configuration();
308 
309 	if (internal_conf->no_shconf)
310 		return 0;
311 
312 	/* save the address primary process has mapped shared config to */
313 	rte_mem_cfg_addr =
314 		(void *) (uintptr_t) config->mem_config->mem_cfg_addr;
315 
316 	/* unmap original config */
317 	munmap(config->mem_config, sizeof(struct rte_mem_config));
318 
319 	/* remap the config at proper address */
320 	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
321 			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
322 			mem_cfg_fd, 0);
323 
324 	close(mem_cfg_fd);
325 	mem_cfg_fd = -1;
326 
327 	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
328 		if (mem_config != MAP_FAILED) {
329 			/* errno is stale, don't use */
330 			RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
331 				" - please use '--" OPT_BASE_VIRTADDR
332 				"' option\n", rte_mem_cfg_addr, mem_config);
333 			munmap(mem_config, sizeof(struct rte_mem_config));
334 			return -1;
335 		}
336 		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
337 			errno, strerror(errno));
338 		return -1;
339 	}
340 
341 	config->mem_config = mem_config;
342 
343 	return 0;
344 }
345 
346 /* Detect if we are a primary or a secondary process */
347 enum rte_proc_type_t
348 eal_proc_type_detect(void)
349 {
350 	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
351 	const char *pathname = eal_runtime_config_path();
352 	const struct internal_config *internal_conf =
353 		eal_get_internal_configuration();
354 
355 	/* if there no shared config, there can be no secondary processes */
356 	if (!internal_conf->no_shconf) {
357 		/* if we can open the file but not get a write-lock we are a
358 		 * secondary process. NOTE: if we get a file handle back, we
359 		 * keep that open and don't close it to prevent a race condition
360 		 * between multiple opens.
361 		 */
362 		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
363 				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
364 			ptype = RTE_PROC_SECONDARY;
365 	}
366 
367 	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
368 			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
369 
370 	return ptype;
371 }
372 
373 /* Sets up rte_config structure with the pointer to shared memory config.*/
374 static int
375 rte_config_init(void)
376 {
377 	struct rte_config *config = rte_eal_get_configuration();
378 	const struct internal_config *internal_conf =
379 		eal_get_internal_configuration();
380 
381 	config->process_type = internal_conf->process_type;
382 
383 	switch (config->process_type) {
384 	case RTE_PROC_PRIMARY:
385 		if (rte_eal_config_create() < 0)
386 			return -1;
387 		eal_mcfg_update_from_internal();
388 		break;
389 	case RTE_PROC_SECONDARY:
390 		if (rte_eal_config_attach() < 0)
391 			return -1;
392 		eal_mcfg_wait_complete();
393 		if (eal_mcfg_check_version() < 0) {
394 			RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
395 			return -1;
396 		}
397 		if (rte_eal_config_reattach() < 0)
398 			return -1;
399 		if (!__rte_mp_enable()) {
400 			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
401 			return -1;
402 		}
403 		eal_mcfg_update_internal();
404 		break;
405 	case RTE_PROC_AUTO:
406 	case RTE_PROC_INVALID:
407 		RTE_LOG(ERR, EAL, "Invalid process type %d\n",
408 			config->process_type);
409 		return -1;
410 	}
411 
412 	return 0;
413 }
414 
415 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
416 static void
417 eal_hugedirs_unlock(void)
418 {
419 	int i;
420 	struct internal_config *internal_conf =
421 		eal_get_internal_configuration();
422 
423 	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
424 	{
425 		/* skip uninitialized */
426 		if (internal_conf->hugepage_info[i].lock_descriptor < 0)
427 			continue;
428 		/* unlock hugepage file */
429 		flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN);
430 		close(internal_conf->hugepage_info[i].lock_descriptor);
431 		/* reset the field */
432 		internal_conf->hugepage_info[i].lock_descriptor = -1;
433 	}
434 }
435 
436 /* display usage */
437 static void
438 eal_usage(const char *prgname)
439 {
440 	rte_usage_hook_t hook = eal_get_application_usage_hook();
441 
442 	printf("\nUsage: %s ", prgname);
443 	eal_common_usage();
444 	printf("EAL Linux options:\n"
445 	       "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
446 	       "  --"OPT_SOCKET_LIMIT"      Limit memory allocation on sockets (comma separated values)\n"
447 	       "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
448 	       "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
449 	       "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
450 	       "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
451 	       "  --"OPT_VFIO_VF_TOKEN"     VF token (UUID) shared between SR-IOV PF and VFs\n"
452 	       "  --"OPT_LEGACY_MEM"        Legacy memory mode (no dynamic allocation, contiguous segments)\n"
453 	       "  --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
454 	       "  --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
455 	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
456 	       "                      Allocate worker thread stacks from hugepage memory.\n"
457 	       "                      Size is in units of kbytes and defaults to system\n"
458 	       "                      thread stack size if not specified.\n"
459 	       "\n");
460 	/* Allow the application to print its usage message too if hook is set */
461 	if (hook) {
462 		printf("===== Application Usage =====\n\n");
463 		(hook)(prgname);
464 	}
465 }
466 
467 static int
468 eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
469 {
470 	char * arg[RTE_MAX_NUMA_NODES];
471 	char *end;
472 	int arg_num, i, len;
473 
474 	len = strnlen(strval, SOCKET_MEM_STRLEN);
475 	if (len == SOCKET_MEM_STRLEN) {
476 		RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
477 		return -1;
478 	}
479 
480 	/* all other error cases will be caught later */
481 	if (!isdigit(strval[len-1]))
482 		return -1;
483 
484 	/* split the optarg into separate socket values */
485 	arg_num = rte_strsplit(strval, len,
486 			arg, RTE_MAX_NUMA_NODES, ',');
487 
488 	/* if split failed, or 0 arguments */
489 	if (arg_num <= 0)
490 		return -1;
491 
492 	/* parse each defined socket option */
493 	errno = 0;
494 	for (i = 0; i < arg_num; i++) {
495 		uint64_t val;
496 		end = NULL;
497 		val = strtoull(arg[i], &end, 10);
498 
499 		/* check for invalid input */
500 		if ((errno != 0)  ||
501 				(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
502 			return -1;
503 		val <<= 20;
504 		socket_arg[i] = val;
505 	}
506 
507 	return 0;
508 }
509 
510 static int
511 eal_parse_vfio_intr(const char *mode)
512 {
513 	struct internal_config *internal_conf =
514 		eal_get_internal_configuration();
515 	unsigned i;
516 	static struct {
517 		const char *name;
518 		enum rte_intr_mode value;
519 	} map[] = {
520 		{ "legacy", RTE_INTR_MODE_LEGACY },
521 		{ "msi", RTE_INTR_MODE_MSI },
522 		{ "msix", RTE_INTR_MODE_MSIX },
523 	};
524 
525 	for (i = 0; i < RTE_DIM(map); i++) {
526 		if (!strcmp(mode, map[i].name)) {
527 			internal_conf->vfio_intr_mode = map[i].value;
528 			return 0;
529 		}
530 	}
531 	return -1;
532 }
533 
534 static int
535 eal_parse_vfio_vf_token(const char *vf_token)
536 {
537 	struct internal_config *cfg = eal_get_internal_configuration();
538 	rte_uuid_t uuid;
539 
540 	if (!rte_uuid_parse(vf_token, uuid)) {
541 		rte_uuid_copy(cfg->vfio_vf_token, uuid);
542 		return 0;
543 	}
544 
545 	return -1;
546 }
547 
548 /* Parse the arguments for --log-level only */
549 static void
550 eal_log_level_parse(int argc, char **argv)
551 {
552 	int opt;
553 	char **argvopt;
554 	int option_index;
555 	const int old_optind = optind;
556 	const int old_optopt = optopt;
557 	char * const old_optarg = optarg;
558 	struct internal_config *internal_conf =
559 		eal_get_internal_configuration();
560 
561 	argvopt = argv;
562 	optind = 1;
563 
564 	while ((opt = getopt_long(argc, argvopt, eal_short_options,
565 				  eal_long_options, &option_index)) != EOF) {
566 
567 		int ret;
568 
569 		/* getopt is not happy, stop right now */
570 		if (opt == '?')
571 			break;
572 
573 		ret = (opt == OPT_LOG_LEVEL_NUM) ?
574 			eal_parse_common_option(opt, optarg, internal_conf) : 0;
575 
576 		/* common parser is not happy */
577 		if (ret < 0)
578 			break;
579 	}
580 
581 	/* restore getopt lib */
582 	optind = old_optind;
583 	optopt = old_optopt;
584 	optarg = old_optarg;
585 }
586 
587 static int
588 eal_parse_huge_worker_stack(const char *arg)
589 {
590 	struct internal_config *cfg = eal_get_internal_configuration();
591 
592 	if (arg == NULL || arg[0] == '\0') {
593 		pthread_attr_t attr;
594 		int ret;
595 
596 		if (pthread_attr_init(&attr) != 0) {
597 			RTE_LOG(ERR, EAL, "Could not retrieve default stack size\n");
598 			return -1;
599 		}
600 		ret = pthread_attr_getstacksize(&attr, &cfg->huge_worker_stack_size);
601 		pthread_attr_destroy(&attr);
602 		if (ret != 0) {
603 			RTE_LOG(ERR, EAL, "Could not retrieve default stack size\n");
604 			return -1;
605 		}
606 	} else {
607 		unsigned long stack_size;
608 		char *end;
609 
610 		errno = 0;
611 		stack_size = strtoul(arg, &end, 10);
612 		if (errno || end == NULL || stack_size == 0 ||
613 				stack_size >= (size_t)-1 / 1024)
614 			return -1;
615 
616 		cfg->huge_worker_stack_size = stack_size * 1024;
617 	}
618 
619 	RTE_LOG(DEBUG, EAL, "Each worker thread will use %zu kB of DPDK memory as stack\n",
620 		cfg->huge_worker_stack_size / 1024);
621 	return 0;
622 }
623 
/*
 * Parse the argument given in the command line of the application.
 *
 * Every option returned by getopt_long() is first offered to
 * eal_parse_common_option(): a negative return aborts parsing, 0 means the
 * option was consumed there, and any other value passes the option on to the
 * Linux-specific switch below. The log-level option is skipped because
 * eal_log_level_parse() has already dealt with it.
 *
 * After the option loop the runtime directory is created, the configuration
 * is adjusted and the common sanity checks are run.
 *
 * getopt's global state (optind, optopt, optarg) is saved on entry and
 * restored before returning on every path that goes through 'out'.
 *
 * Returns optind - 1 (as left by getopt_long()) on success, after writing the
 * program name into argv[optind - 1]; returns -1 on error. The 'h' option
 * prints the usage text and terminates the process with EXIT_SUCCESS.
 */
static int
eal_parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	/* restart option scanning from the first argument */
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		/* getopt didn't recognise the option */
		if (opt == '?') {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}

		/* eal_log_level_parse() already handled this option */
		if (opt == OPT_LOG_LEVEL_NUM)
			continue;

		ret = eal_parse_common_option(opt, optarg, internal_conf);
		/* common parser is not happy */
		if (ret < 0) {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
		/* common parser handled this option */
		if (ret == 0)
			continue;

		switch (opt) {
		case 'h':
			eal_usage(prgname);
			exit(EXIT_SUCCESS);

		case OPT_HUGE_DIR_NUM:
		{
			/* on allocation failure the error is only logged: the
			 * previous value is kept and parsing continues
			 */
			char *hdir = strdup(optarg);
			if (hdir == NULL)
				RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
			else {
				/* free old hugepage dir */
				free(internal_conf->hugepage_dir);
				internal_conf->hugepage_dir = hdir;
			}
			break;
		}
		case OPT_FILE_PREFIX_NUM:
		{
			/* same best-effort handling as for the hugepage dir */
			char *prefix = strdup(optarg);
			if (prefix == NULL)
				RTE_LOG(ERR, EAL, "Could not store file prefix\n");
			else {
				/* free old prefix */
				free(internal_conf->hugefile_prefix);
				internal_conf->hugefile_prefix = prefix;
			}
			break;
		}
		case OPT_SOCKET_MEM_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_mem) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_MEM "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			/* remember that per-socket amounts were given explicitly */
			internal_conf->force_sockets = 1;
			break;

		case OPT_SOCKET_LIMIT_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_limit) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_LIMIT "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			/* remember that per-socket limits were given explicitly */
			internal_conf->force_socket_limits = 1;
			break;

		case OPT_VFIO_INTR_NUM:
			if (eal_parse_vfio_intr(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_INTR "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_VFIO_VF_TOKEN_NUM:
			if (eal_parse_vfio_vf_token(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_VF_TOKEN "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_CREATE_UIO_DEV_NUM:
			internal_conf->create_uio_dev = 1;
			break;

		case OPT_MBUF_POOL_OPS_NAME_NUM:
		{
			/* same best-effort handling as for the hugepage dir */
			char *ops_name = strdup(optarg);
			if (ops_name == NULL)
				RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
			else {
				/* free old ops name */
				free(internal_conf->user_mbuf_pool_ops_name);

				internal_conf->user_mbuf_pool_ops_name =
						ops_name;
			}
			break;
		}
		case OPT_MATCH_ALLOCATIONS_NUM:
			internal_conf->match_allocations = 1;
			break;

		case OPT_HUGE_WORKER_STACK_NUM:
			if (eal_parse_huge_worker_stack(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameter for --"
					OPT_HUGE_WORKER_STACK"\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		default:
			/* the option was not handled by the common parser or by
			 * any case above; report it as a short option character,
			 * a long option name or a raw number, whichever applies
			 */
			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
				RTE_LOG(ERR, EAL, "Option %c is not supported "
					"on Linux\n", opt);
			} else if (opt >= OPT_LONG_MIN_NUM &&
				   opt < OPT_LONG_MAX_NUM) {
				RTE_LOG(ERR, EAL, "Option %s is not supported "
					"on Linux\n",
					eal_long_options[option_index].name);
			} else {
				RTE_LOG(ERR, EAL, "Option %d is not supported "
					"on Linux\n", opt);
			}
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
	}

	/* create runtime data directory. In no_shconf mode, skip any errors */
	if (eal_create_runtime_dir() < 0) {
		if (internal_conf->no_shconf == 0) {
			RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
			ret = -1;
			goto out;
		} else
			RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n");
	}

	if (eal_adjust_config(internal_conf) != 0) {
		ret = -1;
		goto out;
	}

	/* sanity checks */
	if (eal_check_common_options(internal_conf) != 0) {
		eal_usage(prgname);
		ret = -1;
		goto out;
	}

	/* NOTE(review): the guard admits optind == 0, which would write to
	 * argv[-1]; optind is set to 1 above and presumably never goes below
	 * that, so 'optind >= 1' looks like the intended check - confirm.
	 */
	if (optind >= 0)
		argv[optind-1] = prgname;
	ret = optind-1;

out:
	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;

	return ret;
}
824 
825 static int
826 check_socket(const struct rte_memseg_list *msl, void *arg)
827 {
828 	int *socket_id = arg;
829 
830 	if (msl->external)
831 		return 0;
832 
833 	return *socket_id == msl->socket_id;
834 }
835 
836 static void
837 eal_check_mem_on_local_socket(void)
838 {
839 	int socket_id;
840 	const struct rte_config *config = rte_eal_get_configuration();
841 
842 	socket_id = rte_lcore_to_socket_id(config->main_lcore);
843 
844 	if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
845 		RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n");
846 }
847 
/* Do-nothing function that ignores its argument and always returns 0. */
static int
sync_func(__rte_unused void *arg)
{
	return 0;
}
853 
/*
 * Request iopl privilege for all RPL.
 *
 * On x86 builds iopl(3) is called; 0 is returned if it succeeds and -1 if it
 * fails. On every other architecture nothing is attempted and 0 is returned.
 */
int
rte_eal_iopl_init(void)
{
#if defined(RTE_ARCH_X86)
	return (iopl(3) != 0) ? -1 : 0;
#else
	return 0;
#endif
}
868 
#ifdef VFIO_PRESENT
/* Enable VFIO for the "vfio" module; returns 0 on success, -1 on failure. */
static int
rte_eal_vfio_setup(void)
{
	return rte_vfio_enable("vfio") ? -1 : 0;
}
#endif
878 
/*
 * Report an initialisation failure: the message is written to stderr with an
 * "EAL: FATAL: " prefix and also logged at ERR level.
 */
static void rte_eal_init_alert(const char *msg)
{
	fprintf(stderr, "EAL: FATAL: %s\n", msg);
	RTE_LOG(ERR, EAL, "%s\n", msg);
}
884 
885 /*
886  * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
887  * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
888  * IOMMU groups. If IOMMU is not enabled, that path would be empty.
889  * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
890  */
891 static bool
892 is_iommu_enabled(void)
893 {
894 	DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
895 	struct dirent *d;
896 	int n = 0;
897 
898 	/* if directory doesn't exist, assume IOMMU is not enabled */
899 	if (dir == NULL)
900 		return false;
901 
902 	while ((d = readdir(dir)) != NULL) {
903 		/* skip dot and dot-dot */
904 		if (++n > 2)
905 			break;
906 	}
907 	closedir(dir);
908 
909 	return n > 2;
910 }
911 
912 static int
913 eal_worker_thread_create(unsigned int lcore_id)
914 {
915 	pthread_attr_t *attrp = NULL;
916 	void *stack_ptr = NULL;
917 	pthread_attr_t attr;
918 	size_t stack_size;
919 	int ret = -1;
920 
921 	stack_size = eal_get_internal_configuration()->huge_worker_stack_size;
922 	if (stack_size != 0) {
923 		/* Allocate NUMA aware stack memory and set pthread attributes */
924 		stack_ptr = rte_zmalloc_socket("lcore_stack", stack_size,
925 			RTE_CACHE_LINE_SIZE, rte_lcore_to_socket_id(lcore_id));
926 		if (stack_ptr == NULL) {
927 			rte_eal_init_alert("Cannot allocate worker lcore stack memory");
928 			rte_errno = ENOMEM;
929 			goto out;
930 		}
931 
932 		if (pthread_attr_init(&attr) != 0) {
933 			rte_eal_init_alert("Cannot init pthread attributes");
934 			rte_errno = EFAULT;
935 			goto out;
936 		}
937 		attrp = &attr;
938 
939 		if (pthread_attr_setstack(attrp, stack_ptr, stack_size) != 0) {
940 			rte_eal_init_alert("Cannot set pthread stack attributes");
941 			rte_errno = EFAULT;
942 			goto out;
943 		}
944 	}
945 
946 	if (pthread_create(&lcore_config[lcore_id].thread_id, attrp,
947 			eal_thread_loop, (void *)(uintptr_t)lcore_id) == 0)
948 		ret = 0;
949 
950 out:
951 	if (ret != 0)
952 		rte_free(stack_ptr);
953 	if (attrp != NULL)
954 		pthread_attr_destroy(attrp);
955 	return ret;
956 }
957 
958 /* Launch threads, called at application init(). */
959 int
960 rte_eal_init(int argc, char **argv)
961 {
962 	int i, fctret, ret;
963 	static uint32_t run_once;
964 	uint32_t has_run = 0;
965 	const char *p;
966 	static char logid[PATH_MAX];
967 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
968 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
969 	bool phys_addrs;
970 	const struct rte_config *config = rte_eal_get_configuration();
971 	struct internal_config *internal_conf =
972 		eal_get_internal_configuration();
973 
974 	/* checks if the machine is adequate */
975 	if (!rte_cpu_is_supported()) {
976 		rte_eal_init_alert("unsupported cpu type.");
977 		rte_errno = ENOTSUP;
978 		return -1;
979 	}
980 
981 	if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0,
982 					__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
983 		rte_eal_init_alert("already called initialization.");
984 		rte_errno = EALREADY;
985 		return -1;
986 	}
987 
988 	p = strrchr(argv[0], '/');
989 	strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));
990 
991 	eal_reset_internal_config(internal_conf);
992 
993 	/* set log level as early as possible */
994 	eal_log_level_parse(argc, argv);
995 
996 	/* clone argv to report out later in telemetry */
997 	eal_save_args(argc, argv);
998 
999 	if (rte_eal_cpu_init() < 0) {
1000 		rte_eal_init_alert("Cannot detect lcores.");
1001 		rte_errno = ENOTSUP;
1002 		return -1;
1003 	}
1004 
1005 	fctret = eal_parse_args(argc, argv);
1006 	if (fctret < 0) {
1007 		rte_eal_init_alert("Invalid 'command line' arguments.");
1008 		rte_errno = EINVAL;
1009 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1010 		return -1;
1011 	}
1012 
1013 	if (eal_plugins_init() < 0) {
1014 		rte_eal_init_alert("Cannot init plugins");
1015 		rte_errno = EINVAL;
1016 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1017 		return -1;
1018 	}
1019 
1020 	if (eal_trace_init() < 0) {
1021 		rte_eal_init_alert("Cannot init trace");
1022 		rte_errno = EFAULT;
1023 		return -1;
1024 	}
1025 
1026 	if (eal_option_device_parse()) {
1027 		rte_errno = ENODEV;
1028 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1029 		return -1;
1030 	}
1031 
1032 	if (rte_config_init() < 0) {
1033 		rte_eal_init_alert("Cannot init config");
1034 		return -1;
1035 	}
1036 
1037 	if (rte_eal_intr_init() < 0) {
1038 		rte_eal_init_alert("Cannot init interrupt-handling thread");
1039 		return -1;
1040 	}
1041 
1042 	if (rte_eal_alarm_init() < 0) {
1043 		rte_eal_init_alert("Cannot init alarm");
1044 		/* rte_eal_alarm_init sets rte_errno on failure. */
1045 		return -1;
1046 	}
1047 
1048 	/* Put mp channel init before bus scan so that we can init the vdev
1049 	 * bus through mp channel in the secondary process before the bus scan.
1050 	 */
1051 	if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
1052 		rte_eal_init_alert("failed to init mp channel");
1053 		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1054 			rte_errno = EFAULT;
1055 			return -1;
1056 		}
1057 	}
1058 
1059 	/* register multi-process action callbacks for hotplug */
1060 	if (eal_mp_dev_hotplug_init() < 0) {
1061 		rte_eal_init_alert("failed to register mp callback for hotplug");
1062 		return -1;
1063 	}
1064 
1065 	if (rte_bus_scan()) {
1066 		rte_eal_init_alert("Cannot scan the buses for devices");
1067 		rte_errno = ENODEV;
1068 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1069 		return -1;
1070 	}
1071 
1072 	phys_addrs = rte_eal_using_phys_addrs() != 0;
1073 
1074 	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
1075 	if (internal_conf->iova_mode == RTE_IOVA_DC) {
1076 		/* autodetect the IOVA mapping mode */
1077 		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
1078 
1079 		if (iova_mode == RTE_IOVA_DC) {
1080 			RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");
1081 
1082 			if (!phys_addrs) {
1083 				/* if we have no access to physical addresses,
1084 				 * pick IOVA as VA mode.
1085 				 */
1086 				iova_mode = RTE_IOVA_VA;
1087 				RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
1088 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
1089 			} else if (rte_eal_check_module("rte_kni") == 1) {
1090 				iova_mode = RTE_IOVA_PA;
1091 				RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n");
1092 #endif
1093 			} else if (is_iommu_enabled()) {
1094 				/* we have an IOMMU, pick IOVA as VA mode */
1095 				iova_mode = RTE_IOVA_VA;
1096 				RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
1097 			} else {
1098 				/* physical addresses available, and no IOMMU
1099 				 * found, so pick IOVA as PA.
1100 				 */
1101 				iova_mode = RTE_IOVA_PA;
1102 				RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
1103 			}
1104 		}
1105 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
1106 		/* Workaround for KNI which requires physical address to work
1107 		 * in kernels < 4.10
1108 		 */
1109 		if (iova_mode == RTE_IOVA_VA &&
1110 				rte_eal_check_module("rte_kni") == 1) {
1111 			if (phys_addrs) {
1112 				iova_mode = RTE_IOVA_PA;
1113 				RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
1114 			} else {
1115 				RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
1116 			}
1117 		}
1118 #endif
1119 		rte_eal_get_configuration()->iova_mode = iova_mode;
1120 	} else {
1121 		rte_eal_get_configuration()->iova_mode =
1122 			internal_conf->iova_mode;
1123 	}
1124 
1125 	if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
1126 		rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
1127 		rte_errno = EINVAL;
1128 		return -1;
1129 	}
1130 
1131 	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
1132 		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
1133 
1134 	if (internal_conf->no_hugetlbfs == 0) {
1135 		/* rte_config isn't initialized yet */
1136 		ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
1137 				eal_hugepage_info_init() :
1138 				eal_hugepage_info_read();
1139 		if (ret < 0) {
1140 			rte_eal_init_alert("Cannot get hugepage information.");
1141 			rte_errno = EACCES;
1142 			__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1143 			return -1;
1144 		}
1145 	}
1146 
1147 	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) {
1148 		if (internal_conf->no_hugetlbfs)
1149 			internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE;
1150 	}
1151 
1152 	if (internal_conf->vmware_tsc_map == 1) {
1153 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
1154 		rte_cycles_vmware_tsc_map = 1;
1155 		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
1156 				"you must have monitor_control.pseudo_perfctr = TRUE\n");
1157 #else
1158 		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
1159 				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
1160 #endif
1161 	}
1162 
1163 	if (eal_log_init(logid, internal_conf->syslog_facility) < 0) {
1164 		rte_eal_init_alert("Cannot init logging.");
1165 		rte_errno = ENOMEM;
1166 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1167 		return -1;
1168 	}
1169 
1170 #ifdef VFIO_PRESENT
1171 	if (rte_eal_vfio_setup() < 0) {
1172 		rte_eal_init_alert("Cannot init VFIO");
1173 		rte_errno = EAGAIN;
1174 		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
1175 		return -1;
1176 	}
1177 #endif
1178 	/* in secondary processes, memory init may allocate additional fbarrays
1179 	 * not present in primary processes, so to avoid any potential issues,
1180 	 * initialize memzones first.
1181 	 */
1182 	if (rte_eal_memzone_init() < 0) {
1183 		rte_eal_init_alert("Cannot init memzone");
1184 		rte_errno = ENODEV;
1185 		return -1;
1186 	}
1187 
1188 	if (rte_eal_memory_init() < 0) {
1189 		rte_eal_init_alert("Cannot init memory");
1190 		rte_errno = ENOMEM;
1191 		return -1;
1192 	}
1193 
1194 	/* the directories are locked during eal_hugepage_info_init */
1195 	eal_hugedirs_unlock();
1196 
1197 	if (rte_eal_malloc_heap_init() < 0) {
1198 		rte_eal_init_alert("Cannot init malloc heap");
1199 		rte_errno = ENODEV;
1200 		return -1;
1201 	}
1202 
1203 	if (rte_eal_tailqs_init() < 0) {
1204 		rte_eal_init_alert("Cannot init tail queues for objects");
1205 		rte_errno = EFAULT;
1206 		return -1;
1207 	}
1208 
1209 	if (rte_eal_timer_init() < 0) {
1210 		rte_eal_init_alert("Cannot init HPET or TSC timers");
1211 		rte_errno = ENOTSUP;
1212 		return -1;
1213 	}
1214 
1215 	eal_check_mem_on_local_socket();
1216 
1217 	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
1218 			&lcore_config[config->main_lcore].cpuset) != 0) {
1219 		rte_eal_init_alert("Cannot set affinity");
1220 		rte_errno = EINVAL;
1221 		return -1;
1222 	}
1223 	__rte_thread_init(config->main_lcore,
1224 		&lcore_config[config->main_lcore].cpuset);
1225 
1226 	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
1227 	RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
1228 		config->main_lcore, (uintptr_t)pthread_self(), cpuset,
1229 		ret == 0 ? "" : "...");
1230 
1231 	RTE_LCORE_FOREACH_WORKER(i) {
1232 
1233 		/*
1234 		 * create communication pipes between main thread
1235 		 * and children
1236 		 */
1237 		if (pipe(lcore_config[i].pipe_main2worker) < 0)
1238 			rte_panic("Cannot create pipe\n");
1239 		if (pipe(lcore_config[i].pipe_worker2main) < 0)
1240 			rte_panic("Cannot create pipe\n");
1241 
1242 		lcore_config[i].state = WAIT;
1243 
1244 		/* create a thread for each lcore */
1245 		ret = eal_worker_thread_create(i);
1246 		if (ret != 0)
1247 			rte_panic("Cannot create thread\n");
1248 
1249 		/* Set thread_name for aid in debugging. */
1250 		snprintf(thread_name, sizeof(thread_name),
1251 			"rte-worker-%d", i);
1252 		ret = rte_thread_setname(lcore_config[i].thread_id,
1253 						thread_name);
1254 		if (ret != 0)
1255 			RTE_LOG(DEBUG, EAL,
1256 				"Cannot set name for lcore thread\n");
1257 
1258 		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
1259 			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
1260 		if (ret != 0)
1261 			rte_panic("Cannot set affinity\n");
1262 	}
1263 
1264 	/*
1265 	 * Launch a dummy function on all worker lcores, so that main lcore
1266 	 * knows they are all ready when this function returns.
1267 	 */
1268 	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
1269 	rte_eal_mp_wait_lcore();
1270 
1271 	/* initialize services so vdevs register service during bus_probe. */
1272 	ret = rte_service_init();
1273 	if (ret) {
1274 		rte_eal_init_alert("rte_service_init() failed");
1275 		rte_errno = -ret;
1276 		return -1;
1277 	}
1278 
1279 	/* Probe all the buses and devices/drivers on them */
1280 	if (rte_bus_probe()) {
1281 		rte_eal_init_alert("Cannot probe devices");
1282 		rte_errno = ENOTSUP;
1283 		return -1;
1284 	}
1285 
1286 #ifdef VFIO_PRESENT
1287 	/* Register mp action after probe() so that we got enough info */
1288 	if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
1289 		return -1;
1290 #endif
1291 
1292 	/* initialize default service/lcore mappings and start running. Ignore
1293 	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
1294 	 */
1295 	ret = rte_service_start_with_defaults();
1296 	if (ret < 0 && ret != -ENOTSUP) {
1297 		rte_errno = -ret;
1298 		return -1;
1299 	}
1300 
1301 	/*
1302 	 * Clean up unused files in runtime directory. We do this at the end of
1303 	 * init and not at the beginning because we want to clean stuff up
1304 	 * whether we are primary or secondary process, but we cannot remove
1305 	 * primary process' files because secondary should be able to run even
1306 	 * if primary process is dead.
1307 	 *
1308 	 * In no_shconf mode, no runtime directory is created in the first
1309 	 * place, so no cleanup needed.
1310 	 */
1311 	if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) {
1312 		rte_eal_init_alert("Cannot clear runtime directory");
1313 		return -1;
1314 	}
1315 	if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) {
1316 		int tlog = rte_log_register_type_and_pick_level(
1317 				"lib.telemetry", RTE_LOG_WARNING);
1318 		if (tlog < 0)
1319 			tlog = RTE_LOGTYPE_EAL;
1320 		if (rte_telemetry_init(rte_eal_get_runtime_dir(),
1321 				rte_version(),
1322 				&internal_conf->ctrl_cpuset, rte_log, tlog) != 0)
1323 			return -1;
1324 	}
1325 
1326 	eal_mcfg_complete();
1327 
1328 	return fctret;
1329 }
1330 
1331 static int
1332 mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
1333 		void *arg __rte_unused)
1334 {
1335 	/* ms is const, so find this memseg */
1336 	struct rte_memseg *found;
1337 
1338 	if (msl->external)
1339 		return 0;
1340 
1341 	found = rte_mem_virt2memseg(ms->addr, msl);
1342 
1343 	found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
1344 
1345 	return 0;
1346 }
1347 
1348 int
1349 rte_eal_cleanup(void)
1350 {
1351 	/* if we're in a primary process, we need to mark hugepages as freeable
1352 	 * so that finalization can release them back to the system.
1353 	 */
1354 	struct internal_config *internal_conf =
1355 		eal_get_internal_configuration();
1356 
1357 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
1358 			internal_conf->hugepage_file.unlink_existing)
1359 		rte_memseg_walk(mark_freeable, NULL);
1360 
1361 	rte_service_finalize();
1362 #ifdef VFIO_PRESENT
1363 	vfio_mp_sync_cleanup();
1364 #endif
1365 	rte_mp_channel_cleanup();
1366 	rte_trace_save();
1367 	eal_trace_fini();
1368 	/* after this point, any DPDK pointers will become dangling */
1369 	rte_eal_memory_detach();
1370 	eal_mp_dev_hotplug_cleanup();
1371 	rte_eal_malloc_heap_cleanup();
1372 	rte_eal_alarm_cleanup();
1373 	eal_cleanup_config(internal_conf);
1374 	rte_eal_log_cleanup();
1375 	return 0;
1376 }
1377 
1378 int rte_eal_create_uio_dev(void)
1379 {
1380 	const struct internal_config *internal_conf =
1381 		eal_get_internal_configuration();
1382 
1383 	return internal_conf->create_uio_dev;
1384 }
1385 
1386 enum rte_intr_mode
1387 rte_eal_vfio_intr_mode(void)
1388 {
1389 	const struct internal_config *internal_conf =
1390 		eal_get_internal_configuration();
1391 
1392 	return internal_conf->vfio_intr_mode;
1393 }
1394 
1395 void
1396 rte_eal_vfio_get_vf_token(rte_uuid_t vf_token)
1397 {
1398 	struct internal_config *cfg = eal_get_internal_configuration();
1399 
1400 	rte_uuid_copy(vf_token, cfg->vfio_vf_token);
1401 }
1402 
1403 int
1404 rte_eal_check_module(const char *module_name)
1405 {
1406 	char sysfs_mod_name[PATH_MAX];
1407 	struct stat st;
1408 	int n;
1409 
1410 	if (NULL == module_name)
1411 		return -1;
1412 
1413 	/* Check if there is sysfs mounted */
1414 	if (stat("/sys/module", &st) != 0) {
1415 		RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
1416 			errno, strerror(errno));
1417 		return -1;
1418 	}
1419 
1420 	/* A module might be built-in, therefore try sysfs */
1421 	n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
1422 	if (n < 0 || n > PATH_MAX) {
1423 		RTE_LOG(DEBUG, EAL, "Could not format module path\n");
1424 		return -1;
1425 	}
1426 
1427 	if (stat(sysfs_mod_name, &st) != 0) {
1428 		RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
1429 		        sysfs_mod_name, errno, strerror(errno));
1430 		return 0;
1431 	}
1432 
1433 	/* Module has been found */
1434 	return 1;
1435 }
1436