/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation.
 * Copyright(c) 2012-2014 6WIND S.A.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <getopt.h>
#include <sys/file.h>
#include <dirent.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/stat.h>
#if defined(RTE_ARCH_X86)
#include <sys/io.h>
#endif
#include <linux/version.h>

#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_service_component.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_bus.h>
#include <rte_version.h>
#include <malloc_heap.h>
#include <rte_vfio.h>

#include <telemetry_internal.h>
#include "eal_private.h"
#include "eal_thread.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_hugepages.h"
#include "eal_memcfg.h"
#include "eal_trace.h"
#include "eal_log.h"
#include "eal_options.h"
#include "eal_vfio.h"
#include "hotplug_mp.h"

#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

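/* enough room for a decimal megabyte count plus separator per NUMA node */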
#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)

#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;

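/*
 * Advisory write lock covering only the memsegs field of the shared config.
 * Holding it marks the primary process; eal_proc_type_detect() below treats
 * failure to take this lock as proof that a primary is already running.
 */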
static struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = offsetof(struct rte_mem_config, memsegs),
		.l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs),
};

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;

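/*
 * Remove leftover fbarray_* and mp_socket_* files from the runtime
 * directory. Only files that can be exclusively flock()'d are removed,
 * so files still held open by a live primary or secondary process are
 * left alone.
 */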
int
eal_clean_runtime_dir(void)
{
	const char *runtime_dir = rte_eal_get_runtime_dir();
	DIR *dir;
	struct dirent *dirent;
	int dir_fd, fd, lck_result;
	static const char * const filters[] = {
		"fbarray_*",
		"mp_socket_*"
	};

	/* open directory */
	dir = opendir(runtime_dir);
	if (!dir) {
		RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
				runtime_dir);
		goto error;
	}
	dir_fd = dirfd(dir);

	/* lock the directory before doing anything, to avoid races */
	if (flock(dir_fd, LOCK_EX) < 0) {
		RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
			runtime_dir);
		goto error;
	}

	dirent = readdir(dir);
	if (!dirent) {
		RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
				runtime_dir);
		goto error;
	}

	while (dirent != NULL) {
		unsigned int f_idx;
		bool skip = true;

		/* skip files that don't match the patterns */
		for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
			const char *filter = filters[f_idx];

			if (fnmatch(filter, dirent->d_name, 0) == 0) {
				skip = false;
				break;
			}
		}
		if (skip) {
			dirent = readdir(dir);
			continue;
		}

		/* try to lock the file */
		fd = openat(dir_fd, dirent->d_name, O_RDONLY);

		/* skip to next file */
		if (fd == -1) {
			dirent = readdir(dir);
			continue;
		}

		/* non-blocking lock */
		lck_result = flock(fd, LOCK_EX | LOCK_NB);

		/* if lock succeeds, remove the file */
		if (lck_result != -1)
			unlinkat(dir_fd, dirent->d_name, 0);
		close(fd);
		dirent = readdir(dir);
	}

	/* closedir closes dir_fd and drops the lock */
	closedir(dir);
	return 0;

error:
	if (dir)
		closedir(dir);

	RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
		strerror(errno));

	return -1;
}

/* create memory configuration in shared/mmap memory. Take out
 * a write lock on the memsegs, so we can auto-detect primary/secondary.
 * This means we never close the file while running (auto-close on exit).
 * We also don't lock the whole file, so that in future we can use read-locks
 * on other parts, e.g. memzones, to detect if there are running secondary
 * processes. */
static int
rte_eal_config_create(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	size_t page_sz = sysconf(_SC_PAGE_SIZE);
	size_t cfg_len = sizeof(*config->mem_config);
	size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
	void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
	int retval;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	const char *pathname = eal_runtime_config_path();

	if (internal_conf->no_shconf)
		return 0;

	/* map the config before hugepage address so that we don't waste a page */
	if (internal_conf->base_virtaddr != 0)
		rte_mem_cfg_addr = (void *)
			RTE_ALIGN_FLOOR(internal_conf->base_virtaddr -
			sizeof(struct rte_mem_config), page_sz);
	else
		rte_mem_cfg_addr = NULL;

	if (mem_cfg_fd < 0) {
		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
		if (mem_cfg_fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
				pathname);
			return -1;
		}
	}

	retval = ftruncate(mem_cfg_fd, cfg_len);
	if (retval < 0) {
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
			pathname);
		return -1;
	}

	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
	if (retval < 0) {
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
			"process running?\n", pathname);
		return -1;
	}

	/* reserve space for config */
	rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
			&cfg_len_aligned, page_sz, 0, 0);
	if (rte_mem_cfg_addr == NULL) {
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		return -1;
	}

	/* remap the actual file into the space we've just reserved */
	mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
			cfg_len_aligned, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
	if (mapped_mem_cfg_addr == MAP_FAILED) {
		munmap(rte_mem_cfg_addr, cfg_len_aligned);
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
		return -1;
	}

	memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config));
	config->mem_config = rte_mem_cfg_addr;

	/* store address of the config in the config itself so that secondary
	 * processes could later map the config into this exact location
	 */
	config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
	config->mem_config->dma_maskbits = 0;

	return 0;
}

/* attach to an existing shared memory config */
static int
rte_eal_config_attach(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	struct rte_mem_config *mem_config;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	const char *pathname = eal_runtime_config_path();

	if (internal_conf->no_shconf)
		return 0;

	if (mem_cfg_fd < 0) {
		mem_cfg_fd = open(pathname, O_RDWR);
		if (mem_cfg_fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
				pathname);
			return -1;
		}
	}

	/* map it as read-only first */
	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
	if (mem_config == MAP_FAILED) {
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

/* reattach the shared config at exact memory location primary process has it */
static int
rte_eal_config_reattach(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	struct rte_mem_config *mem_config;
	void *rte_mem_cfg_addr;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->no_shconf)
		return 0;

	/* save the address primary process has mapped shared config to */
	rte_mem_cfg_addr =
		(void *) (uintptr_t) config->mem_config->mem_cfg_addr;

	/* unmap original config */
	munmap(config->mem_config, sizeof(struct rte_mem_config));

	/* remap the config at proper address */
	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
			mem_cfg_fd, 0);

	close(mem_cfg_fd);
	mem_cfg_fd = -1;

	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
		if (mem_config != MAP_FAILED) {
			/* errno is stale, don't use */
			RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
				" - please use '--" OPT_BASE_VIRTADDR
				"' option\n", rte_mem_cfg_addr, mem_config);
			munmap(mem_config, sizeof(struct rte_mem_config));
			return -1;
		}
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

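/*
 * Primary/secondary detection relies on the write lock taken in
 * rte_eal_config_create(): e.g. with the default --file-prefix and running
 * as root, the shared config file is /var/run/dpdk/rte/config; the first
 * process to lock it becomes the primary, and a process that can open the
 * file but cannot lock it is a secondary.
 */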
/* Detect if we are a primary or a secondary process */
enum rte_proc_type_t
eal_proc_type_detect(void)
{
	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
	const char *pathname = eal_runtime_config_path();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* if there is no shared config, there can be no secondary processes */
	if (!internal_conf->no_shconf) {
		/* if we can open the file but not get a write-lock we are a
		 * secondary process. NOTE: if we get a file handle back, we
		 * keep that open and don't close it to prevent a race condition
		 * between multiple opens.
		 */
		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
			ptype = RTE_PROC_SECONDARY;
	}

	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");

	return ptype;
}

/* Sets up rte_config structure with the pointer to shared memory config. */
static int
rte_config_init(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	config->process_type = internal_conf->process_type;

	switch (config->process_type) {
	case RTE_PROC_PRIMARY:
		if (rte_eal_config_create() < 0)
			return -1;
		eal_mcfg_update_from_internal();
		break;
	case RTE_PROC_SECONDARY:
		if (rte_eal_config_attach() < 0)
			return -1;
		eal_mcfg_wait_complete();
		if (eal_mcfg_check_version() < 0) {
			RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
			return -1;
		}
		if (rte_eal_config_reattach() < 0)
			return -1;
		if (!__rte_mp_enable()) {
			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
			return -1;
		}
		eal_mcfg_update_internal();
		break;
	case RTE_PROC_AUTO:
	case RTE_PROC_INVALID:
		RTE_LOG(ERR, EAL, "Invalid process type %d\n",
			config->process_type);
		return -1;
	}

	return 0;
}

/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
static void
eal_hugedirs_unlock(void)
{
	int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
		/* skip uninitialized */
		if (internal_conf->hugepage_info[i].lock_descriptor < 0)
			continue;
		/* unlock hugepage file */
		flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN);
		close(internal_conf->hugepage_info[i].lock_descriptor);
		/* reset the field */
		internal_conf->hugepage_info[i].lock_descriptor = -1;
	}
}

/* display usage */
static void
eal_usage(const char *prgname)
{
	rte_usage_hook_t hook = eal_get_application_usage_hook();

	printf("\nUsage: %s ", prgname);
	eal_common_usage();
	printf("EAL Linux options:\n"
	       "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
	       "  --"OPT_SOCKET_LIMIT"      Limit memory allocation on sockets (comma separated values)\n"
	       "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
	       "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
	       "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
	       "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
	       "  --"OPT_VFIO_VF_TOKEN"     VF token (UUID) shared between SR-IOV PF and VFs\n"
	       "  --"OPT_LEGACY_MEM"        Legacy memory mode (no dynamic allocation, contiguous segments)\n"
	       "  --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
	       "  --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
	       "                      Allocate worker thread stacks from hugepage memory.\n"
	       "                      Size is in units of kbytes and defaults to system\n"
	       "                      thread stack size if not specified.\n"
	       "\n");
	/* Allow the application to print its usage message too if hook is set */
	if (hook) {
		printf("===== Application Usage =====\n\n");
		(hook)(prgname);
	}
}

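/*
 * Parse a comma-separated list of per-socket megabyte values, e.g.
 * "--socket-mem=1024,0,2048" requests 1024 MB on socket 0, nothing on
 * socket 1 and 2048 MB on socket 2. Parsed values are stored in bytes
 * (shifted left by 20).
 */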
static int
eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
	char *arg[RTE_MAX_NUMA_NODES];
	char *end;
	int arg_num, i, len;

	len = strnlen(strval, SOCKET_MEM_STRLEN);
	if (len == SOCKET_MEM_STRLEN) {
		RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
		return -1;
	}

	/* all other error cases will be caught later */
	if (!isdigit((unsigned char)strval[len - 1]))
		return -1;

	/* split the optarg into separate socket values */
	arg_num = rte_strsplit(strval, len,
			arg, RTE_MAX_NUMA_NODES, ',');

	/* if split failed, or 0 arguments */
	if (arg_num <= 0)
		return -1;

	/* parse each defined socket option */
	errno = 0;
	for (i = 0; i < arg_num; i++) {
		uint64_t val;
		end = NULL;
		val = strtoull(arg[i], &end, 10);

		/* check for invalid input */
		if ((errno != 0) ||
				(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
			return -1;
		val <<= 20;
		socket_arg[i] = val;
	}

	return 0;
}

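/* Map a --vfio-intr argument to an interrupt mode, e.g. "msix" selects
 * RTE_INTR_MODE_MSIX.
 */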
static int
eal_parse_vfio_intr(const char *mode)
{
	struct internal_config *internal_conf =
		eal_get_internal_configuration();
	unsigned int i;
	static struct {
		const char *name;
		enum rte_intr_mode value;
	} map[] = {
		{ "legacy", RTE_INTR_MODE_LEGACY },
		{ "msi", RTE_INTR_MODE_MSI },
		{ "msix", RTE_INTR_MODE_MSIX },
	};

	for (i = 0; i < RTE_DIM(map); i++) {
		if (!strcmp(mode, map[i].name)) {
			internal_conf->vfio_intr_mode = map[i].value;
			return 0;
		}
	}
	return -1;
}

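/* Parse a --vfio-vf-token argument: any standard UUID string, e.g.
 * "14d63f20-8445-11ea-8900-1f9ce7d5650d", is accepted.
 */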
static int
eal_parse_vfio_vf_token(const char *vf_token)
{
	struct internal_config *cfg = eal_get_internal_configuration();
	rte_uuid_t uuid;

	if (!rte_uuid_parse(vf_token, uuid)) {
		rte_uuid_copy(cfg->vfio_vf_token, uuid);
		return 0;
	}

	return -1;
}

/* Parse the arguments for --log-level only */
static void
eal_log_level_parse(int argc, char **argv)
{
	int opt;
	char **argvopt;
	int option_index;
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		int ret;

		/* getopt is not happy, stop right now */
		if (opt == '?')
			break;

		ret = (opt == OPT_LOG_LEVEL_NUM) ?
			eal_parse_common_option(opt, optarg, internal_conf) : 0;

		/* common parser is not happy */
		if (ret < 0)
			break;
	}

	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;
}

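/*
 * Parse --huge-worker-stack[=size]. Without a value the system default
 * stack size is queried via pthread_attr_getstacksize(); with a value in
 * kbytes, e.g. --huge-worker-stack=8192, each worker gets an 8 MB stack
 * allocated from hugepage memory.
 */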
static int
eal_parse_huge_worker_stack(const char *arg)
{
	struct internal_config *cfg = eal_get_internal_configuration();

	if (arg == NULL || arg[0] == '\0') {
		pthread_attr_t attr;
		int ret;

		if (pthread_attr_init(&attr) != 0) {
			RTE_LOG(ERR, EAL, "Could not retrieve default stack size\n");
			return -1;
		}
		ret = pthread_attr_getstacksize(&attr, &cfg->huge_worker_stack_size);
		pthread_attr_destroy(&attr);
		if (ret != 0) {
			RTE_LOG(ERR, EAL, "Could not retrieve default stack size\n");
			return -1;
		}
	} else {
		unsigned long stack_size;
		char *end;

		errno = 0;
		stack_size = strtoul(arg, &end, 10);
		if (errno || end == NULL || stack_size == 0 ||
				stack_size >= (size_t)-1 / 1024)
			return -1;

		cfg->huge_worker_stack_size = stack_size * 1024;
	}

	RTE_LOG(DEBUG, EAL, "Each worker thread will use %zu kB of DPDK memory as stack\n",
		cfg->huge_worker_stack_size / 1024);
	return 0;
}

/* Parse the argument given in the command line of the application */
static int
eal_parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		/* getopt didn't recognise the option */
		if (opt == '?') {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}

		/* eal_log_level_parse() already handled this option */
		if (opt == OPT_LOG_LEVEL_NUM)
			continue;

		ret = eal_parse_common_option(opt, optarg, internal_conf);
		/* common parser is not happy */
		if (ret < 0) {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
		/* common parser handled this option */
		if (ret == 0)
			continue;

		switch (opt) {
		case 'h':
			eal_usage(prgname);
			exit(EXIT_SUCCESS);

		case OPT_HUGE_DIR_NUM:
		{
			char *hdir = strdup(optarg);
			if (hdir == NULL)
				RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
			else {
				/* free old hugepage dir */
				free(internal_conf->hugepage_dir);
				internal_conf->hugepage_dir = hdir;
			}
			break;
		}
		case OPT_FILE_PREFIX_NUM:
		{
			char *prefix = strdup(optarg);
			if (prefix == NULL)
				RTE_LOG(ERR, EAL, "Could not store file prefix\n");
			else {
				/* free old prefix */
				free(internal_conf->hugefile_prefix);
				internal_conf->hugefile_prefix = prefix;
			}
			break;
		}
		case OPT_SOCKET_MEM_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_mem) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_MEM "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_sockets = 1;
			break;

		case OPT_SOCKET_LIMIT_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_limit) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_LIMIT "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_socket_limits = 1;
			break;

		case OPT_VFIO_INTR_NUM:
			if (eal_parse_vfio_intr(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_INTR "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_VFIO_VF_TOKEN_NUM:
			if (eal_parse_vfio_vf_token(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_VF_TOKEN "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_CREATE_UIO_DEV_NUM:
			internal_conf->create_uio_dev = 1;
			break;

		case OPT_MBUF_POOL_OPS_NAME_NUM:
		{
			char *ops_name = strdup(optarg);
			if (ops_name == NULL)
				RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
			else {
				/* free old ops name */
				free(internal_conf->user_mbuf_pool_ops_name);

				internal_conf->user_mbuf_pool_ops_name =
						ops_name;
			}
			break;
		}
		case OPT_MATCH_ALLOCATIONS_NUM:
			internal_conf->match_allocations = 1;
			break;

		case OPT_HUGE_WORKER_STACK_NUM:
			if (eal_parse_huge_worker_stack(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameter for --"
					OPT_HUGE_WORKER_STACK"\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		default:
			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
				RTE_LOG(ERR, EAL, "Option %c is not supported "
					"on Linux\n", opt);
			} else if (opt >= OPT_LONG_MIN_NUM &&
				   opt < OPT_LONG_MAX_NUM) {
				RTE_LOG(ERR, EAL, "Option %s is not supported "
					"on Linux\n",
					eal_long_options[option_index].name);
			} else {
				RTE_LOG(ERR, EAL, "Option %d is not supported "
					"on Linux\n", opt);
			}
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
	}

	/* create runtime data directory. In no_shconf mode, skip any errors */
	if (eal_create_runtime_dir() < 0) {
		if (internal_conf->no_shconf == 0) {
			RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
			ret = -1;
			goto out;
		} else
			RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n");
	}

	if (eal_adjust_config(internal_conf) != 0) {
		ret = -1;
		goto out;
	}

	/* sanity checks */
	if (eal_check_common_options(internal_conf) != 0) {
		eal_usage(prgname);
		ret = -1;
		goto out;
	}

	if (optind >= 0)
		argv[optind-1] = prgname;
	ret = optind-1;

out:
	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;

	return ret;
}

static int
check_socket(const struct rte_memseg_list *msl, void *arg)
{
	int *socket_id = arg;

	if (msl->external)
		return 0;

	return *socket_id == msl->socket_id;
}

static void
eal_check_mem_on_local_socket(void)
{
	int socket_id;
	const struct rte_config *config = rte_eal_get_configuration();

	socket_id = rte_lcore_to_socket_id(config->main_lcore);

	if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
		RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n");
}

static int
sync_func(__rte_unused void *arg)
{
	return 0;
}

/*
 * Request iopl privilege for all RPL, returns 0 on success
 * iopl() call is mostly for the i386 architecture. For other architectures,
 * return -1 to indicate IO privilege can't be changed in this way.
 */
int
rte_eal_iopl_init(void)
{
#if defined(RTE_ARCH_X86)
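	/* privilege level 3 grants the caller access to all I/O ports;
	 * this requires CAP_SYS_RAWIO (see iopl(2))
	 */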
	if (iopl(3) != 0)
		return -1;
#endif
	return 0;
}

#ifdef VFIO_PRESENT
static int rte_eal_vfio_setup(void)
{
	if (rte_vfio_enable("vfio"))
		return -1;

	return 0;
}
#endif

static void rte_eal_init_alert(const char *msg)
{
	fprintf(stderr, "EAL: FATAL: %s\n", msg);
	RTE_LOG(ERR, EAL, "%s\n", msg);
}

/*
 * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
 * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
 * IOMMU groups. If IOMMU is not enabled, that path would be empty.
 * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
 */
static bool
is_iommu_enabled(void)
{
	DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
	struct dirent *d;
	int n = 0;

	/* if directory doesn't exist, assume IOMMU is not enabled */
	if (dir == NULL)
		return false;

	while ((d = readdir(dir)) != NULL) {
		/* "." and ".." account for the first two entries */
		if (++n > 2)
			break;
	}
	closedir(dir);

	return n > 2;
}
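/* On a system with VT-d/SMMU enabled, each IOMMU group appears as a numbered
 * directory under /sys/kernel/iommu_groups/, so is_iommu_enabled() sees more
 * than just the "." and ".." entries.
 */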

static __rte_noreturn void *
eal_worker_thread_loop(void *arg)
{
	eal_thread_loop(arg);
}

static int
eal_worker_thread_create(unsigned int lcore_id)
{
	pthread_attr_t *attrp = NULL;
	void *stack_ptr = NULL;
	pthread_attr_t attr;
	size_t stack_size;
	int ret = -1;

	stack_size = eal_get_internal_configuration()->huge_worker_stack_size;
	if (stack_size != 0) {
		/* Allocate NUMA aware stack memory and set pthread attributes */
		stack_ptr = rte_zmalloc_socket("lcore_stack", stack_size,
			RTE_CACHE_LINE_SIZE, rte_lcore_to_socket_id(lcore_id));
		if (stack_ptr == NULL) {
			rte_eal_init_alert("Cannot allocate worker lcore stack memory");
			rte_errno = ENOMEM;
			goto out;
		}

		if (pthread_attr_init(&attr) != 0) {
			rte_eal_init_alert("Cannot init pthread attributes");
			rte_errno = EFAULT;
			goto out;
		}
		attrp = &attr;

		if (pthread_attr_setstack(attrp, stack_ptr, stack_size) != 0) {
			rte_eal_init_alert("Cannot set pthread stack attributes");
			rte_errno = EFAULT;
			goto out;
		}
	}

	if (pthread_create((pthread_t *)&lcore_config[lcore_id].thread_id.opaque_id,
			attrp, eal_worker_thread_loop, (void *)(uintptr_t)lcore_id) == 0)
		ret = 0;

out:
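	/* on success the stack belongs to the new thread for its whole
	 * lifetime, so it is freed only on failure
	 */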
	if (ret != 0)
		rte_free(stack_ptr);
	if (attrp != NULL)
		pthread_attr_destroy(attrp);
	return ret;
}

/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	static uint32_t run_once;
	uint32_t has_run = 0;
	const char *p;
	static char logid[PATH_MAX];
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_MAX_THREAD_NAME_LEN];
	bool phys_addrs;
	const struct rte_config *config = rte_eal_get_configuration();
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* checks if the machine is adequate */
	if (!rte_cpu_is_supported()) {
		rte_eal_init_alert("unsupported cpu type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0,
					__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		rte_eal_init_alert("already called initialization.");
		rte_errno = EALREADY;
		return -1;
	}

	p = strrchr(argv[0], '/');
	strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));

	eal_reset_internal_config(internal_conf);

	/* set log level as early as possible */
	eal_log_level_parse(argc, argv);

	/* clone argv to report out later in telemetry */
	eal_save_args(argc, argv);

	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}

	fctret = eal_parse_args(argc, argv);
	if (fctret < 0) {
		rte_eal_init_alert("Invalid 'command line' arguments.");
		rte_errno = EINVAL;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_plugins_init() < 0) {
		rte_eal_init_alert("Cannot init plugins");
		rte_errno = EINVAL;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_trace_init() < 0) {
		rte_eal_init_alert("Cannot init trace");
		rte_errno = EFAULT;
		return -1;
	}

	if (eal_option_device_parse()) {
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (rte_config_init() < 0) {
		rte_eal_init_alert("Cannot init config");
		return -1;
	}

	if (rte_eal_intr_init() < 0) {
		rte_eal_init_alert("Cannot init interrupt-handling thread");
		return -1;
	}

	if (rte_eal_alarm_init() < 0) {
		rte_eal_init_alert("Cannot init alarm");
		/* rte_eal_alarm_init sets rte_errno on failure. */
		return -1;
	}

	/* Put mp channel init before bus scan so that we can init the vdev
	 * bus through mp channel in the secondary process before the bus scan.
	 */
	if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
		rte_eal_init_alert("failed to init mp channel");
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			rte_errno = EFAULT;
			return -1;
		}
	}

	/* register multi-process action callbacks for hotplug */
	if (eal_mp_dev_hotplug_init() < 0) {
		rte_eal_init_alert("failed to register mp callback for hotplug");
		return -1;
	}

	if (rte_bus_scan()) {
		rte_eal_init_alert("Cannot scan the buses for devices");
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	phys_addrs = rte_eal_using_phys_addrs() != 0;

	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
	if (internal_conf->iova_mode == RTE_IOVA_DC) {
		/* autodetect the IOVA mapping mode */
		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();

		if (iova_mode == RTE_IOVA_DC) {
			RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");

			if (!phys_addrs) {
				/* if we have no access to physical addresses,
				 * pick IOVA as VA mode.
				 */
				iova_mode = RTE_IOVA_VA;
				RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
#if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
			} else if (rte_eal_check_module("rte_kni") == 1) {
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n");
#endif
			} else if (is_iommu_enabled()) {
				/* we have an IOMMU, pick IOVA as VA mode */
				iova_mode = RTE_IOVA_VA;
				RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
			} else {
				/* physical addresses available, and no IOMMU
				 * found, so pick IOVA as PA.
				 */
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
			}
		}
#if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
		/* Workaround for KNI which requires physical address to work
		 * in kernels < 4.10
		 */
		if (iova_mode == RTE_IOVA_VA &&
				rte_eal_check_module("rte_kni") == 1) {
			if (phys_addrs) {
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
			} else {
				RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
			}
		}
#endif
		rte_eal_get_configuration()->iova_mode = iova_mode;
	} else {
		rte_eal_get_configuration()->iova_mode =
			internal_conf->iova_mode;
	}

	if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
		rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
		rte_errno = EINVAL;
		return -1;
	}

	if (rte_eal_iova_mode() == RTE_IOVA_PA && !RTE_IOVA_AS_PA) {
		rte_eal_init_alert("Cannot use IOVA as 'PA' as it is disabled during build");
		rte_errno = EINVAL;
		return -1;
	}

	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");

	if (internal_conf->no_hugetlbfs == 0) {
		/* rte_config isn't initialized yet */
		ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
				eal_hugepage_info_init() :
				eal_hugepage_info_read();
		if (ret < 0) {
			rte_eal_init_alert("Cannot get hugepage information.");
			rte_errno = EACCES;
			__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
			return -1;
		}
	}

	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) {
		if (internal_conf->no_hugetlbfs)
			internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE;
	}

	if (internal_conf->vmware_tsc_map == 1) {
#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
		rte_cycles_vmware_tsc_map = 1;
		RTE_LOG(DEBUG, EAL, "Using VMWARE TSC MAP, "
				"you must have monitor_control.pseudo_perfctr = TRUE\n");
#else
		RTE_LOG(WARNING, EAL, "Ignoring --vmware-tsc-map because "
				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
#endif
	}

	if (eal_log_init(logid, internal_conf->syslog_facility) < 0) {
		rte_eal_init_alert("Cannot init logging.");
		rte_errno = ENOMEM;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

#ifdef VFIO_PRESENT
	if (rte_eal_vfio_setup() < 0) {
		rte_eal_init_alert("Cannot init VFIO");
		rte_errno = EAGAIN;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}
#endif
	/* in secondary processes, memory init may allocate additional fbarrays
	 * not present in primary processes, so to avoid any potential issues,
	 * initialize memzones first.
	 */
	if (rte_eal_memzone_init() < 0) {
		rte_eal_init_alert("Cannot init memzone");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_memory_init() < 0) {
		rte_eal_init_alert("Cannot init memory");
		rte_errno = ENOMEM;
		return -1;
	}

	/* the directories are locked during eal_hugepage_info_init */
	eal_hugedirs_unlock();

	if (rte_eal_malloc_heap_init() < 0) {
		rte_eal_init_alert("Cannot init malloc heap");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_tailqs_init() < 0) {
		rte_eal_init_alert("Cannot init tail queues for objects");
		rte_errno = EFAULT;
		return -1;
	}

	if (rte_eal_timer_init() < 0) {
		rte_eal_init_alert("Cannot init HPET or TSC timers");
		rte_errno = ENOTSUP;
		return -1;
	}

	eal_check_mem_on_local_socket();

	if (rte_thread_set_affinity_by_id(rte_thread_self(),
			&lcore_config[config->main_lcore].cpuset) != 0) {
		rte_eal_init_alert("Cannot set affinity");
		rte_errno = EINVAL;
		return -1;
	}
	__rte_thread_init(config->main_lcore,
		&lcore_config[config->main_lcore].cpuset);

	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
	RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
		config->main_lcore, (uintptr_t)pthread_self(), cpuset,
		ret == 0 ? "" : "...");

	RTE_LCORE_FOREACH_WORKER(i) {

		/*
		 * create communication pipes between main thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_main2worker) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_worker2main) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = eal_worker_thread_create(i);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, sizeof(thread_name),
			"rte-worker-%d", i);
		ret = rte_thread_setname((pthread_t)lcore_config[i].thread_id.opaque_id,
			thread_name);
		if (ret != 0)
			RTE_LOG(DEBUG, EAL,
				"Cannot set name for lcore thread\n");

		ret = rte_thread_set_affinity_by_id(lcore_config[i].thread_id,
			&lcore_config[i].cpuset);
		if (ret != 0)
			rte_panic("Cannot set affinity\n");
	}

	/*
	 * Launch a dummy function on all worker lcores, so that main lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
	rte_eal_mp_wait_lcore();

	/* initialize services so vdevs register service during bus_probe. */
	ret = rte_service_init();
	if (ret) {
		rte_eal_init_alert("rte_service_init() failed");
		rte_errno = -ret;
		return -1;
	}

	/* Probe all the buses and devices/drivers on them */
	if (rte_bus_probe()) {
		rte_eal_init_alert("Cannot probe devices");
		rte_errno = ENOTSUP;
		return -1;
	}

#ifdef VFIO_PRESENT
	/* Register mp action after probe() so that we got enough info */
	if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
		return -1;
#endif

	/* initialize default service/lcore mappings and start running. Ignore
	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
	 */
	ret = rte_service_start_with_defaults();
	if (ret < 0 && ret != -ENOTSUP) {
		rte_errno = -ret;
		return -1;
	}

	/*
	 * Clean up unused files in runtime directory. We do this at the end of
	 * init and not at the beginning because we want to clean stuff up
	 * whether we are primary or secondary process, but we cannot remove
	 * primary process' files because secondary should be able to run even
	 * if primary process is dead.
	 *
	 * In no_shconf mode, no runtime directory is created in the first
	 * place, so no cleanup needed.
	 */
	if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) {
		rte_eal_init_alert("Cannot clear runtime directory");
		return -1;
	}
	if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) {
		int tlog = rte_log_register_type_and_pick_level(
				"lib.telemetry", RTE_LOG_WARNING);
		if (tlog < 0)
			tlog = RTE_LOGTYPE_EAL;
		if (rte_telemetry_init(rte_eal_get_runtime_dir(),
				rte_version(),
				&internal_conf->ctrl_cpuset, rte_log, tlog) != 0)
			return -1;
	}

	eal_mcfg_complete();

	return fctret;
}
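
/*
 * A minimal usage sketch (not part of EAL): an application calls
 * rte_eal_init() first and skips the arguments it consumed.
 *
 *	int main(int argc, char **argv)
 *	{
 *		int ret = rte_eal_init(argc, argv);
 *
 *		if (ret < 0)
 *			rte_exit(EXIT_FAILURE, "Cannot init EAL\n");
 *		argc -= ret;
 *		argv += ret;
 *
 *		... application setup and lcore launches ...
 *
 *		rte_eal_cleanup();
 *		return 0;
 *	}
 */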

static int
mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg __rte_unused)
{
	/* ms is const, so find this memseg */
	struct rte_memseg *found;

	if (msl->external)
		return 0;

	found = rte_mem_virt2memseg(ms->addr, msl);

	found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;

	return 0;
}

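/*
 * Release all EAL-held resources. Typically called once, right before the
 * process exits, after every worker lcore has been waited on with
 * rte_eal_mp_wait_lcore().
 */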
int
rte_eal_cleanup(void)
{
	/* if we're in a primary process, we need to mark hugepages as freeable
	 * so that finalization can release them back to the system.
	 */
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
			internal_conf->hugepage_file.unlink_existing)
		rte_memseg_walk(mark_freeable, NULL);

	rte_service_finalize();
#ifdef VFIO_PRESENT
	vfio_mp_sync_cleanup();
#endif
	rte_mp_channel_cleanup();
	eal_bus_cleanup();
	rte_trace_save();
	eal_trace_fini();
	/* after this point, any DPDK pointers will become dangling */
	rte_eal_memory_detach();
	eal_mp_dev_hotplug_cleanup();
	rte_eal_malloc_heap_cleanup();
	rte_eal_alarm_cleanup();
	eal_cleanup_config(internal_conf);
	rte_eal_log_cleanup();
	return 0;
}

int rte_eal_create_uio_dev(void)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return internal_conf->create_uio_dev;
}

enum rte_intr_mode
rte_eal_vfio_intr_mode(void)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return internal_conf->vfio_intr_mode;
}

void
rte_eal_vfio_get_vf_token(rte_uuid_t vf_token)
{
	struct internal_config *cfg = eal_get_internal_configuration();

	rte_uuid_copy(vf_token, cfg->vfio_vf_token);
}

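/*
 * Look the module up under /sys/module/<name>: returns 1 if present (this
 * also covers built-in modules), 0 if absent, and -1 on error.
 */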
int
rte_eal_check_module(const char *module_name)
{
	char sysfs_mod_name[PATH_MAX];
	struct stat st;
	int n;

	if (NULL == module_name)
		return -1;

	/* Check if there is sysfs mounted */
	if (stat("/sys/module", &st) != 0) {
		RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	/* A module might be built-in, therefore try sysfs */
	n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
	if (n < 0 || n >= PATH_MAX) {
		RTE_LOG(DEBUG, EAL, "Could not format module path\n");
		return -1;
	}

	if (stat(sysfs_mod_name, &st) != 0) {
		RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
			sysfs_mod_name, errno, strerror(errno));
		return 0;
	}

	/* Module has been found */
	return 1;
}
1449