xref: /dpdk/lib/eal/freebsd/eal.c (revision e9fd1ebf981f361844aea9ec94e17f4bda5e1479)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation.
3  * Copyright(c) 2014 6WIND S.A.
4  */
5 
6 #include <ctype.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdarg.h>
12 #include <unistd.h>
13 #include <pthread.h>
14 #include <syslog.h>
15 #include <getopt.h>
16 #include <sys/file.h>
17 #include <stddef.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <sys/mman.h>
21 #include <sys/queue.h>
22 #include <sys/stat.h>
23 
24 #include <rte_common.h>
25 #include <rte_debug.h>
26 #include <rte_memory.h>
27 #include <rte_launch.h>
28 #include <rte_eal.h>
29 #include <rte_eal_memconfig.h>
30 #include <rte_errno.h>
31 #include <rte_per_lcore.h>
32 #include <rte_lcore.h>
33 #include <rte_service_component.h>
34 #include <rte_log.h>
35 #include <rte_random.h>
36 #include <rte_cycles.h>
37 #include <rte_string_fns.h>
38 #include <rte_cpuflags.h>
39 #include <rte_interrupts.h>
40 #include <rte_bus.h>
41 #include <rte_dev.h>
42 #include <rte_devargs.h>
43 #include <rte_version.h>
44 #include <rte_vfio.h>
45 #include <malloc_heap.h>
46 #include <telemetry_internal.h>
47 
48 #include "eal_private.h"
49 #include "eal_thread.h"
50 #include "eal_internal_cfg.h"
51 #include "eal_filesystem.h"
52 #include "eal_hugepages.h"
53 #include "eal_options.h"
54 #include "eal_memcfg.h"
55 #include "eal_trace.h"
56 
/* amount of memory used when running with --no-huge and no explicit -m
 * (see rte_eal_init(): applied when internal memory size is 0)
 */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;

/* write-lock over the memsegs field of the shared config file; a process
 * that holds this lock is the primary (see eal_proc_type_detect())
 */
static struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = offsetof(struct rte_mem_config, memsegs),
		.l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs),
};

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
75 
76 
/* Remove stale files from the runtime directory.
 * Returns 0 on success (always succeeds on FreeBSD).
 */
int
eal_clean_runtime_dir(void)
{
	/* FreeBSD doesn't need this implemented for now, because, unlike Linux,
	 * FreeBSD doesn't create per-process files, so no need to clean up.
	 */
	return 0;
}
85 
86 /* create memory configuration in shared/mmap memory. Take out
87  * a write lock on the memsegs, so we can auto-detect primary/secondary.
88  * This means we never close the file while running (auto-close on exit).
89  * We also don't lock the whole file, so that in future we can use read-locks
90  * on other parts, e.g. memzones, to detect if there are running secondary
91  * processes. */
92 static int
93 rte_eal_config_create(void)
94 {
95 	struct rte_config *config = rte_eal_get_configuration();
96 	const struct internal_config *internal_conf =
97 		eal_get_internal_configuration();
98 	size_t page_sz = sysconf(_SC_PAGE_SIZE);
99 	size_t cfg_len = sizeof(struct rte_mem_config);
100 	size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
101 	void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
102 	int retval;
103 
104 	const char *pathname = eal_runtime_config_path();
105 
106 	if (internal_conf->no_shconf)
107 		return 0;
108 
109 	/* map the config before base address so that we don't waste a page */
110 	if (internal_conf->base_virtaddr != 0)
111 		rte_mem_cfg_addr = (void *)
112 			RTE_ALIGN_FLOOR(internal_conf->base_virtaddr -
113 			sizeof(struct rte_mem_config), page_sz);
114 	else
115 		rte_mem_cfg_addr = NULL;
116 
117 	if (mem_cfg_fd < 0){
118 		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
119 		if (mem_cfg_fd < 0) {
120 			EAL_LOG(ERR, "Cannot open '%s' for rte_mem_config",
121 				pathname);
122 			return -1;
123 		}
124 	}
125 
126 	retval = ftruncate(mem_cfg_fd, cfg_len);
127 	if (retval < 0){
128 		close(mem_cfg_fd);
129 		mem_cfg_fd = -1;
130 		EAL_LOG(ERR, "Cannot resize '%s' for rte_mem_config",
131 			pathname);
132 		return -1;
133 	}
134 
135 	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
136 	if (retval < 0){
137 		close(mem_cfg_fd);
138 		mem_cfg_fd = -1;
139 		EAL_LOG(ERR, "Cannot create lock on '%s'. Is another primary "
140 			"process running?", pathname);
141 		return -1;
142 	}
143 
144 	/* reserve space for config */
145 	rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
146 			&cfg_len_aligned, page_sz, 0, 0);
147 	if (rte_mem_cfg_addr == NULL) {
148 		EAL_LOG(ERR, "Cannot mmap memory for rte_config");
149 		close(mem_cfg_fd);
150 		mem_cfg_fd = -1;
151 		return -1;
152 	}
153 
154 	/* remap the actual file into the space we've just reserved */
155 	mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
156 			cfg_len_aligned, PROT_READ | PROT_WRITE,
157 			MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
158 	if (mapped_mem_cfg_addr == MAP_FAILED) {
159 		EAL_LOG(ERR, "Cannot remap memory for rte_config");
160 		munmap(rte_mem_cfg_addr, cfg_len);
161 		close(mem_cfg_fd);
162 		mem_cfg_fd = -1;
163 		return -1;
164 	}
165 
166 	memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config));
167 	config->mem_config = rte_mem_cfg_addr;
168 
169 	/* store address of the config in the config itself so that secondary
170 	 * processes could later map the config into this exact location
171 	 */
172 	config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
173 	return 0;
174 }
175 
176 /* attach to an existing shared memory config */
/* attach to an existing shared memory config (secondary process).
 * The file is first mapped read-only at an arbitrary address; it is
 * remapped read-write at the primary's address by rte_eal_config_reattach().
 * Returns 0 on success (or in no_shconf mode), -1 on error.
 */
static int
rte_eal_config_attach(void)
{
	void *rte_mem_cfg_addr;
	const char *pathname = eal_runtime_config_path();
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();


	if (internal_conf->no_shconf)
		return 0;

	/* fd may already be open from eal_proc_type_detect() */
	if (mem_cfg_fd < 0){
		mem_cfg_fd = open(pathname, O_RDWR);
		if (mem_cfg_fd < 0) {
			EAL_LOG(ERR, "Cannot open '%s' for rte_mem_config",
				pathname);
			return -1;
		}
	}

	/* read-only map: we only need mem_cfg_addr out of it for now */
	rte_mem_cfg_addr = mmap(NULL, sizeof(*config->mem_config),
				PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
	/* don't close the fd here, it will be closed on reattach */
	if (rte_mem_cfg_addr == MAP_FAILED) {
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		EAL_LOG(ERR, "Cannot mmap memory for rte_config! error %i (%s)",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = rte_mem_cfg_addr;

	return 0;
}
214 
215 /* reattach the shared config at exact memory location primary process has it */
/* reattach the shared config at exact memory location primary process has it.
 * Consumes mem_cfg_fd (it is closed whether or not the remap succeeds).
 * Returns 0 on success (or in no_shconf mode), -1 on error.
 */
static int
rte_eal_config_reattach(void)
{
	struct rte_mem_config *mem_config;
	void *rte_mem_cfg_addr;
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->no_shconf)
		return 0;

	/* save the address primary process has mapped shared config to */
	rte_mem_cfg_addr =
			(void *)(uintptr_t)config->mem_config->mem_cfg_addr;

	/* unmap original config */
	munmap(config->mem_config, sizeof(struct rte_mem_config));

	/* remap the config at proper address */
	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
			mem_cfg_fd, 0);
	/* fd no longer needed once the final mapping exists */
	close(mem_cfg_fd);
	mem_cfg_fd = -1;

	/* without MAP_FIXED the kernel may have placed the mapping elsewhere;
	 * that is as fatal as MAP_FAILED since pointers inside the config
	 * only make sense at the primary's address
	 */
	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
		if (mem_config != MAP_FAILED) {
			/* errno is stale, don't use */
			EAL_LOG(ERR, "Cannot mmap memory for rte_config at [%p], got [%p]"
					  " - please use '--" OPT_BASE_VIRTADDR
					  "' option",
				rte_mem_cfg_addr, mem_config);
			munmap(mem_config, sizeof(struct rte_mem_config));
			return -1;
		}
		EAL_LOG(ERR, "Cannot mmap memory for rte_config! error %i (%s)",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}
261 
262 /* Detect if we are a primary or a secondary process */
/* Detect if we are a primary or a secondary process.
 * Primary is assumed unless the shared config file can be opened but its
 * write-lock cannot be taken (meaning a primary already holds it).
 */
enum rte_proc_type_t
eal_proc_type_detect(void)
{
	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
	const char *pathname = eal_runtime_config_path();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* if there no shared config, there can be no secondary processes */
	if (!internal_conf->no_shconf) {
		/* if we can open the file but not get a write-lock we are a
		 * secondary process. NOTE: if we get a file handle back, we
		 * keep that open and don't close it to prevent a race condition
		 * between multiple opens.
		 */
		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
			ptype = RTE_PROC_SECONDARY;
	}

	EAL_LOG(INFO, "Auto-detected process type: %s",
			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");

	return ptype;
}
288 
289 /* Sets up rte_config structure with the pointer to shared memory config.*/
/* Sets up rte_config structure with the pointer to shared memory config.
 * Primary creates the shared config; secondary attaches to it, waits for
 * the primary to finish init, checks version compatibility, then remaps
 * the config at the primary's address and asks the primary for permission
 * to attach. Returns 0 on success, -1 on any failure.
 */
static int
rte_config_init(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	config->process_type = internal_conf->process_type;

	switch (config->process_type) {
	case RTE_PROC_PRIMARY:
		if (rte_eal_config_create() < 0)
			return -1;
		eal_mcfg_update_from_internal();
		break;
	case RTE_PROC_SECONDARY:
		if (rte_eal_config_attach() < 0)
			return -1;
		/* block until the primary marks the shared config complete */
		eal_mcfg_wait_complete();
		if (eal_mcfg_check_version() < 0) {
			EAL_LOG(ERR, "Primary and secondary process DPDK version mismatch");
			return -1;
		}
		if (rte_eal_config_reattach() < 0)
			return -1;
		if (!__rte_mp_enable()) {
			EAL_LOG(ERR, "Primary process refused secondary attachment");
			return -1;
		}
		eal_mcfg_update_internal();
		break;
	case RTE_PROC_AUTO:
	case RTE_PROC_INVALID:
		/* AUTO should have been resolved during argument parsing */
		EAL_LOG(ERR, "Invalid process type %d",
			config->process_type);
		return -1;
	}

	return 0;
}
330 
331 /* display usage */
332 static void
333 eal_usage(const char *prgname)
334 {
335 	rte_usage_hook_t hook = eal_get_application_usage_hook();
336 
337 	printf("\nUsage: %s ", prgname);
338 	eal_common_usage();
339 	/* Allow the application to print its usage message too if hook is set */
340 	if (hook) {
341 		printf("===== Application Usage =====\n\n");
342 		(hook)(prgname);
343 	}
344 }
345 
346 static inline size_t
347 eal_get_hugepage_mem_size(void)
348 {
349 	uint64_t size = 0;
350 	unsigned i, j;
351 	struct internal_config *internal_conf =
352 		eal_get_internal_configuration();
353 
354 	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
355 		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
356 		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
357 			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
358 				size += hpi->hugepage_sz * hpi->num_pages[j];
359 			}
360 		}
361 	}
362 
363 	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
364 }
365 
366 /* Parse the arguments for --log-level only */
367 static void
368 eal_log_level_parse(int argc, char **argv)
369 {
370 	int opt;
371 	char **argvopt;
372 	int option_index;
373 	const int old_optind = optind;
374 	const int old_optopt = optopt;
375 	const int old_optreset = optreset;
376 	char * const old_optarg = optarg;
377 	struct internal_config *internal_conf =
378 		eal_get_internal_configuration();
379 
380 	argvopt = argv;
381 	optind = 1;
382 	optreset = 1;
383 
384 	while ((opt = getopt_long(argc, argvopt, eal_short_options,
385 				  eal_long_options, &option_index)) != EOF) {
386 
387 		int ret;
388 
389 		/* getopt is not happy, stop right now */
390 		if (opt == '?')
391 			break;
392 
393 		ret = (opt == OPT_LOG_LEVEL_NUM) ?
394 		    eal_parse_common_option(opt, optarg, internal_conf) : 0;
395 
396 		/* common parser is not happy */
397 		if (ret < 0)
398 			break;
399 	}
400 
401 	/* restore getopt lib */
402 	optind = old_optind;
403 	optopt = old_optopt;
404 	optreset = old_optreset;
405 	optarg = old_optarg;
406 }
407 
408 /* Parse the argument given in the command line of the application */
409 static int
410 eal_parse_args(int argc, char **argv)
411 {
412 	int opt, ret;
413 	char **argvopt;
414 	int option_index;
415 	char *prgname = argv[0];
416 	const int old_optind = optind;
417 	const int old_optopt = optopt;
418 	const int old_optreset = optreset;
419 	char * const old_optarg = optarg;
420 	struct internal_config *internal_conf =
421 		eal_get_internal_configuration();
422 
423 	argvopt = argv;
424 	optind = 1;
425 	optreset = 1;
426 
427 	while ((opt = getopt_long(argc, argvopt, eal_short_options,
428 				  eal_long_options, &option_index)) != EOF) {
429 
430 		/* getopt didn't recognise the option */
431 		if (opt == '?') {
432 			eal_usage(prgname);
433 			ret = -1;
434 			goto out;
435 		}
436 
437 		/* eal_log_level_parse() already handled this option */
438 		if (opt == OPT_LOG_LEVEL_NUM)
439 			continue;
440 
441 		ret = eal_parse_common_option(opt, optarg, internal_conf);
442 		/* common parser is not happy */
443 		if (ret < 0) {
444 			eal_usage(prgname);
445 			ret = -1;
446 			goto out;
447 		}
448 		/* common parser handled this option */
449 		if (ret == 0)
450 			continue;
451 
452 		switch (opt) {
453 		case OPT_MBUF_POOL_OPS_NAME_NUM:
454 		{
455 			char *ops_name = strdup(optarg);
456 			if (ops_name == NULL)
457 				EAL_LOG(ERR, "Could not store mbuf pool ops name");
458 			else {
459 				/* free old ops name */
460 				free(internal_conf->user_mbuf_pool_ops_name);
461 
462 				internal_conf->user_mbuf_pool_ops_name =
463 						ops_name;
464 			}
465 			break;
466 		}
467 		case OPT_HELP_NUM:
468 			eal_usage(prgname);
469 			exit(EXIT_SUCCESS);
470 		default:
471 			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
472 				EAL_LOG(ERR, "Option %c is not supported "
473 					"on FreeBSD", opt);
474 			} else if (opt >= OPT_LONG_MIN_NUM &&
475 				   opt < OPT_LONG_MAX_NUM) {
476 				EAL_LOG(ERR, "Option %s is not supported "
477 					"on FreeBSD",
478 					eal_long_options[option_index].name);
479 			} else {
480 				EAL_LOG(ERR, "Option %d is not supported "
481 					"on FreeBSD", opt);
482 			}
483 			eal_usage(prgname);
484 			ret = -1;
485 			goto out;
486 		}
487 	}
488 
489 	/* create runtime data directory. In no_shconf mode, skip any errors */
490 	if (eal_create_runtime_dir() < 0) {
491 		if (internal_conf->no_shconf == 0) {
492 			EAL_LOG(ERR, "Cannot create runtime directory");
493 			ret = -1;
494 			goto out;
495 		} else
496 			EAL_LOG(WARNING, "No DPDK runtime directory created");
497 	}
498 
499 	if (eal_adjust_config(internal_conf) != 0) {
500 		ret = -1;
501 		goto out;
502 	}
503 
504 	/* sanity checks */
505 	if (eal_check_common_options(internal_conf) != 0) {
506 		eal_usage(prgname);
507 		ret = -1;
508 		goto out;
509 	}
510 
511 	if (optind >= 0)
512 		argv[optind-1] = prgname;
513 	ret = optind-1;
514 
515 out:
516 	/* restore getopt lib */
517 	optind = old_optind;
518 	optopt = old_optopt;
519 	optreset = old_optreset;
520 	optarg = old_optarg;
521 
522 	return ret;
523 }
524 
525 static int
526 check_socket(const struct rte_memseg_list *msl, void *arg)
527 {
528 	int *socket_id = arg;
529 
530 	if (msl->external)
531 		return 0;
532 
533 	if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
534 		return 1;
535 
536 	return 0;
537 }
538 
539 static void
540 eal_check_mem_on_local_socket(void)
541 {
542 	int socket_id;
543 	const struct rte_config *config = rte_eal_get_configuration();
544 
545 	socket_id = rte_lcore_to_socket_id(config->main_lcore);
546 
547 	if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
548 		EAL_LOG(WARNING, "WARNING: Main core has no memory on local socket!");
549 }
550 
551 
/* No-op worker payload, launched on all workers at init time purely to
 * confirm every worker thread is up and able to execute work.
 */
static int
sync_func(__rte_unused void *arg)
{
	return 0;
}
/* Abstraction for port I/0 privilege */
int
rte_eal_iopl_init(void)
{
	static int fd = -1;

	/* open /dev/io once; the descriptor is intentionally never closed,
	 * since holding it open is what grants I/O port access
	 */
	if (fd < 0)
		fd = open("/dev/io", O_RDWR);

	return (fd < 0) ? -1 : 0;
}
571 
572 static void rte_eal_init_alert(const char *msg)
573 {
574 	fprintf(stderr, "EAL: FATAL: %s\n", msg);
575 	EAL_LOG(ERR, "%s", msg);
576 }
577 
/* Launch threads, called at application init().
 * Performs the full EAL bring-up in a strictly ordered pipeline:
 * argument parsing, plugin/trace/config/interrupt/alarm init, mp channel,
 * bus scan, IOVA mode selection, hugepage/memory/heap init, worker thread
 * creation and affinity, bus probe, services, and telemetry.
 * Returns the number of parsed EAL arguments on success, -1 on failure
 * (with rte_errno set). NOTE: run_once is only reset on some early error
 * paths; later failures leave EAL partially initialized and not retryable.
 */
int
rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	static uint32_t run_once;
	uint32_t has_run = 0;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_THREAD_NAME_SIZE];
	const struct rte_config *config = rte_eal_get_configuration();
	struct internal_config *internal_conf =
		eal_get_internal_configuration();
	bool has_phys_addr;
	enum rte_iova_mode iova_mode;

	/* checks if the machine is adequate */
	if (!rte_cpu_is_supported()) {
		rte_eal_init_alert("unsupported cpu type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* ensure init runs at most once per process */
	if (!rte_atomic_compare_exchange_strong_explicit(&run_once, &has_run, 1,
					rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		rte_eal_init_alert("already called initialization.");
		rte_errno = EALREADY;
		return -1;
	}

	eal_reset_internal_config(internal_conf);

	/* clone argv to report out later in telemetry */
	eal_save_args(argc, argv);

	/* set log level as early as possible */
	eal_log_level_parse(argc, argv);

	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}

	fctret = eal_parse_args(argc, argv);
	if (fctret < 0) {
		rte_eal_init_alert("Invalid 'command line' arguments.");
		rte_errno = EINVAL;
		rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
		return -1;
	}

	/* FreeBSD always uses legacy memory model */
	internal_conf->legacy_mem = true;
	if (internal_conf->in_memory) {
		EAL_LOG(WARNING, "Warning: ignoring unsupported flag, '%s'",
			OPT_IN_MEMORY);
		internal_conf->in_memory = false;
	}

	if (eal_plugins_init() < 0) {
		rte_eal_init_alert("Cannot init plugins");
		rte_errno = EINVAL;
		rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
		return -1;
	}

	if (eal_trace_init() < 0) {
		rte_eal_init_alert("Cannot init trace");
		rte_errno = EFAULT;
		rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
		return -1;
	}

	if (eal_option_device_parse()) {
		rte_errno = ENODEV;
		rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
		return -1;
	}

	if (rte_config_init() < 0) {
		rte_eal_init_alert("Cannot init config");
		return -1;
	}

	if (rte_eal_intr_init() < 0) {
		rte_eal_init_alert("Cannot init interrupt-handling thread");
		return -1;
	}

	if (rte_eal_alarm_init() < 0) {
		rte_eal_init_alert("Cannot init alarm");
		/* rte_eal_alarm_init sets rte_errno on failure. */
		return -1;
	}

	/* Put mp channel init before bus scan so that we can init the vdev
	 * bus through mp channel in the secondary process before the bus scan.
	 */
	if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
		rte_eal_init_alert("failed to init mp channel");
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			rte_errno = EFAULT;
			return -1;
		}
	}

	if (rte_bus_scan()) {
		rte_eal_init_alert("Cannot scan the buses for devices");
		rte_errno = ENODEV;
		rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
		return -1;
	}

	/*
	 * PA are only available for hugepages via contigmem.
	 * If contigmem is inaccessible, rte_eal_hugepage_init() will fail
	 * with a message describing the cause.
	 */
	has_phys_addr = internal_conf->no_hugetlbfs == 0;
	iova_mode = internal_conf->iova_mode;
	if (iova_mode == RTE_IOVA_DC) {
		EAL_LOG(DEBUG, "Specific IOVA mode is not requested, autodetecting");
		if (has_phys_addr) {
			EAL_LOG(DEBUG, "Selecting IOVA mode according to bus requests");
			iova_mode = rte_bus_get_iommu_class();
			if (iova_mode == RTE_IOVA_DC) {
				if (!RTE_IOVA_IN_MBUF) {
					iova_mode = RTE_IOVA_VA;
					EAL_LOG(DEBUG, "IOVA as VA mode is forced by build option.");
				} else	{
					iova_mode = RTE_IOVA_PA;
				}
			}
		} else {
			iova_mode = RTE_IOVA_VA;
		}
	}

	if (iova_mode == RTE_IOVA_PA && !has_phys_addr) {
		rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
		rte_errno = EINVAL;
		return -1;
	}

	if (iova_mode == RTE_IOVA_PA && !RTE_IOVA_IN_MBUF) {
		rte_eal_init_alert("Cannot use IOVA as 'PA' as it is disabled during build");
		rte_errno = EINVAL;
		return -1;
	}

	rte_eal_get_configuration()->iova_mode = iova_mode;
	EAL_LOG(INFO, "Selected IOVA mode '%s'",
		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");

	if (internal_conf->no_hugetlbfs == 0) {
		/* rte_config isn't initialized yet */
		ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
			eal_hugepage_info_init() :
			eal_hugepage_info_read();
		if (ret < 0) {
			rte_eal_init_alert("Cannot get hugepage information.");
			rte_errno = EACCES;
			rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed);
			return -1;
		}
	}

	/* no explicit -m / --socket-mem: derive memory size automatically */
	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) {
		if (internal_conf->no_hugetlbfs)
			internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE;
		else
			internal_conf->memory = eal_get_hugepage_mem_size();
	}

	if (internal_conf->vmware_tsc_map == 1) {
#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
		rte_cycles_vmware_tsc_map = 1;
		EAL_LOG(DEBUG, "Using VMWARE TSC MAP, "
				"you must have monitor_control.pseudo_perfctr = TRUE");
#else
		EAL_LOG(WARNING, "Ignoring --vmware-tsc-map because "
				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set");
#endif
	}

	/* in secondary processes, memory init may allocate additional fbarrays
	 * not present in primary processes, so to avoid any potential issues,
	 * initialize memzones first.
	 */
	if (rte_eal_memzone_init() < 0) {
		rte_eal_init_alert("Cannot init memzone");
		rte_errno = ENODEV;
		return -1;
	}

	rte_mcfg_mem_read_lock();

	if (rte_eal_memory_init() < 0) {
		rte_mcfg_mem_read_unlock();
		rte_eal_init_alert("Cannot init memory");
		rte_errno = ENOMEM;
		return -1;
	}

	if (rte_eal_malloc_heap_init() < 0) {
		rte_mcfg_mem_read_unlock();
		rte_eal_init_alert("Cannot init malloc heap");
		rte_errno = ENODEV;
		return -1;
	}

	rte_mcfg_mem_read_unlock();

	if (rte_eal_malloc_heap_populate() < 0) {
		rte_eal_init_alert("Cannot init malloc heap");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_tailqs_init() < 0) {
		rte_eal_init_alert("Cannot init tail queues for objects");
		rte_errno = EFAULT;
		return -1;
	}

	if (rte_eal_timer_init() < 0) {
		rte_eal_init_alert("Cannot init HPET or TSC timers");
		rte_errno = ENOTSUP;
		return -1;
	}

	eal_check_mem_on_local_socket();

	/* pin the initial (main) thread to the main lcore's cpuset */
	if (rte_thread_set_affinity_by_id(rte_thread_self(),
			&lcore_config[config->main_lcore].cpuset) != 0) {
		rte_eal_init_alert("Cannot set affinity");
		rte_errno = EINVAL;
		return -1;
	}
	__rte_thread_init(config->main_lcore,
		&lcore_config[config->main_lcore].cpuset);

	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));

	EAL_LOG(DEBUG, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])",
		config->main_lcore, (uintptr_t)pthread_self(), cpuset,
		ret == 0 ? "" : "...");

	RTE_LCORE_FOREACH_WORKER(i) {

		/*
		 * create communication pipes between main thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_main2worker) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_worker2main) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = rte_thread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, (void *)(uintptr_t)i);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, sizeof(thread_name),
				"dpdk-worker%d", i);
		rte_thread_set_name(lcore_config[i].thread_id, thread_name);

		ret = rte_thread_set_affinity_by_id(lcore_config[i].thread_id,
			&lcore_config[i].cpuset);
		if (ret != 0)
			rte_panic("Cannot set affinity\n");
	}

	/*
	 * Launch a dummy function on all worker lcores, so that main lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
	rte_eal_mp_wait_lcore();

	/* initialize services so vdevs register service during bus_probe. */
	ret = rte_service_init();
	if (ret) {
		rte_eal_init_alert("rte_service_init() failed");
		rte_errno = -ret;
		return -1;
	}

	/* Probe all the buses and devices/drivers on them */
	if (rte_bus_probe()) {
		rte_eal_init_alert("Cannot probe devices");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* initialize default service/lcore mappings and start running. Ignore
	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
	 */
	ret = rte_service_start_with_defaults();
	if (ret < 0 && ret != -ENOTSUP) {
		rte_errno = -ret;
		return -1;
	}

	/*
	 * Clean up unused files in runtime directory. We do this at the end of
	 * init and not at the beginning because we want to clean stuff up
	 * whether we are primary or secondary process, but we cannot remove
	 * primary process' files because secondary should be able to run even
	 * if primary process is dead.
	 *
	 * In no_shconf mode, no runtime directory is created in the first
	 * place, so no cleanup needed.
	 */
	if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) {
		rte_eal_init_alert("Cannot clear runtime directory");
		return -1;
	}
	if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) {
		if (rte_telemetry_init(rte_eal_get_runtime_dir(),
				rte_version(),
				&internal_conf->ctrl_cpuset) != 0)
			return -1;
	}

	/* mark the shared config complete; unblocks waiting secondaries */
	eal_mcfg_complete();

	return fctret;
}
912 
/* Tear down EAL state created by rte_eal_init().
 * Runs at most once per process; subsequent calls fail with EALREADY.
 * Returns 0 on success, -1 if cleanup was already performed.
 */
int
rte_eal_cleanup(void)
{
	static uint32_t run_once;
	uint32_t has_run = 0;

	if (!rte_atomic_compare_exchange_strong_explicit(&run_once, &has_run, 1,
			rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		EAL_LOG(WARNING, "Already called cleanup");
		rte_errno = EALREADY;
		return -1;
	}

	struct internal_config *internal_conf =
		eal_get_internal_configuration();
	rte_service_finalize();
	rte_mp_channel_cleanup();
	eal_bus_cleanup();
	rte_trace_save();
	eal_trace_fini();
	rte_eal_alarm_cleanup();
	/* after this point, any DPDK pointers will become dangling */
	rte_eal_memory_detach();
	eal_cleanup_config(internal_conf);
	return 0;
}
939 
/* Return non-zero if --create-uio-dev was passed on the command line. */
int rte_eal_create_uio_dev(void)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();
	return internal_conf->create_uio_dev;
}
946 
/* VFIO is Linux-only; FreeBSD reports no VFIO interrupt mode. */
enum rte_intr_mode
rte_eal_vfio_intr_mode(void)
{
	return RTE_INTR_MODE_NONE;
}
952 
/* No-op on FreeBSD: VF tokens are a Linux VFIO concept. */
void
rte_eal_vfio_get_vf_token(__rte_unused rte_uuid_t vf_token)
{
}
957 
/* VFIO is a Linux kernel interface; on FreeBSD all rte_vfio_* entry
 * points are stubs that fail with rte_errno = ENOTSUP.
 */
int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
		      __rte_unused const char *dev_addr,
		      __rte_unused int *vfio_dev_fd,
		      __rte_unused struct vfio_device_info *device_info)
{
	rte_errno = ENOTSUP;
	return -1;
}
966 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int rte_vfio_release_device(__rte_unused const char *sysfs_base,
			__rte_unused const char *dev_addr,
			__rte_unused int fd)
{
	rte_errno = ENOTSUP;
	return -1;
}
974 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int rte_vfio_enable(__rte_unused const char *modname)
{
	rte_errno = ENOTSUP;
	return -1;
}
980 
/* VFIO can never be enabled on FreeBSD; always reports 0 (disabled). */
int rte_vfio_is_enabled(__rte_unused const char *modname)
{
	return 0;
}
985 
/* VFIO no-IOMMU mode does not exist on FreeBSD; always 0 (disabled). */
int rte_vfio_noiommu_is_enabled(void)
{
	return 0;
}
990 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
	rte_errno = ENOTSUP;
	return -1;
}
996 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
		       __rte_unused const char *dev_addr,
		       __rte_unused int *iommu_group_num)
{
	rte_errno = ENOTSUP;
	return -1;
}
1005 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_get_container_fd(void)
{
	rte_errno = ENOTSUP;
	return -1;
}
1012 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
	rte_errno = ENOTSUP;
	return -1;
}
1019 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_create(void)
{
	rte_errno = ENOTSUP;
	return -1;
}
1026 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
	rte_errno = ENOTSUP;
	return -1;
}
1033 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_group_bind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	rte_errno = ENOTSUP;
	return -1;
}
1041 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	rte_errno = ENOTSUP;
	return -1;
}
1049 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_dma_map(__rte_unused int container_fd,
			__rte_unused uint64_t vaddr,
			__rte_unused uint64_t iova,
			__rte_unused uint64_t len)
{
	rte_errno = ENOTSUP;
	return -1;
}
1059 
/* Not supported on FreeBSD (VFIO is Linux-only). */
int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
			__rte_unused uint64_t vaddr,
			__rte_unused uint64_t iova,
			__rte_unused uint64_t len)
{
	rte_errno = ENOTSUP;
	return -1;
}
1069