199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 299a2dd95SBruce Richardson * Copyright(c) 2010-2018 Intel Corporation. 399a2dd95SBruce Richardson * Copyright(c) 2012-2014 6WIND S.A. 499a2dd95SBruce Richardson */ 599a2dd95SBruce Richardson 699a2dd95SBruce Richardson #include <stdio.h> 799a2dd95SBruce Richardson #include <stdlib.h> 899a2dd95SBruce Richardson #include <stdint.h> 999a2dd95SBruce Richardson #include <string.h> 1099a2dd95SBruce Richardson #include <stdarg.h> 1199a2dd95SBruce Richardson #include <unistd.h> 1299a2dd95SBruce Richardson #include <pthread.h> 1399a2dd95SBruce Richardson #include <syslog.h> 1499a2dd95SBruce Richardson #include <getopt.h> 1599a2dd95SBruce Richardson #include <sys/file.h> 1699a2dd95SBruce Richardson #include <dirent.h> 1799a2dd95SBruce Richardson #include <fcntl.h> 1899a2dd95SBruce Richardson #include <fnmatch.h> 1999a2dd95SBruce Richardson #include <stddef.h> 2099a2dd95SBruce Richardson #include <errno.h> 2199a2dd95SBruce Richardson #include <limits.h> 2299a2dd95SBruce Richardson #include <sys/mman.h> 2399a2dd95SBruce Richardson #include <sys/queue.h> 2499a2dd95SBruce Richardson #include <sys/stat.h> 2599a2dd95SBruce Richardson #if defined(RTE_ARCH_X86) 2699a2dd95SBruce Richardson #include <sys/io.h> 2799a2dd95SBruce Richardson #endif 2899a2dd95SBruce Richardson #include <linux/version.h> 2999a2dd95SBruce Richardson 3099a2dd95SBruce Richardson #include <rte_compat.h> 3199a2dd95SBruce Richardson #include <rte_common.h> 3299a2dd95SBruce Richardson #include <rte_debug.h> 3399a2dd95SBruce Richardson #include <rte_memory.h> 3499a2dd95SBruce Richardson #include <rte_launch.h> 3599a2dd95SBruce Richardson #include <rte_eal.h> 3699a2dd95SBruce Richardson #include <rte_errno.h> 3799a2dd95SBruce Richardson #include <rte_per_lcore.h> 3899a2dd95SBruce Richardson #include <rte_lcore.h> 3999a2dd95SBruce Richardson #include <rte_service_component.h> 4099a2dd95SBruce Richardson #include <rte_log.h> 4199a2dd95SBruce Richardson 
#include <rte_random.h> 4299a2dd95SBruce Richardson #include <rte_cycles.h> 4399a2dd95SBruce Richardson #include <rte_string_fns.h> 4499a2dd95SBruce Richardson #include <rte_cpuflags.h> 4599a2dd95SBruce Richardson #include <rte_interrupts.h> 4699a2dd95SBruce Richardson #include <rte_bus.h> 4799a2dd95SBruce Richardson #include <rte_dev.h> 4899a2dd95SBruce Richardson #include <rte_devargs.h> 4999a2dd95SBruce Richardson #include <rte_version.h> 5099a2dd95SBruce Richardson #include <malloc_heap.h> 5199a2dd95SBruce Richardson #include <rte_vfio.h> 5299a2dd95SBruce Richardson 5399a2dd95SBruce Richardson #include <telemetry_internal.h> 5499a2dd95SBruce Richardson #include "eal_private.h" 5599a2dd95SBruce Richardson #include "eal_thread.h" 5699a2dd95SBruce Richardson #include "eal_internal_cfg.h" 5799a2dd95SBruce Richardson #include "eal_filesystem.h" 5899a2dd95SBruce Richardson #include "eal_hugepages.h" 5999a2dd95SBruce Richardson #include "eal_memcfg.h" 6099a2dd95SBruce Richardson #include "eal_trace.h" 6199a2dd95SBruce Richardson #include "eal_log.h" 6299a2dd95SBruce Richardson #include "eal_options.h" 6399a2dd95SBruce Richardson #include "eal_vfio.h" 6499a2dd95SBruce Richardson #include "hotplug_mp.h" 6599a2dd95SBruce Richardson 6699a2dd95SBruce Richardson #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) 6799a2dd95SBruce Richardson 6899a2dd95SBruce Richardson #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) 6999a2dd95SBruce Richardson 7099a2dd95SBruce Richardson #define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups" 7199a2dd95SBruce Richardson 7299a2dd95SBruce Richardson /* define fd variable here, because file needs to be kept open for the 7399a2dd95SBruce Richardson * duration of the program, as we hold a write lock on it in the primary proc */ 7499a2dd95SBruce Richardson static int mem_cfg_fd = -1; 7599a2dd95SBruce Richardson 7699a2dd95SBruce Richardson static struct flock wr_lock = { 7799a2dd95SBruce Richardson .l_type = F_WRLCK, 
7899a2dd95SBruce Richardson .l_whence = SEEK_SET, 7999a2dd95SBruce Richardson .l_start = offsetof(struct rte_mem_config, memsegs), 8099a2dd95SBruce Richardson .l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs), 8199a2dd95SBruce Richardson }; 8299a2dd95SBruce Richardson 8399a2dd95SBruce Richardson /* internal configuration (per-core) */ 8499a2dd95SBruce Richardson struct lcore_config lcore_config[RTE_MAX_LCORE]; 8599a2dd95SBruce Richardson 8699a2dd95SBruce Richardson /* used by rte_rdtsc() */ 8799a2dd95SBruce Richardson int rte_cycles_vmware_tsc_map; 8899a2dd95SBruce Richardson 8999a2dd95SBruce Richardson static const char *default_runtime_dir = "/var/run"; 9099a2dd95SBruce Richardson 9199a2dd95SBruce Richardson int 9299a2dd95SBruce Richardson eal_create_runtime_dir(void) 9399a2dd95SBruce Richardson { 9499a2dd95SBruce Richardson const char *directory = default_runtime_dir; 9599a2dd95SBruce Richardson const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR"); 9699a2dd95SBruce Richardson const char *fallback = "/tmp"; 9799a2dd95SBruce Richardson char run_dir[PATH_MAX]; 9899a2dd95SBruce Richardson char tmp[PATH_MAX]; 9999a2dd95SBruce Richardson int ret; 10099a2dd95SBruce Richardson 10199a2dd95SBruce Richardson if (getuid() != 0) { 10299a2dd95SBruce Richardson /* try XDG path first, fall back to /tmp */ 10399a2dd95SBruce Richardson if (xdg_runtime_dir != NULL) 10499a2dd95SBruce Richardson directory = xdg_runtime_dir; 10599a2dd95SBruce Richardson else 10699a2dd95SBruce Richardson directory = fallback; 10799a2dd95SBruce Richardson } 10899a2dd95SBruce Richardson /* create DPDK subdirectory under runtime dir */ 10999a2dd95SBruce Richardson ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory); 11099a2dd95SBruce Richardson if (ret < 0 || ret == sizeof(tmp)) { 11199a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n"); 11299a2dd95SBruce Richardson return -1; 11399a2dd95SBruce Richardson } 11499a2dd95SBruce Richardson 11599a2dd95SBruce 
Richardson /* create prefix-specific subdirectory under DPDK runtime dir */ 11699a2dd95SBruce Richardson ret = snprintf(run_dir, sizeof(run_dir), "%s/%s", 11799a2dd95SBruce Richardson tmp, eal_get_hugefile_prefix()); 11899a2dd95SBruce Richardson if (ret < 0 || ret == sizeof(run_dir)) { 11999a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); 12099a2dd95SBruce Richardson return -1; 12199a2dd95SBruce Richardson } 12299a2dd95SBruce Richardson 12399a2dd95SBruce Richardson /* create the path if it doesn't exist. no "mkdir -p" here, so do it 12499a2dd95SBruce Richardson * step by step. 12599a2dd95SBruce Richardson */ 12699a2dd95SBruce Richardson ret = mkdir(tmp, 0700); 12799a2dd95SBruce Richardson if (ret < 0 && errno != EEXIST) { 12899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", 12999a2dd95SBruce Richardson tmp, strerror(errno)); 13099a2dd95SBruce Richardson return -1; 13199a2dd95SBruce Richardson } 13299a2dd95SBruce Richardson 13399a2dd95SBruce Richardson ret = mkdir(run_dir, 0700); 13499a2dd95SBruce Richardson if (ret < 0 && errno != EEXIST) { 13599a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", 13699a2dd95SBruce Richardson run_dir, strerror(errno)); 13799a2dd95SBruce Richardson return -1; 13899a2dd95SBruce Richardson } 13999a2dd95SBruce Richardson 14099a2dd95SBruce Richardson if (eal_set_runtime_dir(run_dir, sizeof(run_dir))) 14199a2dd95SBruce Richardson return -1; 14299a2dd95SBruce Richardson 14399a2dd95SBruce Richardson return 0; 14499a2dd95SBruce Richardson } 14599a2dd95SBruce Richardson 14699a2dd95SBruce Richardson int 14799a2dd95SBruce Richardson eal_clean_runtime_dir(void) 14899a2dd95SBruce Richardson { 14999a2dd95SBruce Richardson const char *runtime_dir = rte_eal_get_runtime_dir(); 15099a2dd95SBruce Richardson DIR *dir; 15199a2dd95SBruce Richardson struct dirent *dirent; 15299a2dd95SBruce Richardson int dir_fd, fd, lck_result; 15399a2dd95SBruce Richardson static 
const char * const filters[] = { 15499a2dd95SBruce Richardson "fbarray_*", 15599a2dd95SBruce Richardson "mp_socket_*" 15699a2dd95SBruce Richardson }; 15799a2dd95SBruce Richardson 15899a2dd95SBruce Richardson /* open directory */ 15999a2dd95SBruce Richardson dir = opendir(runtime_dir); 16099a2dd95SBruce Richardson if (!dir) { 16199a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", 16299a2dd95SBruce Richardson runtime_dir); 16399a2dd95SBruce Richardson goto error; 16499a2dd95SBruce Richardson } 16599a2dd95SBruce Richardson dir_fd = dirfd(dir); 16699a2dd95SBruce Richardson 16799a2dd95SBruce Richardson /* lock the directory before doing anything, to avoid races */ 16899a2dd95SBruce Richardson if (flock(dir_fd, LOCK_EX) < 0) { 16999a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", 17099a2dd95SBruce Richardson runtime_dir); 17199a2dd95SBruce Richardson goto error; 17299a2dd95SBruce Richardson } 17399a2dd95SBruce Richardson 17499a2dd95SBruce Richardson dirent = readdir(dir); 17599a2dd95SBruce Richardson if (!dirent) { 17699a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", 17799a2dd95SBruce Richardson runtime_dir); 17899a2dd95SBruce Richardson goto error; 17999a2dd95SBruce Richardson } 18099a2dd95SBruce Richardson 18199a2dd95SBruce Richardson while (dirent != NULL) { 18299a2dd95SBruce Richardson unsigned int f_idx; 18399a2dd95SBruce Richardson bool skip = true; 18499a2dd95SBruce Richardson 18599a2dd95SBruce Richardson /* skip files that don't match the patterns */ 18699a2dd95SBruce Richardson for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { 18799a2dd95SBruce Richardson const char *filter = filters[f_idx]; 18899a2dd95SBruce Richardson 18999a2dd95SBruce Richardson if (fnmatch(filter, dirent->d_name, 0) == 0) { 19099a2dd95SBruce Richardson skip = false; 19199a2dd95SBruce Richardson break; 19299a2dd95SBruce Richardson } 19399a2dd95SBruce Richardson } 19499a2dd95SBruce 
Richardson if (skip) { 19599a2dd95SBruce Richardson dirent = readdir(dir); 19699a2dd95SBruce Richardson continue; 19799a2dd95SBruce Richardson } 19899a2dd95SBruce Richardson 19999a2dd95SBruce Richardson /* try and lock the file */ 20099a2dd95SBruce Richardson fd = openat(dir_fd, dirent->d_name, O_RDONLY); 20199a2dd95SBruce Richardson 20299a2dd95SBruce Richardson /* skip to next file */ 20399a2dd95SBruce Richardson if (fd == -1) { 20499a2dd95SBruce Richardson dirent = readdir(dir); 20599a2dd95SBruce Richardson continue; 20699a2dd95SBruce Richardson } 20799a2dd95SBruce Richardson 20899a2dd95SBruce Richardson /* non-blocking lock */ 20999a2dd95SBruce Richardson lck_result = flock(fd, LOCK_EX | LOCK_NB); 21099a2dd95SBruce Richardson 21199a2dd95SBruce Richardson /* if lock succeeds, remove the file */ 21299a2dd95SBruce Richardson if (lck_result != -1) 21399a2dd95SBruce Richardson unlinkat(dir_fd, dirent->d_name, 0); 21499a2dd95SBruce Richardson close(fd); 21599a2dd95SBruce Richardson dirent = readdir(dir); 21699a2dd95SBruce Richardson } 21799a2dd95SBruce Richardson 21899a2dd95SBruce Richardson /* closedir closes dir_fd and drops the lock */ 21999a2dd95SBruce Richardson closedir(dir); 22099a2dd95SBruce Richardson return 0; 22199a2dd95SBruce Richardson 22299a2dd95SBruce Richardson error: 22399a2dd95SBruce Richardson if (dir) 22499a2dd95SBruce Richardson closedir(dir); 22599a2dd95SBruce Richardson 22699a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", 22799a2dd95SBruce Richardson strerror(errno)); 22899a2dd95SBruce Richardson 22999a2dd95SBruce Richardson return -1; 23099a2dd95SBruce Richardson } 23199a2dd95SBruce Richardson 23299a2dd95SBruce Richardson /* parse a sysfs (or other) file containing one integer value */ 23399a2dd95SBruce Richardson int 23499a2dd95SBruce Richardson eal_parse_sysfs_value(const char *filename, unsigned long *val) 23599a2dd95SBruce Richardson { 23699a2dd95SBruce Richardson FILE *f; 23799a2dd95SBruce Richardson 
char buf[BUFSIZ]; 23899a2dd95SBruce Richardson char *end = NULL; 23999a2dd95SBruce Richardson 24099a2dd95SBruce Richardson if ((f = fopen(filename, "r")) == NULL) { 24199a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", 24299a2dd95SBruce Richardson __func__, filename); 24399a2dd95SBruce Richardson return -1; 24499a2dd95SBruce Richardson } 24599a2dd95SBruce Richardson 24699a2dd95SBruce Richardson if (fgets(buf, sizeof(buf), f) == NULL) { 24799a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", 24899a2dd95SBruce Richardson __func__, filename); 24999a2dd95SBruce Richardson fclose(f); 25099a2dd95SBruce Richardson return -1; 25199a2dd95SBruce Richardson } 25299a2dd95SBruce Richardson *val = strtoul(buf, &end, 0); 25399a2dd95SBruce Richardson if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { 25499a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", 25599a2dd95SBruce Richardson __func__, filename); 25699a2dd95SBruce Richardson fclose(f); 25799a2dd95SBruce Richardson return -1; 25899a2dd95SBruce Richardson } 25999a2dd95SBruce Richardson fclose(f); 26099a2dd95SBruce Richardson return 0; 26199a2dd95SBruce Richardson } 26299a2dd95SBruce Richardson 26399a2dd95SBruce Richardson 26499a2dd95SBruce Richardson /* create memory configuration in shared/mmap memory. Take out 26599a2dd95SBruce Richardson * a write lock on the memsegs, so we can auto-detect primary/secondary. 26699a2dd95SBruce Richardson * This means we never close the file while running (auto-close on exit). 26799a2dd95SBruce Richardson * We also don't lock the whole file, so that in future we can use read-locks 26899a2dd95SBruce Richardson * on other parts, e.g. memzones, to detect if there are running secondary 26999a2dd95SBruce Richardson * processes. 
*/ 27099a2dd95SBruce Richardson static int 27199a2dd95SBruce Richardson rte_eal_config_create(void) 27299a2dd95SBruce Richardson { 27399a2dd95SBruce Richardson struct rte_config *config = rte_eal_get_configuration(); 27499a2dd95SBruce Richardson size_t page_sz = sysconf(_SC_PAGE_SIZE); 27599a2dd95SBruce Richardson size_t cfg_len = sizeof(*config->mem_config); 27699a2dd95SBruce Richardson size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); 27799a2dd95SBruce Richardson void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; 27899a2dd95SBruce Richardson int retval; 27999a2dd95SBruce Richardson const struct internal_config *internal_conf = 28099a2dd95SBruce Richardson eal_get_internal_configuration(); 28199a2dd95SBruce Richardson 28299a2dd95SBruce Richardson const char *pathname = eal_runtime_config_path(); 28399a2dd95SBruce Richardson 28499a2dd95SBruce Richardson if (internal_conf->no_shconf) 28599a2dd95SBruce Richardson return 0; 28699a2dd95SBruce Richardson 28799a2dd95SBruce Richardson /* map the config before hugepage address so that we don't waste a page */ 28899a2dd95SBruce Richardson if (internal_conf->base_virtaddr != 0) 28999a2dd95SBruce Richardson rte_mem_cfg_addr = (void *) 29099a2dd95SBruce Richardson RTE_ALIGN_FLOOR(internal_conf->base_virtaddr - 29199a2dd95SBruce Richardson sizeof(struct rte_mem_config), page_sz); 29299a2dd95SBruce Richardson else 29399a2dd95SBruce Richardson rte_mem_cfg_addr = NULL; 29499a2dd95SBruce Richardson 29599a2dd95SBruce Richardson if (mem_cfg_fd < 0){ 29699a2dd95SBruce Richardson mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); 29799a2dd95SBruce Richardson if (mem_cfg_fd < 0) { 29899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 29999a2dd95SBruce Richardson pathname); 30099a2dd95SBruce Richardson return -1; 30199a2dd95SBruce Richardson } 30299a2dd95SBruce Richardson } 30399a2dd95SBruce Richardson 30499a2dd95SBruce Richardson retval = ftruncate(mem_cfg_fd, cfg_len); 30599a2dd95SBruce Richardson if 
(retval < 0){ 30699a2dd95SBruce Richardson close(mem_cfg_fd); 30799a2dd95SBruce Richardson mem_cfg_fd = -1; 30899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n", 30999a2dd95SBruce Richardson pathname); 31099a2dd95SBruce Richardson return -1; 31199a2dd95SBruce Richardson } 31299a2dd95SBruce Richardson 31399a2dd95SBruce Richardson retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); 31499a2dd95SBruce Richardson if (retval < 0){ 31599a2dd95SBruce Richardson close(mem_cfg_fd); 31699a2dd95SBruce Richardson mem_cfg_fd = -1; 31799a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary " 31899a2dd95SBruce Richardson "process running?\n", pathname); 31999a2dd95SBruce Richardson return -1; 32099a2dd95SBruce Richardson } 32199a2dd95SBruce Richardson 32299a2dd95SBruce Richardson /* reserve space for config */ 32399a2dd95SBruce Richardson rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, 32499a2dd95SBruce Richardson &cfg_len_aligned, page_sz, 0, 0); 32599a2dd95SBruce Richardson if (rte_mem_cfg_addr == NULL) { 32699a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); 32799a2dd95SBruce Richardson close(mem_cfg_fd); 32899a2dd95SBruce Richardson mem_cfg_fd = -1; 32999a2dd95SBruce Richardson return -1; 33099a2dd95SBruce Richardson } 33199a2dd95SBruce Richardson 33299a2dd95SBruce Richardson /* remap the actual file into the space we've just reserved */ 33399a2dd95SBruce Richardson mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, 33499a2dd95SBruce Richardson cfg_len_aligned, PROT_READ | PROT_WRITE, 33599a2dd95SBruce Richardson MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); 33699a2dd95SBruce Richardson if (mapped_mem_cfg_addr == MAP_FAILED) { 33799a2dd95SBruce Richardson munmap(rte_mem_cfg_addr, cfg_len); 33899a2dd95SBruce Richardson close(mem_cfg_fd); 33999a2dd95SBruce Richardson mem_cfg_fd = -1; 34099a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); 
34199a2dd95SBruce Richardson return -1; 34299a2dd95SBruce Richardson } 34399a2dd95SBruce Richardson 34499a2dd95SBruce Richardson memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config)); 34599a2dd95SBruce Richardson config->mem_config = rte_mem_cfg_addr; 34699a2dd95SBruce Richardson 34799a2dd95SBruce Richardson /* store address of the config in the config itself so that secondary 34899a2dd95SBruce Richardson * processes could later map the config into this exact location 34999a2dd95SBruce Richardson */ 35099a2dd95SBruce Richardson config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; 35199a2dd95SBruce Richardson config->mem_config->dma_maskbits = 0; 35299a2dd95SBruce Richardson 35399a2dd95SBruce Richardson return 0; 35499a2dd95SBruce Richardson } 35599a2dd95SBruce Richardson 35699a2dd95SBruce Richardson /* attach to an existing shared memory config */ 35799a2dd95SBruce Richardson static int 35899a2dd95SBruce Richardson rte_eal_config_attach(void) 35999a2dd95SBruce Richardson { 36099a2dd95SBruce Richardson struct rte_config *config = rte_eal_get_configuration(); 36199a2dd95SBruce Richardson struct rte_mem_config *mem_config; 36299a2dd95SBruce Richardson const struct internal_config *internal_conf = 36399a2dd95SBruce Richardson eal_get_internal_configuration(); 36499a2dd95SBruce Richardson 36599a2dd95SBruce Richardson const char *pathname = eal_runtime_config_path(); 36699a2dd95SBruce Richardson 36799a2dd95SBruce Richardson if (internal_conf->no_shconf) 36899a2dd95SBruce Richardson return 0; 36999a2dd95SBruce Richardson 37099a2dd95SBruce Richardson if (mem_cfg_fd < 0){ 37199a2dd95SBruce Richardson mem_cfg_fd = open(pathname, O_RDWR); 37299a2dd95SBruce Richardson if (mem_cfg_fd < 0) { 37399a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 37499a2dd95SBruce Richardson pathname); 37599a2dd95SBruce Richardson return -1; 37699a2dd95SBruce Richardson } 37799a2dd95SBruce Richardson } 37899a2dd95SBruce Richardson 
37999a2dd95SBruce Richardson /* map it as read-only first */ 38099a2dd95SBruce Richardson mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), 38199a2dd95SBruce Richardson PROT_READ, MAP_SHARED, mem_cfg_fd, 0); 38299a2dd95SBruce Richardson if (mem_config == MAP_FAILED) { 38399a2dd95SBruce Richardson close(mem_cfg_fd); 38499a2dd95SBruce Richardson mem_cfg_fd = -1; 38599a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n", 38699a2dd95SBruce Richardson errno, strerror(errno)); 38799a2dd95SBruce Richardson return -1; 38899a2dd95SBruce Richardson } 38999a2dd95SBruce Richardson 39099a2dd95SBruce Richardson config->mem_config = mem_config; 39199a2dd95SBruce Richardson 39299a2dd95SBruce Richardson return 0; 39399a2dd95SBruce Richardson } 39499a2dd95SBruce Richardson 39599a2dd95SBruce Richardson /* reattach the shared config at exact memory location primary process has it */ 39699a2dd95SBruce Richardson static int 39799a2dd95SBruce Richardson rte_eal_config_reattach(void) 39899a2dd95SBruce Richardson { 39999a2dd95SBruce Richardson struct rte_config *config = rte_eal_get_configuration(); 40099a2dd95SBruce Richardson struct rte_mem_config *mem_config; 40199a2dd95SBruce Richardson void *rte_mem_cfg_addr; 40299a2dd95SBruce Richardson const struct internal_config *internal_conf = 40399a2dd95SBruce Richardson eal_get_internal_configuration(); 40499a2dd95SBruce Richardson 40599a2dd95SBruce Richardson if (internal_conf->no_shconf) 40699a2dd95SBruce Richardson return 0; 40799a2dd95SBruce Richardson 40899a2dd95SBruce Richardson /* save the address primary process has mapped shared config to */ 40999a2dd95SBruce Richardson rte_mem_cfg_addr = 41099a2dd95SBruce Richardson (void *) (uintptr_t) config->mem_config->mem_cfg_addr; 41199a2dd95SBruce Richardson 41299a2dd95SBruce Richardson /* unmap original config */ 41399a2dd95SBruce Richardson munmap(config->mem_config, sizeof(struct rte_mem_config)); 41499a2dd95SBruce Richardson 
41599a2dd95SBruce Richardson /* remap the config at proper address */ 41699a2dd95SBruce Richardson mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, 41799a2dd95SBruce Richardson sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, 41899a2dd95SBruce Richardson mem_cfg_fd, 0); 41999a2dd95SBruce Richardson 42099a2dd95SBruce Richardson close(mem_cfg_fd); 42199a2dd95SBruce Richardson mem_cfg_fd = -1; 42299a2dd95SBruce Richardson 42399a2dd95SBruce Richardson if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { 42499a2dd95SBruce Richardson if (mem_config != MAP_FAILED) { 42599a2dd95SBruce Richardson /* errno is stale, don't use */ 42699a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]" 42799a2dd95SBruce Richardson " - please use '--" OPT_BASE_VIRTADDR 42899a2dd95SBruce Richardson "' option\n", rte_mem_cfg_addr, mem_config); 42999a2dd95SBruce Richardson munmap(mem_config, sizeof(struct rte_mem_config)); 43099a2dd95SBruce Richardson return -1; 43199a2dd95SBruce Richardson } 43299a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! 
error %i (%s)\n", 43399a2dd95SBruce Richardson errno, strerror(errno)); 43499a2dd95SBruce Richardson return -1; 43599a2dd95SBruce Richardson } 43699a2dd95SBruce Richardson 43799a2dd95SBruce Richardson config->mem_config = mem_config; 43899a2dd95SBruce Richardson 43999a2dd95SBruce Richardson return 0; 44099a2dd95SBruce Richardson } 44199a2dd95SBruce Richardson 44299a2dd95SBruce Richardson /* Detect if we are a primary or a secondary process */ 44399a2dd95SBruce Richardson enum rte_proc_type_t 44499a2dd95SBruce Richardson eal_proc_type_detect(void) 44599a2dd95SBruce Richardson { 44699a2dd95SBruce Richardson enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; 44799a2dd95SBruce Richardson const char *pathname = eal_runtime_config_path(); 44899a2dd95SBruce Richardson const struct internal_config *internal_conf = 44999a2dd95SBruce Richardson eal_get_internal_configuration(); 45099a2dd95SBruce Richardson 45199a2dd95SBruce Richardson /* if there no shared config, there can be no secondary processes */ 45299a2dd95SBruce Richardson if (!internal_conf->no_shconf) { 45399a2dd95SBruce Richardson /* if we can open the file but not get a write-lock we are a 45499a2dd95SBruce Richardson * secondary process. NOTE: if we get a file handle back, we 45599a2dd95SBruce Richardson * keep that open and don't close it to prevent a race condition 45699a2dd95SBruce Richardson * between multiple opens. 45799a2dd95SBruce Richardson */ 45899a2dd95SBruce Richardson if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && 45999a2dd95SBruce Richardson (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) 46099a2dd95SBruce Richardson ptype = RTE_PROC_SECONDARY; 46199a2dd95SBruce Richardson } 46299a2dd95SBruce Richardson 46399a2dd95SBruce Richardson RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", 46499a2dd95SBruce Richardson ptype == RTE_PROC_PRIMARY ? 
"PRIMARY" : "SECONDARY"); 46599a2dd95SBruce Richardson 46699a2dd95SBruce Richardson return ptype; 46799a2dd95SBruce Richardson } 46899a2dd95SBruce Richardson 46999a2dd95SBruce Richardson /* Sets up rte_config structure with the pointer to shared memory config.*/ 47099a2dd95SBruce Richardson static int 47199a2dd95SBruce Richardson rte_config_init(void) 47299a2dd95SBruce Richardson { 47399a2dd95SBruce Richardson struct rte_config *config = rte_eal_get_configuration(); 47499a2dd95SBruce Richardson const struct internal_config *internal_conf = 47599a2dd95SBruce Richardson eal_get_internal_configuration(); 47699a2dd95SBruce Richardson 47799a2dd95SBruce Richardson config->process_type = internal_conf->process_type; 47899a2dd95SBruce Richardson 47999a2dd95SBruce Richardson switch (config->process_type) { 48099a2dd95SBruce Richardson case RTE_PROC_PRIMARY: 48199a2dd95SBruce Richardson if (rte_eal_config_create() < 0) 48299a2dd95SBruce Richardson return -1; 48399a2dd95SBruce Richardson eal_mcfg_update_from_internal(); 48499a2dd95SBruce Richardson break; 48599a2dd95SBruce Richardson case RTE_PROC_SECONDARY: 48699a2dd95SBruce Richardson if (rte_eal_config_attach() < 0) 48799a2dd95SBruce Richardson return -1; 48899a2dd95SBruce Richardson eal_mcfg_wait_complete(); 48999a2dd95SBruce Richardson if (eal_mcfg_check_version() < 0) { 49099a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n"); 49199a2dd95SBruce Richardson return -1; 49299a2dd95SBruce Richardson } 49399a2dd95SBruce Richardson if (rte_eal_config_reattach() < 0) 49499a2dd95SBruce Richardson return -1; 49599a2dd95SBruce Richardson if (!__rte_mp_enable()) { 49699a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n"); 49799a2dd95SBruce Richardson return -1; 49899a2dd95SBruce Richardson } 49999a2dd95SBruce Richardson eal_mcfg_update_internal(); 50099a2dd95SBruce Richardson break; 50199a2dd95SBruce Richardson case RTE_PROC_AUTO: 
50299a2dd95SBruce Richardson case RTE_PROC_INVALID: 50399a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Invalid process type %d\n", 50499a2dd95SBruce Richardson config->process_type); 50599a2dd95SBruce Richardson return -1; 50699a2dd95SBruce Richardson } 50799a2dd95SBruce Richardson 50899a2dd95SBruce Richardson return 0; 50999a2dd95SBruce Richardson } 51099a2dd95SBruce Richardson 51199a2dd95SBruce Richardson /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ 51299a2dd95SBruce Richardson static void 51399a2dd95SBruce Richardson eal_hugedirs_unlock(void) 51499a2dd95SBruce Richardson { 51599a2dd95SBruce Richardson int i; 51699a2dd95SBruce Richardson struct internal_config *internal_conf = 51799a2dd95SBruce Richardson eal_get_internal_configuration(); 51899a2dd95SBruce Richardson 51999a2dd95SBruce Richardson for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) 52099a2dd95SBruce Richardson { 52199a2dd95SBruce Richardson /* skip uninitialized */ 52299a2dd95SBruce Richardson if (internal_conf->hugepage_info[i].lock_descriptor < 0) 52399a2dd95SBruce Richardson continue; 52499a2dd95SBruce Richardson /* unlock hugepage file */ 52599a2dd95SBruce Richardson flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN); 52699a2dd95SBruce Richardson close(internal_conf->hugepage_info[i].lock_descriptor); 52799a2dd95SBruce Richardson /* reset the field */ 52899a2dd95SBruce Richardson internal_conf->hugepage_info[i].lock_descriptor = -1; 52999a2dd95SBruce Richardson } 53099a2dd95SBruce Richardson } 53199a2dd95SBruce Richardson 53299a2dd95SBruce Richardson /* display usage */ 53399a2dd95SBruce Richardson static void 53499a2dd95SBruce Richardson eal_usage(const char *prgname) 53599a2dd95SBruce Richardson { 53699a2dd95SBruce Richardson rte_usage_hook_t hook = eal_get_application_usage_hook(); 53799a2dd95SBruce Richardson 53899a2dd95SBruce Richardson printf("\nUsage: %s ", prgname); 53999a2dd95SBruce Richardson eal_common_usage(); 54099a2dd95SBruce Richardson printf("EAL 
Linux options:\n" 54199a2dd95SBruce Richardson " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" 54299a2dd95SBruce Richardson " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" 54399a2dd95SBruce Richardson " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" 54499a2dd95SBruce Richardson " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" 54599a2dd95SBruce Richardson " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" 54699a2dd95SBruce Richardson " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" 54799a2dd95SBruce Richardson " --"OPT_VFIO_VF_TOKEN" VF token (UUID) shared between SR-IOV PF and VFs\n" 54899a2dd95SBruce Richardson " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" 54999a2dd95SBruce Richardson " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" 55099a2dd95SBruce Richardson " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n" 55199a2dd95SBruce Richardson "\n"); 55299a2dd95SBruce Richardson /* Allow the application to print its usage message too if hook is set */ 55399a2dd95SBruce Richardson if (hook) { 55499a2dd95SBruce Richardson printf("===== Application Usage =====\n\n"); 55599a2dd95SBruce Richardson (hook)(prgname); 55699a2dd95SBruce Richardson } 55799a2dd95SBruce Richardson } 55899a2dd95SBruce Richardson

/*
 * Parse a comma separated list of per-socket values (as given to
 * --socket-mem or --socket-limit) into socket_arg[].
 *
 * strval is split in place on ','. Each field must be a complete base-10
 * number; it is multiplied by 2^20 (megabytes to bytes) before being stored
 * in socket_arg[i], where i is the position of the field in the list.
 *
 * Returns 0 on success, -1 on any malformed, empty, over-long or
 * out-of-range input. Entries preceding a bad field may already have been
 * written to socket_arg[] when -1 is returned.
 */
static int
eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
	char *arg[RTE_MAX_NUMA_NODES];
	char *end;
	int arg_num, i, len;

	len = strnlen(strval, SOCKET_MEM_STRLEN);
	if (len == SOCKET_MEM_STRLEN) {
		RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
		return -1;
	}

	/*
	 * An empty argument must be rejected before looking at the last
	 * character, otherwise strval[len - 1] reads before the buffer.
	 */
	if (len == 0)
		return -1;

	/*
	 * The list must end with a digit (rejects e.g. a trailing comma);
	 * all other error cases will be caught later. The cast is needed
	 * because ctype functions take an unsigned char value.
	 */
	if (!isdigit((unsigned char)strval[len - 1]))
		return -1;

	/* split the optarg into separate socket values */
	arg_num = rte_strsplit(strval, len,
			arg, RTE_MAX_NUMA_NODES, ',');

	/* if split failed, or 0 arguments */
	if (arg_num <= 0)
		return -1;

	/* parse each defined socket option */
	for (i = 0; i < arg_num; i++) {
		uint64_t val;

		errno = 0;
		end = NULL;
		val = strtoull(arg[i], &end, 10);

		/* check for invalid input */
		if (errno != 0 || arg[i][0] == '\0' ||
				end == NULL || *end != '\0')
			return -1;

		/* refuse values whose byte count does not fit in 64 bits */
		if (val > (UINT64_MAX >> 20))
			return -1;

		socket_arg[i] = val << 20;
	}

	return 0;
}

/*
 * Translate the --vfio-intr argument ("legacy", "msi" or "msix") into the
 * matching rte_intr_mode and store it in the internal configuration.
 *
 * Returns 0 when the mode name is recognised, -1 otherwise (the
 * configuration is left untouched in that case).
 */
static int
eal_parse_vfio_intr(const char *mode)
{
	struct internal_config *internal_conf =
		eal_get_internal_configuration();
	unsigned i;
	static const struct {
		const char *name;
		enum rte_intr_mode value;
	} map[] = {
		{ "legacy", RTE_INTR_MODE_LEGACY },
		{ "msi", RTE_INTR_MODE_MSI },
		{ "msix", RTE_INTR_MODE_MSIX },
	};

	for (i = 0; i < RTE_DIM(map); i++) {
		if (!strcmp(mode, map[i].name)) {
			internal_conf->vfio_intr_mode = map[i].value;
			return 0;
		}
	}
	return -1;
}

/*
 * Parse the --vfio-vf-token argument as a UUID and, when rte_uuid_parse()
 * reports success (returns 0), copy it into the internal configuration.
 *
 * Returns 0 on success, -1 if the string could not be parsed.
 */
static int
eal_parse_vfio_vf_token(const char *vf_token)
{
	struct internal_config *cfg = eal_get_internal_configuration();
	rte_uuid_t uuid;

	if (!rte_uuid_parse(vf_token, uuid)) {
		rte_uuid_copy(cfg->vfio_vf_token, uuid);
		return 0;
	}

	return -1;
}

/*
 * Parse the arguments for --log-level only.
 *
 * This is a pre-pass over the whole command line: every option is walked
 * with getopt_long(), but only OPT_LOG_LEVEL_NUM is handed to the common
 * option parser. Errors are not reported here; the walk simply stops.
 * The getopt globals are saved on entry and restored on exit so that a
 * later full parse starts from the same state.
 */
static void
eal_log_level_parse(int argc, char **argv)
{
	int opt;
	char **argvopt;
	int option_index;
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		int ret;

		/* getopt is not happy, stop right now */
		if (opt == '?')
			break;

		ret = (opt == OPT_LOG_LEVEL_NUM) ?
			eal_parse_common_option(opt, optarg, internal_conf) : 0;

		/* common parser is not happy */
		if (ret < 0)
			break;
	}

	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;
}

/*
 * Parse the argument given in the command line of the application.
 *
 * Options are first offered to eal_parse_common_option(); a negative
 * return is an error, 0 means the option was consumed there, and any other
 * value sends the option to the Linux-specific switch below.
 * After the option loop the runtime directory is created and the resulting
 * configuration is adjusted and sanity-checked.
 *
 * Returns optind - 1 on success, after storing the program name in
 * argv[optind - 1]; returns -1 on error. The 'h' option prints the usage
 * text and exits the process. The getopt globals are restored before
 * returning.
 */
static int
eal_parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		/* getopt didn't recognise the option */
		if (opt == '?') {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}

		/* eal_log_level_parse() already handled this option */
		if (opt == OPT_LOG_LEVEL_NUM)
			continue;

		ret = eal_parse_common_option(opt, optarg, internal_conf);
		/* common parser is not happy */
		if (ret < 0) {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
		/* common parser handled this option */
		if (ret == 0)
			continue;

		switch (opt) {
		case 'h':
			eal_usage(prgname);
			exit(EXIT_SUCCESS);

		case OPT_HUGE_DIR_NUM:
		{
			char *hdir = strdup(optarg);
			if (hdir == NULL)
				RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
			else {
				/* free old hugepage dir; free(NULL) is a no-op */
				free(internal_conf->hugepage_dir);
				internal_conf->hugepage_dir = hdir;
			}
			break;
		}
		case OPT_FILE_PREFIX_NUM:
		{
			char *prefix = strdup(optarg);
			if (prefix == NULL)
				RTE_LOG(ERR, EAL, "Could not store file prefix\n");
			else {
				/* free old prefix; free(NULL) is a no-op */
				free(internal_conf->hugefile_prefix);
				internal_conf->hugefile_prefix = prefix;
			}
			break;
		}
		case OPT_SOCKET_MEM_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_mem) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_MEM "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_sockets = 1;
			break;

		case OPT_SOCKET_LIMIT_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_limit) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_LIMIT "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_socket_limits = 1;
			break;

		case OPT_VFIO_INTR_NUM:
			if (eal_parse_vfio_intr(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_INTR "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_VFIO_VF_TOKEN_NUM:
			if (eal_parse_vfio_vf_token(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_VF_TOKEN "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_CREATE_UIO_DEV_NUM:
			internal_conf->create_uio_dev = 1;
			break;

		case OPT_MBUF_POOL_OPS_NAME_NUM:
		{
			char *ops_name = strdup(optarg);
			if (ops_name == NULL)
				RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
			else {
				/* free old ops name; free(NULL) is a no-op */
				free(internal_conf->user_mbuf_pool_ops_name);
				internal_conf->user_mbuf_pool_ops_name =
						ops_name;
			}
			break;
		}
		case OPT_MATCH_ALLOCATIONS_NUM:
			internal_conf->match_allocations = 1;
			break;

		default:
			/* pick the most readable way to name the option */
			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
				RTE_LOG(ERR, EAL, "Option %c is not supported "
					"on Linux\n", opt);
			} else if (opt >= OPT_LONG_MIN_NUM &&
				   opt < OPT_LONG_MAX_NUM) {
				RTE_LOG(ERR, EAL, "Option %s is not supported "
					"on Linux\n",
					eal_long_options[option_index].name);
			} else {
				RTE_LOG(ERR, EAL, "Option %d is not supported "
					"on Linux\n", opt);
			}
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
	}

	/* create runtime data directory. In no_shconf mode, skip any errors */
	if (eal_create_runtime_dir() < 0) {
		if (internal_conf->no_shconf == 0) {
			RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
			ret = -1;
			goto out;
		} else {
			RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n");
		}
	}

	if (eal_adjust_config(internal_conf) != 0) {
		ret = -1;
		goto out;
	}

	/* sanity checks */
	if (eal_check_common_options(internal_conf) != 0) {
		eal_usage(prgname);
		ret = -1;
		goto out;
	}

	/* only index argv when optind - 1 is a valid (non-negative) slot */
	if (optind > 0)
		argv[optind - 1] = prgname;
	ret = optind - 1;

out:
	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;

	return ret;
}

/*
 * Memseg list callback used by eal_check_mem_on_local_socket().
 *
 * arg points to the socket id being looked for. External lists never
 * match (0 is returned); otherwise the result is non-zero when the list
 * belongs to that socket.
 */
static int
check_socket(const struct rte_memseg_list *msl, void *arg)
{
	int *socket_id = arg;

	if (msl->external)
		return 0;

	return *socket_id == msl->socket_id;
}

/*
 * Log a warning when walking the memseg lists with check_socket() returns
 * 0 for the socket of the main lcore.
 */
static void
eal_check_mem_on_local_socket(void)
{
	int socket_id;
	const struct rte_config *config = rte_eal_get_configuration();

	socket_id = rte_lcore_to_socket_id(config->main_lcore);

	if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
		RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n");
}

/*
 * Dummy lcore function that does nothing and returns 0; rte_eal_init()
 * launches it on the worker lcores to learn that they are ready.
 */
static int
sync_func(__rte_unused void *arg)
{
	return 0;
}

/*
 * Request iopl privilege for all RPL.
 * Returns 0 on success and -1 if the iopl() call fails.
 * iopl() is only attempted on x86 builds (RTE_ARCH_X86); on other
 * architectures nothing is done and 0 is returned.
90799a2dd95SBruce Richardson */ 90899a2dd95SBruce Richardson int 90999a2dd95SBruce Richardson rte_eal_iopl_init(void) 91099a2dd95SBruce Richardson { 91199a2dd95SBruce Richardson #if defined(RTE_ARCH_X86) 91299a2dd95SBruce Richardson if (iopl(3) != 0) 91399a2dd95SBruce Richardson return -1; 91499a2dd95SBruce Richardson #endif 91599a2dd95SBruce Richardson return 0; 91699a2dd95SBruce Richardson } 91799a2dd95SBruce Richardson 91899a2dd95SBruce Richardson #ifdef VFIO_PRESENT 91999a2dd95SBruce Richardson static int rte_eal_vfio_setup(void) 92099a2dd95SBruce Richardson { 92199a2dd95SBruce Richardson if (rte_vfio_enable("vfio")) 92299a2dd95SBruce Richardson return -1; 92399a2dd95SBruce Richardson 92499a2dd95SBruce Richardson return 0; 92599a2dd95SBruce Richardson } 92699a2dd95SBruce Richardson #endif 92799a2dd95SBruce Richardson 92899a2dd95SBruce Richardson static void rte_eal_init_alert(const char *msg) 92999a2dd95SBruce Richardson { 93099a2dd95SBruce Richardson fprintf(stderr, "EAL: FATAL: %s\n", msg); 93199a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "%s\n", msg); 93299a2dd95SBruce Richardson } 93399a2dd95SBruce Richardson 93499a2dd95SBruce Richardson /* 93599a2dd95SBruce Richardson * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the 93699a2dd95SBruce Richardson * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel 93799a2dd95SBruce Richardson * IOMMU groups. If IOMMU is not enabled, that path would be empty. 93899a2dd95SBruce Richardson * Therefore, checking if the path is empty will tell us if IOMMU is enabled. 
93999a2dd95SBruce Richardson */ 94099a2dd95SBruce Richardson static bool 94199a2dd95SBruce Richardson is_iommu_enabled(void) 94299a2dd95SBruce Richardson { 94399a2dd95SBruce Richardson DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH); 94499a2dd95SBruce Richardson struct dirent *d; 94599a2dd95SBruce Richardson int n = 0; 94699a2dd95SBruce Richardson 94799a2dd95SBruce Richardson /* if directory doesn't exist, assume IOMMU is not enabled */ 94899a2dd95SBruce Richardson if (dir == NULL) 94999a2dd95SBruce Richardson return false; 95099a2dd95SBruce Richardson 95199a2dd95SBruce Richardson while ((d = readdir(dir)) != NULL) { 95299a2dd95SBruce Richardson /* skip dot and dot-dot */ 95399a2dd95SBruce Richardson if (++n > 2) 95499a2dd95SBruce Richardson break; 95599a2dd95SBruce Richardson } 95699a2dd95SBruce Richardson closedir(dir); 95799a2dd95SBruce Richardson 95899a2dd95SBruce Richardson return n > 2; 95999a2dd95SBruce Richardson } 96099a2dd95SBruce Richardson 96199a2dd95SBruce Richardson /* Launch threads, called at application init(). 
*/ 96299a2dd95SBruce Richardson int 96399a2dd95SBruce Richardson rte_eal_init(int argc, char **argv) 96499a2dd95SBruce Richardson { 96599a2dd95SBruce Richardson int i, fctret, ret; 96699a2dd95SBruce Richardson pthread_t thread_id; 96799a2dd95SBruce Richardson static uint32_t run_once; 96899a2dd95SBruce Richardson uint32_t has_run = 0; 96999a2dd95SBruce Richardson const char *p; 97099a2dd95SBruce Richardson static char logid[PATH_MAX]; 97199a2dd95SBruce Richardson char cpuset[RTE_CPU_AFFINITY_STR_LEN]; 97299a2dd95SBruce Richardson char thread_name[RTE_MAX_THREAD_NAME_LEN]; 97399a2dd95SBruce Richardson bool phys_addrs; 97499a2dd95SBruce Richardson const struct rte_config *config = rte_eal_get_configuration(); 97599a2dd95SBruce Richardson struct internal_config *internal_conf = 97699a2dd95SBruce Richardson eal_get_internal_configuration(); 97799a2dd95SBruce Richardson 97899a2dd95SBruce Richardson /* checks if the machine is adequate */ 97999a2dd95SBruce Richardson if (!rte_cpu_is_supported()) { 98099a2dd95SBruce Richardson rte_eal_init_alert("unsupported cpu type."); 98199a2dd95SBruce Richardson rte_errno = ENOTSUP; 98299a2dd95SBruce Richardson return -1; 98399a2dd95SBruce Richardson } 98499a2dd95SBruce Richardson 98599a2dd95SBruce Richardson if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0, 98699a2dd95SBruce Richardson __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 98799a2dd95SBruce Richardson rte_eal_init_alert("already called initialization."); 98899a2dd95SBruce Richardson rte_errno = EALREADY; 98999a2dd95SBruce Richardson return -1; 99099a2dd95SBruce Richardson } 99199a2dd95SBruce Richardson 99299a2dd95SBruce Richardson p = strrchr(argv[0], '/'); 99399a2dd95SBruce Richardson strlcpy(logid, p ? 
p + 1 : argv[0], sizeof(logid)); 99499a2dd95SBruce Richardson thread_id = pthread_self(); 99599a2dd95SBruce Richardson 99699a2dd95SBruce Richardson eal_reset_internal_config(internal_conf); 99799a2dd95SBruce Richardson 99899a2dd95SBruce Richardson /* set log level as early as possible */ 99999a2dd95SBruce Richardson eal_log_level_parse(argc, argv); 100099a2dd95SBruce Richardson 100199a2dd95SBruce Richardson /* clone argv to report out later in telemetry */ 100299a2dd95SBruce Richardson eal_save_args(argc, argv); 100399a2dd95SBruce Richardson 100499a2dd95SBruce Richardson if (rte_eal_cpu_init() < 0) { 100599a2dd95SBruce Richardson rte_eal_init_alert("Cannot detect lcores."); 100699a2dd95SBruce Richardson rte_errno = ENOTSUP; 100799a2dd95SBruce Richardson return -1; 100899a2dd95SBruce Richardson } 100999a2dd95SBruce Richardson 101099a2dd95SBruce Richardson fctret = eal_parse_args(argc, argv); 101199a2dd95SBruce Richardson if (fctret < 0) { 101299a2dd95SBruce Richardson rte_eal_init_alert("Invalid 'command line' arguments."); 101399a2dd95SBruce Richardson rte_errno = EINVAL; 101499a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 101599a2dd95SBruce Richardson return -1; 101699a2dd95SBruce Richardson } 101799a2dd95SBruce Richardson 101899a2dd95SBruce Richardson if (eal_plugins_init() < 0) { 101999a2dd95SBruce Richardson rte_eal_init_alert("Cannot init plugins"); 102099a2dd95SBruce Richardson rte_errno = EINVAL; 102199a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 102299a2dd95SBruce Richardson return -1; 102399a2dd95SBruce Richardson } 102499a2dd95SBruce Richardson 102599a2dd95SBruce Richardson if (eal_trace_init() < 0) { 102699a2dd95SBruce Richardson rte_eal_init_alert("Cannot init trace"); 102799a2dd95SBruce Richardson rte_errno = EFAULT; 102899a2dd95SBruce Richardson return -1; 102999a2dd95SBruce Richardson } 103099a2dd95SBruce Richardson 103199a2dd95SBruce Richardson if (eal_option_device_parse()) { 
103299a2dd95SBruce Richardson rte_errno = ENODEV; 103399a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 103499a2dd95SBruce Richardson return -1; 103599a2dd95SBruce Richardson } 103699a2dd95SBruce Richardson 103799a2dd95SBruce Richardson if (rte_config_init() < 0) { 103899a2dd95SBruce Richardson rte_eal_init_alert("Cannot init config"); 103999a2dd95SBruce Richardson return -1; 104099a2dd95SBruce Richardson } 104199a2dd95SBruce Richardson 104299a2dd95SBruce Richardson if (rte_eal_intr_init() < 0) { 104399a2dd95SBruce Richardson rte_eal_init_alert("Cannot init interrupt-handling thread"); 104499a2dd95SBruce Richardson return -1; 104599a2dd95SBruce Richardson } 104699a2dd95SBruce Richardson 104799a2dd95SBruce Richardson if (rte_eal_alarm_init() < 0) { 104899a2dd95SBruce Richardson rte_eal_init_alert("Cannot init alarm"); 104999a2dd95SBruce Richardson /* rte_eal_alarm_init sets rte_errno on failure. */ 105099a2dd95SBruce Richardson return -1; 105199a2dd95SBruce Richardson } 105299a2dd95SBruce Richardson 105399a2dd95SBruce Richardson /* Put mp channel init before bus scan so that we can init the vdev 105499a2dd95SBruce Richardson * bus through mp channel in the secondary process before the bus scan. 
105599a2dd95SBruce Richardson */ 105699a2dd95SBruce Richardson if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) { 105799a2dd95SBruce Richardson rte_eal_init_alert("failed to init mp channel"); 105899a2dd95SBruce Richardson if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 105999a2dd95SBruce Richardson rte_errno = EFAULT; 106099a2dd95SBruce Richardson return -1; 106199a2dd95SBruce Richardson } 106299a2dd95SBruce Richardson } 106399a2dd95SBruce Richardson 106499a2dd95SBruce Richardson /* register multi-process action callbacks for hotplug */ 106599a2dd95SBruce Richardson if (eal_mp_dev_hotplug_init() < 0) { 106699a2dd95SBruce Richardson rte_eal_init_alert("failed to register mp callback for hotplug"); 106799a2dd95SBruce Richardson return -1; 106899a2dd95SBruce Richardson } 106999a2dd95SBruce Richardson 107099a2dd95SBruce Richardson if (rte_bus_scan()) { 107199a2dd95SBruce Richardson rte_eal_init_alert("Cannot scan the buses for devices"); 107299a2dd95SBruce Richardson rte_errno = ENODEV; 107399a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 107499a2dd95SBruce Richardson return -1; 107599a2dd95SBruce Richardson } 107699a2dd95SBruce Richardson 107799a2dd95SBruce Richardson phys_addrs = rte_eal_using_phys_addrs() != 0; 107899a2dd95SBruce Richardson 107999a2dd95SBruce Richardson /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ 108099a2dd95SBruce Richardson if (internal_conf->iova_mode == RTE_IOVA_DC) { 108199a2dd95SBruce Richardson /* autodetect the IOVA mapping mode */ 108299a2dd95SBruce Richardson enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); 108399a2dd95SBruce Richardson 108499a2dd95SBruce Richardson if (iova_mode == RTE_IOVA_DC) { 108599a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n"); 108699a2dd95SBruce Richardson 108799a2dd95SBruce Richardson if (!phys_addrs) { 108899a2dd95SBruce Richardson /* if we have no access to physical addresses, 108999a2dd95SBruce 
Richardson * pick IOVA as VA mode. 109099a2dd95SBruce Richardson */ 109199a2dd95SBruce Richardson iova_mode = RTE_IOVA_VA; 109299a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n"); 109399a2dd95SBruce Richardson #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) 109499a2dd95SBruce Richardson } else if (rte_eal_check_module("rte_kni") == 1) { 109599a2dd95SBruce Richardson iova_mode = RTE_IOVA_PA; 109699a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n"); 109799a2dd95SBruce Richardson #endif 109899a2dd95SBruce Richardson } else if (is_iommu_enabled()) { 109999a2dd95SBruce Richardson /* we have an IOMMU, pick IOVA as VA mode */ 110099a2dd95SBruce Richardson iova_mode = RTE_IOVA_VA; 110199a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n"); 110299a2dd95SBruce Richardson } else { 110399a2dd95SBruce Richardson /* physical addresses available, and no IOMMU 110499a2dd95SBruce Richardson * found, so pick IOVA as PA. 
110599a2dd95SBruce Richardson */ 110699a2dd95SBruce Richardson iova_mode = RTE_IOVA_PA; 110799a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n"); 110899a2dd95SBruce Richardson } 110999a2dd95SBruce Richardson } 111099a2dd95SBruce Richardson #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) 111199a2dd95SBruce Richardson /* Workaround for KNI which requires physical address to work 111299a2dd95SBruce Richardson * in kernels < 4.10 111399a2dd95SBruce Richardson */ 111499a2dd95SBruce Richardson if (iova_mode == RTE_IOVA_VA && 111599a2dd95SBruce Richardson rte_eal_check_module("rte_kni") == 1) { 111699a2dd95SBruce Richardson if (phys_addrs) { 111799a2dd95SBruce Richardson iova_mode = RTE_IOVA_PA; 111899a2dd95SBruce Richardson RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n"); 111999a2dd95SBruce Richardson } else { 112099a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n"); 112199a2dd95SBruce Richardson } 112299a2dd95SBruce Richardson } 112399a2dd95SBruce Richardson #endif 112499a2dd95SBruce Richardson rte_eal_get_configuration()->iova_mode = iova_mode; 112599a2dd95SBruce Richardson } else { 112699a2dd95SBruce Richardson rte_eal_get_configuration()->iova_mode = 112799a2dd95SBruce Richardson internal_conf->iova_mode; 112899a2dd95SBruce Richardson } 112999a2dd95SBruce Richardson 113099a2dd95SBruce Richardson if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { 113199a2dd95SBruce Richardson rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); 113299a2dd95SBruce Richardson rte_errno = EINVAL; 113399a2dd95SBruce Richardson return -1; 113499a2dd95SBruce Richardson } 113599a2dd95SBruce Richardson 113699a2dd95SBruce Richardson RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", 113799a2dd95SBruce Richardson rte_eal_iova_mode() == RTE_IOVA_PA ? 
"PA" : "VA"); 113899a2dd95SBruce Richardson 113999a2dd95SBruce Richardson if (internal_conf->no_hugetlbfs == 0) { 114099a2dd95SBruce Richardson /* rte_config isn't initialized yet */ 114199a2dd95SBruce Richardson ret = internal_conf->process_type == RTE_PROC_PRIMARY ? 114299a2dd95SBruce Richardson eal_hugepage_info_init() : 114399a2dd95SBruce Richardson eal_hugepage_info_read(); 114499a2dd95SBruce Richardson if (ret < 0) { 114599a2dd95SBruce Richardson rte_eal_init_alert("Cannot get hugepage information."); 114699a2dd95SBruce Richardson rte_errno = EACCES; 114799a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 114899a2dd95SBruce Richardson return -1; 114999a2dd95SBruce Richardson } 115099a2dd95SBruce Richardson } 115199a2dd95SBruce Richardson 115299a2dd95SBruce Richardson if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) { 115399a2dd95SBruce Richardson if (internal_conf->no_hugetlbfs) 115499a2dd95SBruce Richardson internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE; 115599a2dd95SBruce Richardson } 115699a2dd95SBruce Richardson 115799a2dd95SBruce Richardson if (internal_conf->vmware_tsc_map == 1) { 115899a2dd95SBruce Richardson #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT 115999a2dd95SBruce Richardson rte_cycles_vmware_tsc_map = 1; 116099a2dd95SBruce Richardson RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " 116199a2dd95SBruce Richardson "you must have monitor_control.pseudo_perfctr = TRUE\n"); 116299a2dd95SBruce Richardson #else 116399a2dd95SBruce Richardson RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " 116499a2dd95SBruce Richardson "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); 116599a2dd95SBruce Richardson #endif 116699a2dd95SBruce Richardson } 116799a2dd95SBruce Richardson 116899a2dd95SBruce Richardson if (eal_log_init(logid, internal_conf->syslog_facility) < 0) { 116999a2dd95SBruce Richardson rte_eal_init_alert("Cannot init logging."); 117099a2dd95SBruce Richardson rte_errno = ENOMEM; 
117199a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 117299a2dd95SBruce Richardson return -1; 117399a2dd95SBruce Richardson } 117499a2dd95SBruce Richardson 117599a2dd95SBruce Richardson #ifdef VFIO_PRESENT 117699a2dd95SBruce Richardson if (rte_eal_vfio_setup() < 0) { 117799a2dd95SBruce Richardson rte_eal_init_alert("Cannot init VFIO"); 117899a2dd95SBruce Richardson rte_errno = EAGAIN; 117999a2dd95SBruce Richardson __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 118099a2dd95SBruce Richardson return -1; 118199a2dd95SBruce Richardson } 118299a2dd95SBruce Richardson #endif 118399a2dd95SBruce Richardson /* in secondary processes, memory init may allocate additional fbarrays 118499a2dd95SBruce Richardson * not present in primary processes, so to avoid any potential issues, 118599a2dd95SBruce Richardson * initialize memzones first. 118699a2dd95SBruce Richardson */ 118799a2dd95SBruce Richardson if (rte_eal_memzone_init() < 0) { 118899a2dd95SBruce Richardson rte_eal_init_alert("Cannot init memzone"); 118999a2dd95SBruce Richardson rte_errno = ENODEV; 119099a2dd95SBruce Richardson return -1; 119199a2dd95SBruce Richardson } 119299a2dd95SBruce Richardson 119399a2dd95SBruce Richardson if (rte_eal_memory_init() < 0) { 119499a2dd95SBruce Richardson rte_eal_init_alert("Cannot init memory"); 119599a2dd95SBruce Richardson rte_errno = ENOMEM; 119699a2dd95SBruce Richardson return -1; 119799a2dd95SBruce Richardson } 119899a2dd95SBruce Richardson 119999a2dd95SBruce Richardson /* the directories are locked during eal_hugepage_info_init */ 120099a2dd95SBruce Richardson eal_hugedirs_unlock(); 120199a2dd95SBruce Richardson 120299a2dd95SBruce Richardson if (rte_eal_malloc_heap_init() < 0) { 120399a2dd95SBruce Richardson rte_eal_init_alert("Cannot init malloc heap"); 120499a2dd95SBruce Richardson rte_errno = ENODEV; 120599a2dd95SBruce Richardson return -1; 120699a2dd95SBruce Richardson } 120799a2dd95SBruce Richardson 120899a2dd95SBruce Richardson if 
(rte_eal_tailqs_init() < 0) { 120999a2dd95SBruce Richardson rte_eal_init_alert("Cannot init tail queues for objects"); 121099a2dd95SBruce Richardson rte_errno = EFAULT; 121199a2dd95SBruce Richardson return -1; 121299a2dd95SBruce Richardson } 121399a2dd95SBruce Richardson 121499a2dd95SBruce Richardson if (rte_eal_timer_init() < 0) { 121599a2dd95SBruce Richardson rte_eal_init_alert("Cannot init HPET or TSC timers"); 121699a2dd95SBruce Richardson rte_errno = ENOTSUP; 121799a2dd95SBruce Richardson return -1; 121899a2dd95SBruce Richardson } 121999a2dd95SBruce Richardson 122099a2dd95SBruce Richardson eal_check_mem_on_local_socket(); 122199a2dd95SBruce Richardson 122299a2dd95SBruce Richardson if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t), 122399a2dd95SBruce Richardson &lcore_config[config->main_lcore].cpuset) != 0) { 122499a2dd95SBruce Richardson rte_eal_init_alert("Cannot set affinity"); 122599a2dd95SBruce Richardson rte_errno = EINVAL; 122699a2dd95SBruce Richardson return -1; 122799a2dd95SBruce Richardson } 122899a2dd95SBruce Richardson __rte_thread_init(config->main_lcore, 122999a2dd95SBruce Richardson &lcore_config[config->main_lcore].cpuset); 123099a2dd95SBruce Richardson 123199a2dd95SBruce Richardson ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset)); 123299a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", 123399a2dd95SBruce Richardson config->main_lcore, (uintptr_t)thread_id, cpuset, 123499a2dd95SBruce Richardson ret == 0 ? 
"" : "..."); 123599a2dd95SBruce Richardson 123699a2dd95SBruce Richardson RTE_LCORE_FOREACH_WORKER(i) { 123799a2dd95SBruce Richardson 123899a2dd95SBruce Richardson /* 123999a2dd95SBruce Richardson * create communication pipes between main thread 124099a2dd95SBruce Richardson * and children 124199a2dd95SBruce Richardson */ 124299a2dd95SBruce Richardson if (pipe(lcore_config[i].pipe_main2worker) < 0) 124399a2dd95SBruce Richardson rte_panic("Cannot create pipe\n"); 124499a2dd95SBruce Richardson if (pipe(lcore_config[i].pipe_worker2main) < 0) 124599a2dd95SBruce Richardson rte_panic("Cannot create pipe\n"); 124699a2dd95SBruce Richardson 124799a2dd95SBruce Richardson lcore_config[i].state = WAIT; 124899a2dd95SBruce Richardson 124999a2dd95SBruce Richardson /* create a thread for each lcore */ 125099a2dd95SBruce Richardson ret = pthread_create(&lcore_config[i].thread_id, NULL, 125199a2dd95SBruce Richardson eal_thread_loop, NULL); 125299a2dd95SBruce Richardson if (ret != 0) 125399a2dd95SBruce Richardson rte_panic("Cannot create thread\n"); 125499a2dd95SBruce Richardson 125599a2dd95SBruce Richardson /* Set thread_name for aid in debugging. 
*/ 125699a2dd95SBruce Richardson snprintf(thread_name, sizeof(thread_name), 125799a2dd95SBruce Richardson "lcore-worker-%d", i); 125899a2dd95SBruce Richardson ret = rte_thread_setname(lcore_config[i].thread_id, 125999a2dd95SBruce Richardson thread_name); 126099a2dd95SBruce Richardson if (ret != 0) 126199a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, 126299a2dd95SBruce Richardson "Cannot set name for lcore thread\n"); 126399a2dd95SBruce Richardson 126499a2dd95SBruce Richardson ret = pthread_setaffinity_np(lcore_config[i].thread_id, 126599a2dd95SBruce Richardson sizeof(rte_cpuset_t), &lcore_config[i].cpuset); 126699a2dd95SBruce Richardson if (ret != 0) 126799a2dd95SBruce Richardson rte_panic("Cannot set affinity\n"); 126899a2dd95SBruce Richardson } 126999a2dd95SBruce Richardson 127099a2dd95SBruce Richardson /* 127199a2dd95SBruce Richardson * Launch a dummy function on all worker lcores, so that main lcore 127299a2dd95SBruce Richardson * knows they are all ready when this function returns. 127399a2dd95SBruce Richardson */ 127499a2dd95SBruce Richardson rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN); 127599a2dd95SBruce Richardson rte_eal_mp_wait_lcore(); 127699a2dd95SBruce Richardson 127799a2dd95SBruce Richardson /* initialize services so vdevs register service during bus_probe. 
*/ 127899a2dd95SBruce Richardson ret = rte_service_init(); 127999a2dd95SBruce Richardson if (ret) { 128099a2dd95SBruce Richardson rte_eal_init_alert("rte_service_init() failed"); 128199a2dd95SBruce Richardson rte_errno = -ret; 128299a2dd95SBruce Richardson return -1; 128399a2dd95SBruce Richardson } 128499a2dd95SBruce Richardson 128599a2dd95SBruce Richardson /* Probe all the buses and devices/drivers on them */ 128699a2dd95SBruce Richardson if (rte_bus_probe()) { 128799a2dd95SBruce Richardson rte_eal_init_alert("Cannot probe devices"); 128899a2dd95SBruce Richardson rte_errno = ENOTSUP; 128999a2dd95SBruce Richardson return -1; 129099a2dd95SBruce Richardson } 129199a2dd95SBruce Richardson 129299a2dd95SBruce Richardson #ifdef VFIO_PRESENT 129399a2dd95SBruce Richardson /* Register mp action after probe() so that we got enough info */ 129499a2dd95SBruce Richardson if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0) 129599a2dd95SBruce Richardson return -1; 129699a2dd95SBruce Richardson #endif 129799a2dd95SBruce Richardson 129899a2dd95SBruce Richardson /* initialize default service/lcore mappings and start running. Ignore 129999a2dd95SBruce Richardson * -ENOTSUP, as it indicates no service coremask passed to EAL. 130099a2dd95SBruce Richardson */ 130199a2dd95SBruce Richardson ret = rte_service_start_with_defaults(); 130299a2dd95SBruce Richardson if (ret < 0 && ret != -ENOTSUP) { 130399a2dd95SBruce Richardson rte_errno = -ret; 130499a2dd95SBruce Richardson return -1; 130599a2dd95SBruce Richardson } 130699a2dd95SBruce Richardson 130799a2dd95SBruce Richardson /* 130899a2dd95SBruce Richardson * Clean up unused files in runtime directory. 
We do this at the end of 130999a2dd95SBruce Richardson * init and not at the beginning because we want to clean stuff up 131099a2dd95SBruce Richardson * whether we are primary or secondary process, but we cannot remove 131199a2dd95SBruce Richardson * primary process' files because secondary should be able to run even 131299a2dd95SBruce Richardson * if primary process is dead. 131399a2dd95SBruce Richardson * 131499a2dd95SBruce Richardson * In no_shconf mode, no runtime directory is created in the first 131599a2dd95SBruce Richardson * place, so no cleanup needed. 131699a2dd95SBruce Richardson */ 131799a2dd95SBruce Richardson if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) { 131899a2dd95SBruce Richardson rte_eal_init_alert("Cannot clear runtime directory"); 131999a2dd95SBruce Richardson return -1; 132099a2dd95SBruce Richardson } 1321e89463a3SBruce Richardson if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) { 132299a2dd95SBruce Richardson int tlog = rte_log_register_type_and_pick_level( 132399a2dd95SBruce Richardson "lib.telemetry", RTE_LOG_WARNING); 132499a2dd95SBruce Richardson if (tlog < 0) 132599a2dd95SBruce Richardson tlog = RTE_LOGTYPE_EAL; 132699a2dd95SBruce Richardson if (rte_telemetry_init(rte_eal_get_runtime_dir(), 132799a2dd95SBruce Richardson rte_version(), 132899a2dd95SBruce Richardson &internal_conf->ctrl_cpuset, rte_log, tlog) != 0) 132999a2dd95SBruce Richardson return -1; 133099a2dd95SBruce Richardson } 133199a2dd95SBruce Richardson 133299a2dd95SBruce Richardson eal_mcfg_complete(); 133399a2dd95SBruce Richardson 133499a2dd95SBruce Richardson return fctret; 133599a2dd95SBruce Richardson } 133699a2dd95SBruce Richardson 133799a2dd95SBruce Richardson static int 133899a2dd95SBruce Richardson mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, 133999a2dd95SBruce Richardson void *arg __rte_unused) 134099a2dd95SBruce Richardson { 134199a2dd95SBruce Richardson /* ms is const, so find this 
memseg */ 134299a2dd95SBruce Richardson struct rte_memseg *found; 134399a2dd95SBruce Richardson 134499a2dd95SBruce Richardson if (msl->external) 134599a2dd95SBruce Richardson return 0; 134699a2dd95SBruce Richardson 134799a2dd95SBruce Richardson found = rte_mem_virt2memseg(ms->addr, msl); 134899a2dd95SBruce Richardson 134999a2dd95SBruce Richardson found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; 135099a2dd95SBruce Richardson 135199a2dd95SBruce Richardson return 0; 135299a2dd95SBruce Richardson } 135399a2dd95SBruce Richardson 135499a2dd95SBruce Richardson int 135599a2dd95SBruce Richardson rte_eal_cleanup(void) 135699a2dd95SBruce Richardson { 135799a2dd95SBruce Richardson /* if we're in a primary process, we need to mark hugepages as freeable 135899a2dd95SBruce Richardson * so that finalization can release them back to the system. 135999a2dd95SBruce Richardson */ 136099a2dd95SBruce Richardson struct internal_config *internal_conf = 136199a2dd95SBruce Richardson eal_get_internal_configuration(); 136299a2dd95SBruce Richardson 1363*32b4771cSDmitry Kozlyuk if (rte_eal_process_type() == RTE_PROC_PRIMARY && 1364*32b4771cSDmitry Kozlyuk internal_conf->hugepage_file.unlink_existing) 136599a2dd95SBruce Richardson rte_memseg_walk(mark_freeable, NULL); 136699a2dd95SBruce Richardson rte_service_finalize(); 136799a2dd95SBruce Richardson rte_mp_channel_cleanup(); 136899a2dd95SBruce Richardson /* after this point, any DPDK pointers will become dangling */ 136999a2dd95SBruce Richardson rte_eal_memory_detach(); 137090b13ab8SHarman Kalra rte_eal_alarm_cleanup(); 137199a2dd95SBruce Richardson rte_trace_save(); 137299a2dd95SBruce Richardson eal_trace_fini(); 137399a2dd95SBruce Richardson eal_cleanup_config(internal_conf); 137499a2dd95SBruce Richardson return 0; 137599a2dd95SBruce Richardson } 137699a2dd95SBruce Richardson 137799a2dd95SBruce Richardson int rte_eal_create_uio_dev(void) 137899a2dd95SBruce Richardson { 137999a2dd95SBruce Richardson const struct internal_config *internal_conf = 
138099a2dd95SBruce Richardson eal_get_internal_configuration(); 138199a2dd95SBruce Richardson 138299a2dd95SBruce Richardson return internal_conf->create_uio_dev; 138399a2dd95SBruce Richardson } 138499a2dd95SBruce Richardson 138599a2dd95SBruce Richardson enum rte_intr_mode 138699a2dd95SBruce Richardson rte_eal_vfio_intr_mode(void) 138799a2dd95SBruce Richardson { 138899a2dd95SBruce Richardson const struct internal_config *internal_conf = 138999a2dd95SBruce Richardson eal_get_internal_configuration(); 139099a2dd95SBruce Richardson 139199a2dd95SBruce Richardson return internal_conf->vfio_intr_mode; 139299a2dd95SBruce Richardson } 139399a2dd95SBruce Richardson 139499a2dd95SBruce Richardson void 139599a2dd95SBruce Richardson rte_eal_vfio_get_vf_token(rte_uuid_t vf_token) 139699a2dd95SBruce Richardson { 139799a2dd95SBruce Richardson struct internal_config *cfg = eal_get_internal_configuration(); 139899a2dd95SBruce Richardson 139999a2dd95SBruce Richardson rte_uuid_copy(vf_token, cfg->vfio_vf_token); 140099a2dd95SBruce Richardson } 140199a2dd95SBruce Richardson 140299a2dd95SBruce Richardson int 140399a2dd95SBruce Richardson rte_eal_check_module(const char *module_name) 140499a2dd95SBruce Richardson { 140599a2dd95SBruce Richardson char sysfs_mod_name[PATH_MAX]; 140699a2dd95SBruce Richardson struct stat st; 140799a2dd95SBruce Richardson int n; 140899a2dd95SBruce Richardson 140999a2dd95SBruce Richardson if (NULL == module_name) 141099a2dd95SBruce Richardson return -1; 141199a2dd95SBruce Richardson 141299a2dd95SBruce Richardson /* Check if there is sysfs mounted */ 141399a2dd95SBruce Richardson if (stat("/sys/module", &st) != 0) { 141499a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "sysfs is not mounted! 
error %i (%s)\n", 141599a2dd95SBruce Richardson errno, strerror(errno)); 141699a2dd95SBruce Richardson return -1; 141799a2dd95SBruce Richardson } 141899a2dd95SBruce Richardson 141999a2dd95SBruce Richardson /* A module might be built-in, therefore try sysfs */ 142099a2dd95SBruce Richardson n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); 142199a2dd95SBruce Richardson if (n < 0 || n > PATH_MAX) { 142299a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Could not format module path\n"); 142399a2dd95SBruce Richardson return -1; 142499a2dd95SBruce Richardson } 142599a2dd95SBruce Richardson 142699a2dd95SBruce Richardson if (stat(sysfs_mod_name, &st) != 0) { 142799a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", 142899a2dd95SBruce Richardson sysfs_mod_name, errno, strerror(errno)); 142999a2dd95SBruce Richardson return 0; 143099a2dd95SBruce Richardson } 143199a2dd95SBruce Richardson 143299a2dd95SBruce Richardson /* Module has been found */ 143399a2dd95SBruce Richardson return 1; 143499a2dd95SBruce Richardson } 1435