1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation. 3 * Copyright(c) 2012-2014 6WIND S.A. 4 */ 5 6 #include <ctype.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <stdint.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <pthread.h> 13 #include <getopt.h> 14 #include <sys/file.h> 15 #include <dirent.h> 16 #include <fcntl.h> 17 #include <fnmatch.h> 18 #include <stddef.h> 19 #include <errno.h> 20 #include <limits.h> 21 #include <sys/mman.h> 22 #include <sys/stat.h> 23 #if defined(RTE_ARCH_X86) 24 #include <sys/io.h> 25 #endif 26 #include <linux/version.h> 27 28 #include <rte_common.h> 29 #include <rte_debug.h> 30 #include <rte_memory.h> 31 #include <rte_launch.h> 32 #include <rte_eal.h> 33 #include <rte_eal_memconfig.h> 34 #include <rte_errno.h> 35 #include <rte_lcore.h> 36 #include <rte_service_component.h> 37 #include <rte_log.h> 38 #include <rte_string_fns.h> 39 #include <rte_cpuflags.h> 40 #include <rte_bus.h> 41 #include <rte_version.h> 42 #include <malloc_heap.h> 43 #include <rte_vfio.h> 44 45 #include <telemetry_internal.h> 46 #include "eal_private.h" 47 #include "eal_thread.h" 48 #include "eal_lcore_var.h" 49 #include "eal_internal_cfg.h" 50 #include "eal_filesystem.h" 51 #include "eal_hugepages.h" 52 #include "eal_memcfg.h" 53 #include "eal_trace.h" 54 #include "eal_options.h" 55 #include "eal_vfio.h" 56 #include "hotplug_mp.h" 57 #include "log_internal.h" 58 59 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) 60 61 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) 62 63 #define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups" 64 65 /* define fd variable here, because file needs to be kept open for the 66 * duration of the program, as we hold a write lock on it in the primary proc */ 67 static int mem_cfg_fd = -1; 68 69 static struct flock wr_lock = { 70 .l_type = F_WRLCK, 71 .l_whence = SEEK_SET, 72 .l_start = offsetof(struct rte_mem_config, memsegs), 73 .l_len = 
RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs), 74 }; 75 76 /* internal configuration (per-core) */ 77 struct lcore_config lcore_config[RTE_MAX_LCORE]; 78 79 /* used by rte_rdtsc() */ 80 int rte_cycles_vmware_tsc_map; 81 82 83 int 84 eal_clean_runtime_dir(void) 85 { 86 const char *runtime_dir = rte_eal_get_runtime_dir(); 87 DIR *dir; 88 struct dirent *dirent; 89 int dir_fd, fd, lck_result; 90 static const char * const filters[] = { 91 "fbarray_*", 92 "mp_socket_*" 93 }; 94 95 /* open directory */ 96 dir = opendir(runtime_dir); 97 if (!dir) { 98 EAL_LOG(ERR, "Unable to open runtime directory %s", 99 runtime_dir); 100 goto error; 101 } 102 dir_fd = dirfd(dir); 103 104 /* lock the directory before doing anything, to avoid races */ 105 if (flock(dir_fd, LOCK_EX) < 0) { 106 EAL_LOG(ERR, "Unable to lock runtime directory %s", 107 runtime_dir); 108 goto error; 109 } 110 111 dirent = readdir(dir); 112 if (!dirent) { 113 EAL_LOG(ERR, "Unable to read runtime directory %s", 114 runtime_dir); 115 goto error; 116 } 117 118 while (dirent != NULL) { 119 unsigned int f_idx; 120 bool skip = true; 121 122 /* skip files that don't match the patterns */ 123 for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { 124 const char *filter = filters[f_idx]; 125 126 if (fnmatch(filter, dirent->d_name, 0) == 0) { 127 skip = false; 128 break; 129 } 130 } 131 if (skip) { 132 dirent = readdir(dir); 133 continue; 134 } 135 136 /* try and lock the file */ 137 fd = openat(dir_fd, dirent->d_name, O_RDONLY); 138 139 /* skip to next file */ 140 if (fd == -1) { 141 dirent = readdir(dir); 142 continue; 143 } 144 145 /* non-blocking lock */ 146 lck_result = flock(fd, LOCK_EX | LOCK_NB); 147 148 /* if lock succeeds, remove the file */ 149 if (lck_result != -1) 150 unlinkat(dir_fd, dirent->d_name, 0); 151 close(fd); 152 dirent = readdir(dir); 153 } 154 155 /* closedir closes dir_fd and drops the lock */ 156 closedir(dir); 157 return 0; 158 159 error: 160 if (dir) 161 closedir(dir); 162 163 EAL_LOG(ERR, 
"Error while clearing runtime dir: %s", 164 strerror(errno)); 165 166 return -1; 167 } 168 169 170 /* create memory configuration in shared/mmap memory. Take out 171 * a write lock on the memsegs, so we can auto-detect primary/secondary. 172 * This means we never close the file while running (auto-close on exit). 173 * We also don't lock the whole file, so that in future we can use read-locks 174 * on other parts, e.g. memzones, to detect if there are running secondary 175 * processes. */ 176 static int 177 rte_eal_config_create(void) 178 { 179 struct rte_config *config = rte_eal_get_configuration(); 180 size_t page_sz = sysconf(_SC_PAGE_SIZE); 181 size_t cfg_len = sizeof(*config->mem_config); 182 size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); 183 void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; 184 int retval; 185 const struct internal_config *internal_conf = 186 eal_get_internal_configuration(); 187 188 const char *pathname = eal_runtime_config_path(); 189 190 if (internal_conf->no_shconf) 191 return 0; 192 193 /* map the config before hugepage address so that we don't waste a page */ 194 if (internal_conf->base_virtaddr != 0) 195 rte_mem_cfg_addr = (void *) 196 RTE_ALIGN_FLOOR(internal_conf->base_virtaddr - 197 sizeof(struct rte_mem_config), page_sz); 198 else 199 rte_mem_cfg_addr = NULL; 200 201 if (mem_cfg_fd < 0){ 202 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); 203 if (mem_cfg_fd < 0) { 204 EAL_LOG(ERR, "Cannot open '%s' for rte_mem_config", 205 pathname); 206 return -1; 207 } 208 } 209 210 retval = ftruncate(mem_cfg_fd, cfg_len); 211 if (retval < 0){ 212 close(mem_cfg_fd); 213 mem_cfg_fd = -1; 214 EAL_LOG(ERR, "Cannot resize '%s' for rte_mem_config", 215 pathname); 216 return -1; 217 } 218 219 retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); 220 if (retval < 0){ 221 close(mem_cfg_fd); 222 mem_cfg_fd = -1; 223 EAL_LOG(ERR, "Cannot create lock on '%s'. 
Is another primary " 224 "process running?", pathname); 225 return -1; 226 } 227 228 /* reserve space for config */ 229 rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, 230 &cfg_len_aligned, page_sz, 0, 0); 231 if (rte_mem_cfg_addr == NULL) { 232 EAL_LOG(ERR, "Cannot mmap memory for rte_config"); 233 close(mem_cfg_fd); 234 mem_cfg_fd = -1; 235 return -1; 236 } 237 238 /* remap the actual file into the space we've just reserved */ 239 mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, 240 cfg_len_aligned, PROT_READ | PROT_WRITE, 241 MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); 242 if (mapped_mem_cfg_addr == MAP_FAILED) { 243 munmap(rte_mem_cfg_addr, cfg_len); 244 close(mem_cfg_fd); 245 mem_cfg_fd = -1; 246 EAL_LOG(ERR, "Cannot remap memory for rte_config"); 247 return -1; 248 } 249 250 memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config)); 251 config->mem_config = rte_mem_cfg_addr; 252 253 /* store address of the config in the config itself so that secondary 254 * processes could later map the config into this exact location 255 */ 256 config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; 257 config->mem_config->dma_maskbits = 0; 258 259 return 0; 260 } 261 262 /* attach to an existing shared memory config */ 263 static int 264 rte_eal_config_attach(void) 265 { 266 struct rte_config *config = rte_eal_get_configuration(); 267 struct rte_mem_config *mem_config; 268 const struct internal_config *internal_conf = 269 eal_get_internal_configuration(); 270 271 const char *pathname = eal_runtime_config_path(); 272 273 if (internal_conf->no_shconf) 274 return 0; 275 276 if (mem_cfg_fd < 0){ 277 mem_cfg_fd = open(pathname, O_RDWR); 278 if (mem_cfg_fd < 0) { 279 EAL_LOG(ERR, "Cannot open '%s' for rte_mem_config", 280 pathname); 281 return -1; 282 } 283 } 284 285 /* map it as read-only first */ 286 mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), 287 PROT_READ, MAP_SHARED, mem_cfg_fd, 0); 288 if (mem_config == MAP_FAILED) { 
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		EAL_LOG(ERR, "Cannot mmap memory for rte_config! error %i (%s)",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

/* reattach the shared config at exact memory location primary process has it */
static int
rte_eal_config_reattach(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	struct rte_mem_config *mem_config;
	void *rte_mem_cfg_addr;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->no_shconf)
		return 0;

	/* save the address primary process has mapped shared config to */
	rte_mem_cfg_addr =
		(void *) (uintptr_t) config->mem_config->mem_cfg_addr;

	/* unmap original (read-only) config mapped by rte_eal_config_attach() */
	munmap(config->mem_config, sizeof(struct rte_mem_config));

	/* remap the config at proper address; note: no MAP_FIXED, so the kernel
	 * may return a different address - checked explicitly below
	 */
	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
			mem_cfg_fd, 0);

	/* fd is no longer needed once the mapping exists (or has failed) */
	close(mem_cfg_fd);
	mem_cfg_fd = -1;

	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
		if (mem_config != MAP_FAILED) {
			/* errno is stale, don't use */
			EAL_LOG(ERR, "Cannot mmap memory for rte_config at [%p], got [%p]"
				" - please use '--" OPT_BASE_VIRTADDR
				"' option", rte_mem_cfg_addr, mem_config);
			munmap(mem_config, sizeof(struct rte_mem_config));
			return -1;
		}
		EAL_LOG(ERR, "Cannot mmap memory for rte_config! error %i (%s)",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

/* Detect if we are a primary or a secondary process */
enum rte_proc_type_t
eal_proc_type_detect(void)
{
	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
	const char *pathname = eal_runtime_config_path();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* if there no shared config, there can be no secondary processes */
	if (!internal_conf->no_shconf) {
		/* if we can open the file but not get a write-lock we are a
		 * secondary process. NOTE: if we get a file handle back, we
		 * keep that open and don't close it to prevent a race condition
		 * between multiple opens.
		 */
		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
			ptype = RTE_PROC_SECONDARY;
	}

	EAL_LOG(INFO, "Auto-detected process type: %s",
		ptype == RTE_PROC_PRIMARY ?
		"PRIMARY" : "SECONDARY");

	return ptype;
}

/* Sets up rte_config structure with the pointer to shared memory config. */
static int
rte_config_init(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	config->process_type = internal_conf->process_type;

	switch (config->process_type) {
	case RTE_PROC_PRIMARY:
		/* primary creates the shared config and publishes its settings */
		if (rte_eal_config_create() < 0)
			return -1;
		eal_mcfg_update_from_internal();
		break;
	case RTE_PROC_SECONDARY:
		/* secondary maps the config read-only, waits for the primary to
		 * finish init, checks the DPDK version matches, then remaps the
		 * config read-write at the address recorded by the primary
		 */
		if (rte_eal_config_attach() < 0)
			return -1;
		eal_mcfg_wait_complete();
		if (eal_mcfg_check_version() < 0) {
			EAL_LOG(ERR, "Primary and secondary process DPDK version mismatch");
			return -1;
		}
		if (rte_eal_config_reattach() < 0)
			return -1;
		if (!__rte_mp_enable()) {
			EAL_LOG(ERR, "Primary process refused secondary attachment");
			return -1;
		}
		eal_mcfg_update_internal();
		break;
	case RTE_PROC_AUTO:
	case RTE_PROC_INVALID:
		EAL_LOG(ERR, "Invalid process type %d",
			config->process_type);
		return -1;
	}

	return 0;
}

/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
static void
eal_hugedirs_unlock(void)
{
	int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
	{
		/* skip uninitialized */
		if (internal_conf->hugepage_info[i].lock_descriptor < 0)
			continue;
		/* unlock hugepage file */
		flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN);
		close(internal_conf->hugepage_info[i].lock_descriptor);
		/* reset the field */
		internal_conf->hugepage_info[i].lock_descriptor = -1;
	}
}

/* display usage */
static void
eal_usage(const char *prgname)
{
	rte_usage_hook_t hook = eal_get_application_usage_hook();

	printf("\nUsage: %s ", prgname);
	/* generic EAL options shared by all OS ports come first */
	eal_common_usage();
	printf("EAL Linux options:\n"
	       "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
	       "  --"OPT_SOCKET_LIMIT"      Limit memory allocation on sockets (comma separated values)\n"
	       "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
	       "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
	       "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
	       "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
	       "  --"OPT_VFIO_VF_TOKEN"     VF token (UUID) shared between SR-IOV PF and VFs\n"
	       "  --"OPT_LEGACY_MEM"        Legacy memory mode (no dynamic allocation, contiguous segments)\n"
	       "  --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
	       "  --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
	       "  --"OPT_HUGE_WORKER_STACK"[=size]\n"
	       "                      Allocate worker thread stacks from hugepage memory.\n"
	       "                      Size is in units of kbytes and defaults to system\n"
	       "                      thread stack size if not specified.\n"
	       "\n");
	/* Allow the application to print its usage message too if hook is set */
	if (hook) {
		printf("===== Application Usage =====\n\n");
		(hook)(prgname);
	}
}

/* Parse a comma-separated list of per-socket memory sizes (in megabytes)
 * into socket_arg[] (values stored in bytes, see the <<= 20 below).
 * Returns 0 on success, -1 on malformed input.
 */
static int
eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
	char * arg[RTE_MAX_NUMA_NODES];
	char *end;
	int arg_num, i, len;

	len = strnlen(strval, SOCKET_MEM_STRLEN);
	if (len == SOCKET_MEM_STRLEN) {
		EAL_LOG(ERR, "--socket-mem is too long");
		return -1;
	}

	/* all other error cases will be caught later */
	if (!isdigit(strval[len-1]))
		return -1;

	/* split the optarg into separate socket values */
	arg_num = rte_strsplit(strval, len,
			arg, RTE_MAX_NUMA_NODES, ',');

	/* if split failed, or 0 arguments */
	if (arg_num <= 0)
		return -1;

	/* parse each defined socket option */
	errno = 0;
	for (i = 0; i < arg_num; i++) {
uint64_t val; 498 end = NULL; 499 val = strtoull(arg[i], &end, 10); 500 501 /* check for invalid input */ 502 if ((errno != 0) || 503 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) 504 return -1; 505 val <<= 20; 506 socket_arg[i] = val; 507 } 508 509 return 0; 510 } 511 512 static int 513 eal_parse_vfio_intr(const char *mode) 514 { 515 struct internal_config *internal_conf = 516 eal_get_internal_configuration(); 517 unsigned i; 518 static struct { 519 const char *name; 520 enum rte_intr_mode value; 521 } map[] = { 522 { "legacy", RTE_INTR_MODE_LEGACY }, 523 { "msi", RTE_INTR_MODE_MSI }, 524 { "msix", RTE_INTR_MODE_MSIX }, 525 }; 526 527 for (i = 0; i < RTE_DIM(map); i++) { 528 if (!strcmp(mode, map[i].name)) { 529 internal_conf->vfio_intr_mode = map[i].value; 530 return 0; 531 } 532 } 533 return -1; 534 } 535 536 static int 537 eal_parse_vfio_vf_token(const char *vf_token) 538 { 539 struct internal_config *cfg = eal_get_internal_configuration(); 540 rte_uuid_t uuid; 541 542 if (!rte_uuid_parse(vf_token, uuid)) { 543 rte_uuid_copy(cfg->vfio_vf_token, uuid); 544 return 0; 545 } 546 547 return -1; 548 } 549 550 static int 551 eal_parse_huge_worker_stack(const char *arg) 552 { 553 struct internal_config *cfg = eal_get_internal_configuration(); 554 555 if (arg == NULL || arg[0] == '\0') { 556 pthread_attr_t attr; 557 int ret; 558 559 if (pthread_attr_init(&attr) != 0) { 560 EAL_LOG(ERR, "Could not retrieve default stack size"); 561 return -1; 562 } 563 ret = pthread_attr_getstacksize(&attr, &cfg->huge_worker_stack_size); 564 pthread_attr_destroy(&attr); 565 if (ret != 0) { 566 EAL_LOG(ERR, "Could not retrieve default stack size"); 567 return -1; 568 } 569 } else { 570 unsigned long stack_size; 571 char *end; 572 573 errno = 0; 574 stack_size = strtoul(arg, &end, 10); 575 if (errno || end == NULL || stack_size == 0 || 576 stack_size >= (size_t)-1 / 1024) 577 return -1; 578 579 cfg->huge_worker_stack_size = stack_size * 1024; 580 } 581 582 EAL_LOG(DEBUG, "Each 
worker thread will use %zu kB of DPDK memory as stack", 583 cfg->huge_worker_stack_size / 1024); 584 return 0; 585 } 586 587 /* Parse the argument given in the command line of the application */ 588 static int 589 eal_parse_args(int argc, char **argv) 590 { 591 int opt, ret; 592 char **argvopt; 593 int option_index; 594 char *prgname = argv[0]; 595 const int old_optind = optind; 596 const int old_optopt = optopt; 597 char * const old_optarg = optarg; 598 struct internal_config *internal_conf = 599 eal_get_internal_configuration(); 600 601 argvopt = argv; 602 optind = 1; 603 604 while ((opt = getopt_long(argc, argvopt, eal_short_options, 605 eal_long_options, &option_index)) != EOF) { 606 607 /* getopt didn't recognise the option */ 608 if (opt == '?') { 609 eal_usage(prgname); 610 ret = -1; 611 goto out; 612 } 613 614 /* eal_parse_log_options() already handled this option */ 615 if (eal_option_is_log(opt)) 616 continue; 617 618 ret = eal_parse_common_option(opt, optarg, internal_conf); 619 /* common parser is not happy */ 620 if (ret < 0) { 621 eal_usage(prgname); 622 ret = -1; 623 goto out; 624 } 625 /* common parser handled this option */ 626 if (ret == 0) 627 continue; 628 629 switch (opt) { 630 case OPT_HELP_NUM: 631 eal_usage(prgname); 632 exit(EXIT_SUCCESS); 633 634 case OPT_HUGE_DIR_NUM: 635 { 636 char *hdir = strdup(optarg); 637 if (hdir == NULL) 638 EAL_LOG(ERR, "Could not store hugepage directory"); 639 else { 640 /* free old hugepage dir */ 641 free(internal_conf->hugepage_dir); 642 internal_conf->hugepage_dir = hdir; 643 } 644 break; 645 } 646 case OPT_FILE_PREFIX_NUM: 647 { 648 char *prefix = strdup(optarg); 649 if (prefix == NULL) 650 EAL_LOG(ERR, "Could not store file prefix"); 651 else { 652 /* free old prefix */ 653 free(internal_conf->hugefile_prefix); 654 internal_conf->hugefile_prefix = prefix; 655 } 656 break; 657 } 658 case OPT_SOCKET_MEM_NUM: 659 if (eal_parse_socket_arg(optarg, 660 internal_conf->socket_mem) < 0) { 661 EAL_LOG(ERR, "invalid 
parameters for --" 662 OPT_SOCKET_MEM); 663 eal_usage(prgname); 664 ret = -1; 665 goto out; 666 } 667 internal_conf->force_sockets = 1; 668 break; 669 670 case OPT_SOCKET_LIMIT_NUM: 671 if (eal_parse_socket_arg(optarg, 672 internal_conf->socket_limit) < 0) { 673 EAL_LOG(ERR, "invalid parameters for --" 674 OPT_SOCKET_LIMIT); 675 eal_usage(prgname); 676 ret = -1; 677 goto out; 678 } 679 internal_conf->force_socket_limits = 1; 680 break; 681 682 case OPT_VFIO_INTR_NUM: 683 if (eal_parse_vfio_intr(optarg) < 0) { 684 EAL_LOG(ERR, "invalid parameters for --" 685 OPT_VFIO_INTR); 686 eal_usage(prgname); 687 ret = -1; 688 goto out; 689 } 690 break; 691 692 case OPT_VFIO_VF_TOKEN_NUM: 693 if (eal_parse_vfio_vf_token(optarg) < 0) { 694 EAL_LOG(ERR, "invalid parameters for --" 695 OPT_VFIO_VF_TOKEN); 696 eal_usage(prgname); 697 ret = -1; 698 goto out; 699 } 700 break; 701 702 case OPT_CREATE_UIO_DEV_NUM: 703 internal_conf->create_uio_dev = 1; 704 break; 705 706 case OPT_MBUF_POOL_OPS_NAME_NUM: 707 { 708 char *ops_name = strdup(optarg); 709 if (ops_name == NULL) 710 EAL_LOG(ERR, "Could not store mbuf pool ops name"); 711 else { 712 /* free old ops name */ 713 free(internal_conf->user_mbuf_pool_ops_name); 714 715 internal_conf->user_mbuf_pool_ops_name = 716 ops_name; 717 } 718 break; 719 } 720 case OPT_MATCH_ALLOCATIONS_NUM: 721 internal_conf->match_allocations = 1; 722 break; 723 724 case OPT_HUGE_WORKER_STACK_NUM: 725 if (eal_parse_huge_worker_stack(optarg) < 0) { 726 EAL_LOG(ERR, "invalid parameter for --" 727 OPT_HUGE_WORKER_STACK); 728 eal_usage(prgname); 729 ret = -1; 730 goto out; 731 } 732 break; 733 734 default: 735 if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { 736 EAL_LOG(ERR, "Option %c is not supported " 737 "on Linux", opt); 738 } else if (opt >= OPT_LONG_MIN_NUM && 739 opt < OPT_LONG_MAX_NUM) { 740 EAL_LOG(ERR, "Option %s is not supported " 741 "on Linux", 742 eal_long_options[option_index].name); 743 } else { 744 EAL_LOG(ERR, "Option %d is not supported " 745 "on 
Linux", opt); 746 } 747 eal_usage(prgname); 748 ret = -1; 749 goto out; 750 } 751 } 752 753 /* create runtime data directory. In no_shconf mode, skip any errors */ 754 if (eal_create_runtime_dir() < 0) { 755 if (internal_conf->no_shconf == 0) { 756 EAL_LOG(ERR, "Cannot create runtime directory"); 757 ret = -1; 758 goto out; 759 } else 760 EAL_LOG(WARNING, "No DPDK runtime directory created"); 761 } 762 763 if (eal_adjust_config(internal_conf) != 0) { 764 ret = -1; 765 goto out; 766 } 767 768 /* sanity checks */ 769 if (eal_check_common_options(internal_conf) != 0) { 770 eal_usage(prgname); 771 ret = -1; 772 goto out; 773 } 774 775 if (optind >= 0) 776 argv[optind-1] = prgname; 777 ret = optind-1; 778 779 out: 780 /* restore getopt lib */ 781 optind = old_optind; 782 optopt = old_optopt; 783 optarg = old_optarg; 784 785 return ret; 786 } 787 788 static int 789 check_socket(const struct rte_memseg_list *msl, void *arg) 790 { 791 int *socket_id = arg; 792 793 if (msl->external) 794 return 0; 795 796 return *socket_id == msl->socket_id; 797 } 798 799 static void 800 eal_check_mem_on_local_socket(void) 801 { 802 int socket_id; 803 const struct rte_config *config = rte_eal_get_configuration(); 804 805 socket_id = rte_lcore_to_socket_id(config->main_lcore); 806 807 if (rte_memseg_list_walk(check_socket, &socket_id) == 0) 808 EAL_LOG(WARNING, "WARNING: Main core has no memory on local socket!"); 809 } 810 811 static int 812 sync_func(__rte_unused void *arg) 813 { 814 return 0; 815 } 816 817 /* 818 * Request iopl privilege for all RPL, returns 0 on success 819 * iopl() call is mostly for the i386 architecture. For other architectures, 820 * return -1 to indicate IO privilege can't be changed in this way. 
 */
int
rte_eal_iopl_init(void)
{
#if defined(RTE_ARCH_X86)
	/* request I/O privilege level 3 (access to all ports) from the kernel */
	if (iopl(3) != 0)
		return -1;
#endif
	return 0;
}

/* log an alert-level message on rte_eal_init() failure paths */
static void rte_eal_init_alert(const char *msg)
{
	EAL_LOG(ALERT, "%s", msg);
}

/*
 * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
 * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
 * IOMMU groups. If IOMMU is not enabled, that path would be empty.
 * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
 */
static bool
is_iommu_enabled(void)
{
	DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
	struct dirent *d;
	int n = 0;

	/* if directory doesn't exist, assume IOMMU is not enabled */
	if (dir == NULL)
		return false;

	while ((d = readdir(dir)) != NULL) {
		/* "." and ".." always exist; a third entry means at least one
		 * IOMMU group is present
		 */
		if (++n > 2)
			break;
	}
	closedir(dir);

	return n > 2;
}

/* adapter so the worker entry point matches pthread's void *(*)(void *);
 * eal_thread_loop() never returns
 */
static __rte_noreturn void *
eal_worker_thread_loop(void *arg)
{
	eal_thread_loop(arg);
}

/* Create the worker thread for one lcore, optionally with a NUMA-local,
 * hugepage-backed stack (when --huge-worker-stack was given).
 * Returns 0 on success, -1 on failure with rte_errno set.
 */
static int
eal_worker_thread_create(unsigned int lcore_id)
{
	pthread_attr_t *attrp = NULL;
	void *stack_ptr = NULL;
	pthread_attr_t attr;
	size_t stack_size;
	int ret = -1;

	/* zero means "use the default pthread stack" - no custom attributes */
	stack_size = eal_get_internal_configuration()->huge_worker_stack_size;
	if (stack_size != 0) {
		/* Allocate NUMA aware stack memory and set pthread attributes */
		stack_ptr = rte_zmalloc_socket("lcore_stack", stack_size,
			RTE_CACHE_LINE_SIZE, rte_lcore_to_socket_id(lcore_id));
		if (stack_ptr == NULL) {
			rte_eal_init_alert("Cannot allocate worker lcore stack memory");
			rte_errno = ENOMEM;
			goto out;
		}

		if (pthread_attr_init(&attr) != 0) {
			rte_eal_init_alert("Cannot init pthread attributes");
			rte_errno = EFAULT;
			goto out;
		}
		attrp = &attr;

		if (pthread_attr_setstack(attrp, stack_ptr, stack_size) != 0) {
			rte_eal_init_alert("Cannot set pthread stack attributes");
			rte_errno = EFAULT;
			goto out;
		}
	}

	/* store the new pthread id in the lcore's rte_thread_t handle */
	if (pthread_create((pthread_t *)&lcore_config[lcore_id].thread_id.opaque_id,
			attrp, eal_worker_thread_loop, (void *)(uintptr_t)lcore_id) == 0)
		ret = 0;

out:
	if (ret != 0)
		/* on success the stack now belongs to the thread; free only on failure */
		rte_free(stack_ptr);
	if (attrp != NULL)
		pthread_attr_destroy(attrp);
	return ret;
}

/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	static RTE_ATOMIC(uint32_t) run_once;
	uint32_t has_run = 0;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_THREAD_NAME_SIZE];
	bool phys_addrs;
	const struct rte_config *config = rte_eal_get_configuration();
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* setup log as early as possible */
	if (eal_parse_log_options(argc, argv) < 0) {
		rte_eal_init_alert("invalid log arguments.");
		rte_errno = EINVAL;
		return -1;
	}

	eal_log_init(program_invocation_short_name);

	/* checks if the machine is adequate */
	if (!rte_cpu_is_supported()) {
		rte_eal_init_alert("unsupported cpu type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* verify if DPDK supported on architecture MMU */
	if (!eal_mmu_supported()) {
		rte_eal_init_alert("unsupported MMU type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	/* guard against repeated or concurrent initialization */
	if (!rte_atomic_compare_exchange_strong_explicit(&run_once, &has_run, 1,
			rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		rte_eal_init_alert("already called initialization.");
		rte_errno = EALREADY;
		return -1;
	}

	eal_reset_internal_config(internal_conf);

	/* clone argv to report out later in telemetry */
	eal_save_args(argc, argv);

	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}
970 971 fctret = eal_parse_args(argc, argv); 972 if (fctret < 0) { 973 rte_eal_init_alert("Invalid 'command line' arguments."); 974 rte_errno = EINVAL; 975 rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); 976 return -1; 977 } 978 979 if (eal_plugins_init() < 0) { 980 rte_eal_init_alert("Cannot init plugins"); 981 rte_errno = EINVAL; 982 rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); 983 return -1; 984 } 985 986 if (eal_trace_init() < 0) { 987 rte_eal_init_alert("Cannot init trace"); 988 rte_errno = EFAULT; 989 return -1; 990 } 991 992 if (eal_option_device_parse()) { 993 rte_errno = ENODEV; 994 rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); 995 return -1; 996 } 997 998 if (rte_config_init() < 0) { 999 rte_eal_init_alert("Cannot init config"); 1000 return -1; 1001 } 1002 1003 if (rte_eal_intr_init() < 0) { 1004 rte_eal_init_alert("Cannot init interrupt-handling thread"); 1005 return -1; 1006 } 1007 1008 if (rte_eal_alarm_init() < 0) { 1009 rte_eal_init_alert("Cannot init alarm"); 1010 /* rte_eal_alarm_init sets rte_errno on failure. */ 1011 return -1; 1012 } 1013 1014 /* Put mp channel init before bus scan so that we can init the vdev 1015 * bus through mp channel in the secondary process before the bus scan. 
1016 */ 1017 if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) { 1018 rte_eal_init_alert("failed to init mp channel"); 1019 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1020 rte_errno = EFAULT; 1021 return -1; 1022 } 1023 } 1024 1025 if (rte_bus_scan()) { 1026 rte_eal_init_alert("Cannot scan the buses for devices"); 1027 rte_errno = ENODEV; 1028 rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); 1029 return -1; 1030 } 1031 1032 phys_addrs = rte_eal_using_phys_addrs() != 0; 1033 1034 /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ 1035 if (internal_conf->iova_mode == RTE_IOVA_DC) { 1036 /* autodetect the IOVA mapping mode */ 1037 enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); 1038 1039 if (iova_mode == RTE_IOVA_DC) { 1040 EAL_LOG(DEBUG, "Buses did not request a specific IOVA mode."); 1041 1042 if (!RTE_IOVA_IN_MBUF) { 1043 iova_mode = RTE_IOVA_VA; 1044 EAL_LOG(DEBUG, "IOVA as VA mode is forced by build option."); 1045 } else if (!phys_addrs) { 1046 /* if we have no access to physical addresses, 1047 * pick IOVA as VA mode. 1048 */ 1049 iova_mode = RTE_IOVA_VA; 1050 EAL_LOG(DEBUG, "Physical addresses are unavailable, selecting IOVA as VA mode."); 1051 } else if (is_iommu_enabled()) { 1052 /* we have an IOMMU, pick IOVA as VA mode */ 1053 iova_mode = RTE_IOVA_VA; 1054 EAL_LOG(DEBUG, "IOMMU is available, selecting IOVA as VA mode."); 1055 } else { 1056 /* physical addresses available, and no IOMMU 1057 * found, so pick IOVA as PA. 
1058 */ 1059 iova_mode = RTE_IOVA_PA; 1060 EAL_LOG(DEBUG, "IOMMU is not available, selecting IOVA as PA mode."); 1061 } 1062 } 1063 rte_eal_get_configuration()->iova_mode = iova_mode; 1064 } else { 1065 rte_eal_get_configuration()->iova_mode = 1066 internal_conf->iova_mode; 1067 } 1068 1069 if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { 1070 rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); 1071 rte_errno = EINVAL; 1072 return -1; 1073 } 1074 1075 if (rte_eal_iova_mode() == RTE_IOVA_PA && !RTE_IOVA_IN_MBUF) { 1076 rte_eal_init_alert("Cannot use IOVA as 'PA' as it is disabled during build"); 1077 rte_errno = EINVAL; 1078 return -1; 1079 } 1080 1081 EAL_LOG(INFO, "Selected IOVA mode '%s'", 1082 rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); 1083 1084 if (internal_conf->no_hugetlbfs == 0) { 1085 /* rte_config isn't initialized yet */ 1086 ret = internal_conf->process_type == RTE_PROC_PRIMARY ? 1087 eal_hugepage_info_init() : 1088 eal_hugepage_info_read(); 1089 if (ret < 0) { 1090 rte_eal_init_alert("Cannot get hugepage information."); 1091 rte_errno = EACCES; 1092 rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); 1093 return -1; 1094 } 1095 } 1096 1097 if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) { 1098 if (internal_conf->no_hugetlbfs) 1099 internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE; 1100 } 1101 1102 if (internal_conf->vmware_tsc_map == 1) { 1103 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT 1104 rte_cycles_vmware_tsc_map = 1; 1105 EAL_LOG(DEBUG, "Using VMWARE TSC MAP, " 1106 "you must have monitor_control.pseudo_perfctr = TRUE"); 1107 #else 1108 EAL_LOG(WARNING, "Ignoring --vmware-tsc-map because " 1109 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set"); 1110 #endif 1111 } 1112 1113 #ifdef VFIO_PRESENT 1114 if (rte_vfio_enable("vfio")) { 1115 rte_eal_init_alert("Cannot init VFIO"); 1116 rte_errno = EAGAIN; 1117 rte_atomic_store_explicit(&run_once, 0, 
rte_memory_order_relaxed); 1118 return -1; 1119 } 1120 #endif 1121 /* in secondary processes, memory init may allocate additional fbarrays 1122 * not present in primary processes, so to avoid any potential issues, 1123 * initialize memzones first. 1124 */ 1125 if (rte_eal_memzone_init() < 0) { 1126 rte_eal_init_alert("Cannot init memzone"); 1127 rte_errno = ENODEV; 1128 return -1; 1129 } 1130 1131 rte_mcfg_mem_read_lock(); 1132 1133 if (rte_eal_memory_init() < 0) { 1134 rte_mcfg_mem_read_unlock(); 1135 rte_eal_init_alert("Cannot init memory"); 1136 rte_errno = ENOMEM; 1137 return -1; 1138 } 1139 1140 /* the directories are locked during eal_hugepage_info_init */ 1141 eal_hugedirs_unlock(); 1142 1143 if (rte_eal_malloc_heap_init() < 0) { 1144 rte_mcfg_mem_read_unlock(); 1145 rte_eal_init_alert("Cannot init malloc heap"); 1146 rte_errno = ENODEV; 1147 return -1; 1148 } 1149 1150 rte_mcfg_mem_read_unlock(); 1151 1152 if (rte_eal_malloc_heap_populate() < 0) { 1153 rte_eal_init_alert("Cannot init malloc heap"); 1154 rte_errno = ENODEV; 1155 return -1; 1156 } 1157 1158 /* register multi-process action callbacks for hotplug after memory init */ 1159 if (eal_mp_dev_hotplug_init() < 0) { 1160 rte_eal_init_alert("failed to register mp callback for hotplug"); 1161 return -1; 1162 } 1163 1164 if (rte_eal_tailqs_init() < 0) { 1165 rte_eal_init_alert("Cannot init tail queues for objects"); 1166 rte_errno = EFAULT; 1167 return -1; 1168 } 1169 1170 if (rte_eal_timer_init() < 0) { 1171 rte_eal_init_alert("Cannot init HPET or TSC timers"); 1172 rte_errno = ENOTSUP; 1173 return -1; 1174 } 1175 1176 eal_check_mem_on_local_socket(); 1177 1178 if (rte_thread_set_affinity_by_id(rte_thread_self(), 1179 &lcore_config[config->main_lcore].cpuset) != 0) { 1180 rte_eal_init_alert("Cannot set affinity"); 1181 rte_errno = EINVAL; 1182 return -1; 1183 } 1184 __rte_thread_init(config->main_lcore, 1185 &lcore_config[config->main_lcore].cpuset); 1186 1187 ret = 
eal_thread_dump_current_affinity(cpuset, sizeof(cpuset)); 1188 EAL_LOG(DEBUG, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])", 1189 config->main_lcore, (uintptr_t)pthread_self(), cpuset, 1190 ret == 0 ? "" : "..."); 1191 1192 RTE_LCORE_FOREACH_WORKER(i) { 1193 1194 /* 1195 * create communication pipes between main thread 1196 * and children 1197 */ 1198 if (pipe(lcore_config[i].pipe_main2worker) < 0) 1199 rte_panic("Cannot create pipe\n"); 1200 if (pipe(lcore_config[i].pipe_worker2main) < 0) 1201 rte_panic("Cannot create pipe\n"); 1202 1203 lcore_config[i].state = WAIT; 1204 1205 /* create a thread for each lcore */ 1206 ret = eal_worker_thread_create(i); 1207 if (ret != 0) 1208 rte_panic("Cannot create thread\n"); 1209 1210 /* Set thread_name for aid in debugging. */ 1211 snprintf(thread_name, sizeof(thread_name), 1212 "dpdk-worker%d", i); 1213 rte_thread_set_name(lcore_config[i].thread_id, thread_name); 1214 1215 ret = rte_thread_set_affinity_by_id(lcore_config[i].thread_id, 1216 &lcore_config[i].cpuset); 1217 if (ret != 0) 1218 rte_panic("Cannot set affinity\n"); 1219 } 1220 1221 /* 1222 * Launch a dummy function on all worker lcores, so that main lcore 1223 * knows they are all ready when this function returns. 1224 */ 1225 rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN); 1226 rte_eal_mp_wait_lcore(); 1227 1228 /* initialize services so vdevs register service during bus_probe. */ 1229 ret = rte_service_init(); 1230 if (ret) { 1231 rte_eal_init_alert("rte_service_init() failed"); 1232 rte_errno = -ret; 1233 return -1; 1234 } 1235 1236 /* Probe all the buses and devices/drivers on them */ 1237 if (rte_bus_probe()) { 1238 rte_eal_init_alert("Cannot probe devices"); 1239 rte_errno = ENOTSUP; 1240 return -1; 1241 } 1242 1243 /* initialize default service/lcore mappings and start running. Ignore 1244 * -ENOTSUP, as it indicates no service coremask passed to EAL. 
1245 */ 1246 ret = rte_service_start_with_defaults(); 1247 if (ret < 0 && ret != -ENOTSUP) { 1248 rte_errno = -ret; 1249 return -1; 1250 } 1251 1252 /* 1253 * Clean up unused files in runtime directory. We do this at the end of 1254 * init and not at the beginning because we want to clean stuff up 1255 * whether we are primary or secondary process, but we cannot remove 1256 * primary process' files because secondary should be able to run even 1257 * if primary process is dead. 1258 * 1259 * In no_shconf mode, no runtime directory is created in the first 1260 * place, so no cleanup needed. 1261 */ 1262 if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) { 1263 rte_eal_init_alert("Cannot clear runtime directory"); 1264 return -1; 1265 } 1266 if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) { 1267 if (rte_telemetry_init(rte_eal_get_runtime_dir(), 1268 rte_version(), 1269 &internal_conf->ctrl_cpuset) != 0) 1270 return -1; 1271 } 1272 1273 eal_mcfg_complete(); 1274 1275 return fctret; 1276 } 1277 1278 static int 1279 mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, 1280 void *arg __rte_unused) 1281 { 1282 /* ms is const, so find this memseg */ 1283 struct rte_memseg *found; 1284 1285 if (msl->external) 1286 return 0; 1287 1288 found = rte_mem_virt2memseg(ms->addr, msl); 1289 1290 found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; 1291 1292 return 0; 1293 } 1294 1295 int 1296 rte_eal_cleanup(void) 1297 { 1298 static RTE_ATOMIC(uint32_t) run_once; 1299 uint32_t has_run = 0; 1300 1301 if (!rte_atomic_compare_exchange_strong_explicit(&run_once, &has_run, 1, 1302 rte_memory_order_relaxed, rte_memory_order_relaxed)) { 1303 EAL_LOG(WARNING, "Already called cleanup"); 1304 rte_errno = EALREADY; 1305 return -1; 1306 } 1307 1308 /* if we're in a primary process, we need to mark hugepages as freeable 1309 * so that finalization can release them back to the system. 
1310 */ 1311 struct internal_config *internal_conf = 1312 eal_get_internal_configuration(); 1313 1314 if (rte_eal_process_type() == RTE_PROC_PRIMARY && 1315 internal_conf->hugepage_file.unlink_existing) 1316 rte_memseg_walk(mark_freeable, NULL); 1317 1318 rte_service_finalize(); 1319 #ifdef VFIO_PRESENT 1320 vfio_mp_sync_cleanup(); 1321 #endif 1322 rte_mp_channel_cleanup(); 1323 eal_bus_cleanup(); 1324 rte_trace_save(); 1325 eal_trace_fini(); 1326 eal_mp_dev_hotplug_cleanup(); 1327 rte_eal_alarm_cleanup(); 1328 /* after this point, any DPDK pointers will become dangling */ 1329 rte_eal_memory_detach(); 1330 rte_eal_malloc_heap_cleanup(); 1331 eal_cleanup_config(internal_conf); 1332 eal_lcore_var_cleanup(); 1333 rte_eal_log_cleanup(); 1334 return 0; 1335 } 1336 1337 int rte_eal_create_uio_dev(void) 1338 { 1339 const struct internal_config *internal_conf = 1340 eal_get_internal_configuration(); 1341 1342 return internal_conf->create_uio_dev; 1343 } 1344 1345 enum rte_intr_mode 1346 rte_eal_vfio_intr_mode(void) 1347 { 1348 const struct internal_config *internal_conf = 1349 eal_get_internal_configuration(); 1350 1351 return internal_conf->vfio_intr_mode; 1352 } 1353 1354 void 1355 rte_eal_vfio_get_vf_token(rte_uuid_t vf_token) 1356 { 1357 struct internal_config *cfg = eal_get_internal_configuration(); 1358 1359 rte_uuid_copy(vf_token, cfg->vfio_vf_token); 1360 } 1361 1362 int 1363 rte_eal_check_module(const char *module_name) 1364 { 1365 char sysfs_mod_name[PATH_MAX]; 1366 struct stat st; 1367 int n; 1368 1369 if (NULL == module_name) 1370 return -1; 1371 1372 /* Check if there is sysfs mounted */ 1373 if (stat("/sys/module", &st) != 0) { 1374 EAL_LOG(DEBUG, "sysfs is not mounted! 
error %i (%s)", 1375 errno, strerror(errno)); 1376 return -1; 1377 } 1378 1379 /* A module might be built-in, therefore try sysfs */ 1380 n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); 1381 if (n < 0 || n > PATH_MAX) { 1382 EAL_LOG(DEBUG, "Could not format module path"); 1383 return -1; 1384 } 1385 1386 if (stat(sysfs_mod_name, &st) != 0) { 1387 EAL_LOG(DEBUG, "Module %s not found! error %i (%s)", 1388 sysfs_mod_name, errno, strerror(errno)); 1389 return 0; 1390 } 1391 1392 /* Module has been found */ 1393 return 1; 1394 } 1395