1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation. 3 * Copyright(c) 2012-2014 6WIND S.A. 4 */ 5 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <stdint.h> 9 #include <string.h> 10 #include <unistd.h> 11 #include <pthread.h> 12 #include <getopt.h> 13 #include <sys/file.h> 14 #include <dirent.h> 15 #include <fcntl.h> 16 #include <fnmatch.h> 17 #include <stddef.h> 18 #include <errno.h> 19 #include <limits.h> 20 #include <sys/mman.h> 21 #include <sys/stat.h> 22 #if defined(RTE_ARCH_X86) 23 #include <sys/io.h> 24 #endif 25 #include <linux/version.h> 26 27 #include <rte_common.h> 28 #include <rte_debug.h> 29 #include <rte_memory.h> 30 #include <rte_launch.h> 31 #include <rte_eal.h> 32 #include <rte_errno.h> 33 #include <rte_lcore.h> 34 #include <rte_service_component.h> 35 #include <rte_log.h> 36 #include <rte_string_fns.h> 37 #include <rte_cpuflags.h> 38 #include <rte_bus.h> 39 #include <rte_version.h> 40 #include <malloc_heap.h> 41 #include <rte_vfio.h> 42 43 #include <telemetry_internal.h> 44 #include "eal_private.h" 45 #include "eal_thread.h" 46 #include "eal_internal_cfg.h" 47 #include "eal_filesystem.h" 48 #include "eal_hugepages.h" 49 #include "eal_memcfg.h" 50 #include "eal_trace.h" 51 #include "eal_log.h" 52 #include "eal_options.h" 53 #include "eal_vfio.h" 54 #include "hotplug_mp.h" 55 56 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) 57 58 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) 59 60 #define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups" 61 62 /* define fd variable here, because file needs to be kept open for the 63 * duration of the program, as we hold a write lock on it in the primary proc */ 64 static int mem_cfg_fd = -1; 65 66 static struct flock wr_lock = { 67 .l_type = F_WRLCK, 68 .l_whence = SEEK_SET, 69 .l_start = offsetof(struct rte_mem_config, memsegs), 70 .l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs), 71 }; 72 73 /* internal configuration (per-core) */ 74 struct lcore_config lcore_config[RTE_MAX_LCORE]; 75 76 /* used by rte_rdtsc() */ 77 int rte_cycles_vmware_tsc_map; 78 79 80 int 81 eal_clean_runtime_dir(void) 82 { 83 const char *runtime_dir = rte_eal_get_runtime_dir(); 84 DIR *dir; 85 struct dirent *dirent; 86 int dir_fd, fd, lck_result; 87 static const char * const filters[] = { 88 "fbarray_*", 89 "mp_socket_*" 90 }; 91 92 /* open directory */ 93 dir = opendir(runtime_dir); 94 if (!dir) { 95 RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", 96 runtime_dir); 97 goto error; 98 } 99 dir_fd = dirfd(dir); 100 101 /* lock the directory before doing anything, to avoid races */ 102 if (flock(dir_fd, LOCK_EX) < 0) { 103 RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", 104 runtime_dir); 105 goto error; 106 } 107 108 dirent = readdir(dir); 109 if (!dirent) { 110 RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", 111 runtime_dir); 112 goto error; 113 } 114 115 while (dirent != NULL) { 116 unsigned int f_idx; 117 bool skip = true; 118 119 /* skip files that don't match the patterns */ 120 for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { 121 const char *filter = filters[f_idx]; 122 123 if (fnmatch(filter, dirent->d_name, 0) == 0) { 124 skip = false; 125 break; 126 } 127 } 128 if (skip) { 129 dirent = readdir(dir); 130 continue; 131 } 132 133 /* try and lock the file */ 134 fd = openat(dir_fd, dirent->d_name, O_RDONLY); 135 136 /* skip to next file */ 137 if (fd == -1) { 138 dirent = readdir(dir); 139 continue; 140 } 141 142 /* non-blocking lock */ 143 lck_result = flock(fd, LOCK_EX | LOCK_NB); 144 145 /* if lock succeeds, remove the file */ 146 if (lck_result != -1) 147 unlinkat(dir_fd, dirent->d_name, 0); 148 close(fd); 149 dirent = readdir(dir); 150 } 151 152 /* closedir closes dir_fd and drops the lock */ 153 closedir(dir); 154 return 0; 155 156 error: 157 if (dir) 158 closedir(dir); 159 160 RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", 161 strerror(errno)); 162 163 return -1; 164 } 165 166 167 /* create memory configuration in shared/mmap memory. Take out 168 * a write lock on the memsegs, so we can auto-detect primary/secondary. 169 * This means we never close the file while running (auto-close on exit). 170 * We also don't lock the whole file, so that in future we can use read-locks 171 * on other parts, e.g. memzones, to detect if there are running secondary 172 * processes. */ 173 static int 174 rte_eal_config_create(void) 175 { 176 struct rte_config *config = rte_eal_get_configuration(); 177 size_t page_sz = sysconf(_SC_PAGE_SIZE); 178 size_t cfg_len = sizeof(*config->mem_config); 179 size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); 180 void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; 181 int retval; 182 const struct internal_config *internal_conf = 183 eal_get_internal_configuration(); 184 185 const char *pathname = eal_runtime_config_path(); 186 187 if (internal_conf->no_shconf) 188 return 0; 189 190 /* map the config before hugepage address so that we don't waste a page */ 191 if (internal_conf->base_virtaddr != 0) 192 rte_mem_cfg_addr = (void *) 193 RTE_ALIGN_FLOOR(internal_conf->base_virtaddr - 194 sizeof(struct rte_mem_config), page_sz); 195 else 196 rte_mem_cfg_addr = NULL; 197 198 if (mem_cfg_fd < 0){ 199 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); 200 if (mem_cfg_fd < 0) { 201 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 202 pathname); 203 return -1; 204 } 205 } 206 207 retval = ftruncate(mem_cfg_fd, cfg_len); 208 if (retval < 0){ 209 close(mem_cfg_fd); 210 mem_cfg_fd = -1; 211 RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n", 212 pathname); 213 return -1; 214 } 215 216 retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); 217 if (retval < 0){ 218 close(mem_cfg_fd); 219 mem_cfg_fd = -1; 220 RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary " 221 "process running?\n", pathname); 222 return -1; 223 } 224 225 /* reserve space for config */ 226 rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, 227 &cfg_len_aligned, page_sz, 0, 0); 228 if (rte_mem_cfg_addr == NULL) { 229 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); 230 close(mem_cfg_fd); 231 mem_cfg_fd = -1; 232 return -1; 233 } 234 235 /* remap the actual file into the space we've just reserved */ 236 mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, 237 cfg_len_aligned, PROT_READ | PROT_WRITE, 238 MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); 239 if (mapped_mem_cfg_addr == MAP_FAILED) { 240 munmap(rte_mem_cfg_addr, cfg_len); 241 close(mem_cfg_fd); 242 mem_cfg_fd = -1; 243 RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); 244 return -1; 245 } 246 247 memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config)); 248 config->mem_config = rte_mem_cfg_addr; 249 250 /* store address of the config in the config itself so that secondary 251 * processes could later map the config into this exact location 252 */ 253 config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; 254 config->mem_config->dma_maskbits = 0; 255 256 return 0; 257 } 258 259 /* attach to an existing shared memory config */ 260 static int 261 rte_eal_config_attach(void) 262 { 263 struct rte_config *config = rte_eal_get_configuration(); 264 struct rte_mem_config *mem_config; 265 const struct internal_config *internal_conf = 266 eal_get_internal_configuration(); 267 268 const char *pathname = eal_runtime_config_path(); 269 270 if (internal_conf->no_shconf) 271 return 0; 272 273 if (mem_cfg_fd < 0){ 274 mem_cfg_fd = open(pathname, O_RDWR); 275 if (mem_cfg_fd < 0) { 276 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 277 pathname); 278 return -1; 279 } 280 } 281 282 /* map it as read-only first */ 283 mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), 284 PROT_READ, MAP_SHARED, mem_cfg_fd, 0); 285 if (mem_config == MAP_FAILED) { 286 close(mem_cfg_fd); 287 mem_cfg_fd = -1; 288 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n", 289 errno, strerror(errno)); 290 return -1; 291 } 292 293 config->mem_config = mem_config; 294 295 return 0; 296 } 297 298 /* reattach the shared config at exact memory location primary process has it */ 299 static int 300 rte_eal_config_reattach(void) 301 { 302 struct rte_config *config = rte_eal_get_configuration(); 303 struct rte_mem_config *mem_config; 304 void *rte_mem_cfg_addr; 305 const struct internal_config *internal_conf = 306 eal_get_internal_configuration(); 307 308 if (internal_conf->no_shconf) 309 return 0; 310 311 /* save the address primary process has mapped shared config to */ 312 rte_mem_cfg_addr = 313 (void *) (uintptr_t) config->mem_config->mem_cfg_addr; 314 315 /* unmap original config */ 316 munmap(config->mem_config, sizeof(struct rte_mem_config)); 317 318 /* remap the config at proper address */ 319 mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, 320 sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, 321 mem_cfg_fd, 0); 322 323 close(mem_cfg_fd); 324 mem_cfg_fd = -1; 325 326 if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { 327 if (mem_config != MAP_FAILED) { 328 /* errno is stale, don't use */ 329 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]" 330 " - please use '--" OPT_BASE_VIRTADDR 331 "' option\n", rte_mem_cfg_addr, mem_config); 332 munmap(mem_config, sizeof(struct rte_mem_config)); 333 return -1; 334 } 335 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n", 336 errno, strerror(errno)); 337 return -1; 338 } 339 340 config->mem_config = mem_config; 341 342 return 0; 343 } 344 345 /* Detect if we are a primary or a secondary process */ 346 enum rte_proc_type_t 347 eal_proc_type_detect(void) 348 { 349 enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; 350 const char *pathname = eal_runtime_config_path(); 351 const struct internal_config *internal_conf = 352 eal_get_internal_configuration(); 353 354 /* if there no shared config, there can be no secondary processes */ 355 if (!internal_conf->no_shconf) { 356 /* if we can open the file but not get a write-lock we are a 357 * secondary process. NOTE: if we get a file handle back, we 358 * keep that open and don't close it to prevent a race condition 359 * between multiple opens. 360 */ 361 if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && 362 (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) 363 ptype = RTE_PROC_SECONDARY; 364 } 365 366 RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", 367 ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); 368 369 return ptype; 370 } 371 372 /* Sets up rte_config structure with the pointer to shared memory config.*/ 373 static int 374 rte_config_init(void) 375 { 376 struct rte_config *config = rte_eal_get_configuration(); 377 const struct internal_config *internal_conf = 378 eal_get_internal_configuration(); 379 380 config->process_type = internal_conf->process_type; 381 382 switch (config->process_type) { 383 case RTE_PROC_PRIMARY: 384 if (rte_eal_config_create() < 0) 385 return -1; 386 eal_mcfg_update_from_internal(); 387 break; 388 case RTE_PROC_SECONDARY: 389 if (rte_eal_config_attach() < 0) 390 return -1; 391 eal_mcfg_wait_complete(); 392 if (eal_mcfg_check_version() < 0) { 393 RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n"); 394 return -1; 395 } 396 if (rte_eal_config_reattach() < 0) 397 return -1; 398 if (!__rte_mp_enable()) { 399 RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n"); 400 return -1; 401 } 402 eal_mcfg_update_internal(); 403 break; 404 case RTE_PROC_AUTO: 405 case RTE_PROC_INVALID: 406 RTE_LOG(ERR, EAL, "Invalid process type %d\n", 407 config->process_type); 408 return -1; 409 } 410 411 return 0; 412 } 413 414 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ 415 static void 416 eal_hugedirs_unlock(void) 417 { 418 int i; 419 struct internal_config *internal_conf = 420 eal_get_internal_configuration(); 421 422 for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) 423 { 424 /* skip uninitialized */ 425 if (internal_conf->hugepage_info[i].lock_descriptor < 0) 426 continue; 427 /* unlock hugepage file */ 428 flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN); 429 close(internal_conf->hugepage_info[i].lock_descriptor); 430 /* reset the field */ 431 internal_conf->hugepage_info[i].lock_descriptor = -1; 432 } 433 } 434 435 /* display usage */ 436 static void 437 eal_usage(const char *prgname) 438 { 439 rte_usage_hook_t hook = eal_get_application_usage_hook(); 440 441 printf("\nUsage: %s ", prgname); 442 eal_common_usage(); 443 printf("EAL Linux options:\n" 444 " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" 445 " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" 446 " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" 447 " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" 448 " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" 449 " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" 450 " --"OPT_VFIO_VF_TOKEN" VF token (UUID) shared between SR-IOV PF and VFs\n" 451 " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" 452 " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" 453 " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n" 454 "\n"); 455 /* Allow the application to print its usage message too if hook is set */ 456 if (hook) { 457 printf("===== Application Usage =====\n\n"); 458 (hook)(prgname); 459 } 460 } 461 462 static int 463 eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) 464 { 465 char * arg[RTE_MAX_NUMA_NODES]; 466 char *end; 467 int arg_num, i, len; 468 469 len = strnlen(strval, SOCKET_MEM_STRLEN); 470 if (len == SOCKET_MEM_STRLEN) { 471 RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); 472 return -1; 473 } 474 475 /* all other error cases will be caught later */ 476 if (!isdigit(strval[len-1])) 477 return -1; 478 479 /* split the optarg into separate socket values */ 480 arg_num = rte_strsplit(strval, len, 481 arg, RTE_MAX_NUMA_NODES, ','); 482 483 /* if split failed, or 0 arguments */ 484 if (arg_num <= 0) 485 return -1; 486 487 /* parse each defined socket option */ 488 errno = 0; 489 for (i = 0; i < arg_num; i++) { 490 uint64_t val; 491 end = NULL; 492 val = strtoull(arg[i], &end, 10); 493 494 /* check for invalid input */ 495 if ((errno != 0) || 496 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) 497 return -1; 498 val <<= 20; 499 socket_arg[i] = val; 500 } 501 502 return 0; 503 } 504 505 static int 506 eal_parse_vfio_intr(const char *mode) 507 { 508 struct internal_config *internal_conf = 509 eal_get_internal_configuration(); 510 unsigned i; 511 static struct { 512 const char *name; 513 enum rte_intr_mode value; 514 } map[] = { 515 { "legacy", RTE_INTR_MODE_LEGACY }, 516 { "msi", RTE_INTR_MODE_MSI }, 517 { "msix", RTE_INTR_MODE_MSIX }, 518 }; 519 520 for (i = 0; i < RTE_DIM(map); i++) { 521 if (!strcmp(mode, map[i].name)) { 522 internal_conf->vfio_intr_mode = map[i].value; 523 return 0; 524 } 525 } 526 return -1; 527 } 528 529 static int 530 eal_parse_vfio_vf_token(const char *vf_token) 531 { 532 struct internal_config *cfg = eal_get_internal_configuration(); 533 rte_uuid_t uuid; 534 535 if (!rte_uuid_parse(vf_token, uuid)) { 536 rte_uuid_copy(cfg->vfio_vf_token, uuid); 537 return 0; 538 } 539 540 return -1; 541 } 542 543 /* Parse the arguments for --log-level only */ 544 static void 545 eal_log_level_parse(int argc, char **argv) 546 { 547 int opt; 548 char **argvopt; 549 int option_index; 550 const int old_optind = optind; 551 const int old_optopt = optopt; 552 char * const old_optarg = optarg; 553 struct internal_config *internal_conf = 554 eal_get_internal_configuration(); 555 556 argvopt = argv; 557 optind = 1; 558 559 while ((opt = getopt_long(argc, argvopt, eal_short_options, 560 eal_long_options, &option_index)) != EOF) { 561 562 int ret; 563 564 /* getopt is not happy, stop right now */ 565 if (opt == '?') 566 break; 567 568 ret = (opt == OPT_LOG_LEVEL_NUM) ? 569 eal_parse_common_option(opt, optarg, internal_conf) : 0; 570 571 /* common parser is not happy */ 572 if (ret < 0) 573 break; 574 } 575 576 /* restore getopt lib */ 577 optind = old_optind; 578 optopt = old_optopt; 579 optarg = old_optarg; 580 } 581 582 /* Parse the argument given in the command line of the application */ 583 static int 584 eal_parse_args(int argc, char **argv) 585 { 586 int opt, ret; 587 char **argvopt; 588 int option_index; 589 char *prgname = argv[0]; 590 const int old_optind = optind; 591 const int old_optopt = optopt; 592 char * const old_optarg = optarg; 593 struct internal_config *internal_conf = 594 eal_get_internal_configuration(); 595 596 argvopt = argv; 597 optind = 1; 598 599 while ((opt = getopt_long(argc, argvopt, eal_short_options, 600 eal_long_options, &option_index)) != EOF) { 601 602 /* getopt didn't recognise the option */ 603 if (opt == '?') { 604 eal_usage(prgname); 605 ret = -1; 606 goto out; 607 } 608 609 /* eal_log_level_parse() already handled this option */ 610 if (opt == OPT_LOG_LEVEL_NUM) 611 continue; 612 613 ret = eal_parse_common_option(opt, optarg, internal_conf); 614 /* common parser is not happy */ 615 if (ret < 0) { 616 eal_usage(prgname); 617 ret = -1; 618 goto out; 619 } 620 /* common parser handled this option */ 621 if (ret == 0) 622 continue; 623 624 switch (opt) { 625 case 'h': 626 eal_usage(prgname); 627 exit(EXIT_SUCCESS); 628 629 case OPT_HUGE_DIR_NUM: 630 { 631 char *hdir = strdup(optarg); 632 if (hdir == NULL) 633 RTE_LOG(ERR, EAL, "Could not store hugepage directory\n"); 634 else { 635 /* free old hugepage dir */ 636 free(internal_conf->hugepage_dir); 637 internal_conf->hugepage_dir = hdir; 638 } 639 break; 640 } 641 case OPT_FILE_PREFIX_NUM: 642 { 643 char *prefix = strdup(optarg); 644 if (prefix == NULL) 645 RTE_LOG(ERR, EAL, "Could not store file prefix\n"); 646 else { 647 /* free old prefix */ 648 free(internal_conf->hugefile_prefix); 649 internal_conf->hugefile_prefix = prefix; 650 } 651 break; 652 } 653 case OPT_SOCKET_MEM_NUM: 654 if (eal_parse_socket_arg(optarg, 655 internal_conf->socket_mem) < 0) { 656 RTE_LOG(ERR, EAL, "invalid parameters for --" 657 OPT_SOCKET_MEM "\n"); 658 eal_usage(prgname); 659 ret = -1; 660 goto out; 661 } 662 internal_conf->force_sockets = 1; 663 break; 664 665 case OPT_SOCKET_LIMIT_NUM: 666 if (eal_parse_socket_arg(optarg, 667 internal_conf->socket_limit) < 0) { 668 RTE_LOG(ERR, EAL, "invalid parameters for --" 669 OPT_SOCKET_LIMIT "\n"); 670 eal_usage(prgname); 671 ret = -1; 672 goto out; 673 } 674 internal_conf->force_socket_limits = 1; 675 break; 676 677 case OPT_VFIO_INTR_NUM: 678 if (eal_parse_vfio_intr(optarg) < 0) { 679 RTE_LOG(ERR, EAL, "invalid parameters for --" 680 OPT_VFIO_INTR "\n"); 681 eal_usage(prgname); 682 ret = -1; 683 goto out; 684 } 685 break; 686 687 case OPT_VFIO_VF_TOKEN_NUM: 688 if (eal_parse_vfio_vf_token(optarg) < 0) { 689 RTE_LOG(ERR, EAL, "invalid parameters for --" 690 OPT_VFIO_VF_TOKEN "\n"); 691 eal_usage(prgname); 692 ret = -1; 693 goto out; 694 } 695 break; 696 697 case OPT_CREATE_UIO_DEV_NUM: 698 internal_conf->create_uio_dev = 1; 699 break; 700 701 case OPT_MBUF_POOL_OPS_NAME_NUM: 702 { 703 char *ops_name = strdup(optarg); 704 if (ops_name == NULL) 705 RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); 706 else { 707 /* free old ops name */ 708 free(internal_conf->user_mbuf_pool_ops_name); 709 710 internal_conf->user_mbuf_pool_ops_name = 711 ops_name; 712 } 713 break; 714 } 715 case OPT_MATCH_ALLOCATIONS_NUM: 716 internal_conf->match_allocations = 1; 717 break; 718 719 default: 720 if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { 721 RTE_LOG(ERR, EAL, "Option %c is not supported " 722 "on Linux\n", opt); 723 } else if (opt >= OPT_LONG_MIN_NUM && 724 opt < OPT_LONG_MAX_NUM) { 725 RTE_LOG(ERR, EAL, "Option %s is not supported " 726 "on Linux\n", 727 eal_long_options[option_index].name); 728 } else { 729 RTE_LOG(ERR, EAL, "Option %d is not supported " 730 "on Linux\n", opt); 731 } 732 eal_usage(prgname); 733 ret = -1; 734 goto out; 735 } 736 } 737 738 /* create runtime data directory. In no_shconf mode, skip any errors */ 739 if (eal_create_runtime_dir() < 0) { 740 if (internal_conf->no_shconf == 0) { 741 RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); 742 ret = -1; 743 goto out; 744 } else 745 RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n"); 746 } 747 748 if (eal_adjust_config(internal_conf) != 0) { 749 ret = -1; 750 goto out; 751 } 752 753 /* sanity checks */ 754 if (eal_check_common_options(internal_conf) != 0) { 755 eal_usage(prgname); 756 ret = -1; 757 goto out; 758 } 759 760 if (optind >= 0) 761 argv[optind-1] = prgname; 762 ret = optind-1; 763 764 out: 765 /* restore getopt lib */ 766 optind = old_optind; 767 optopt = old_optopt; 768 optarg = old_optarg; 769 770 return ret; 771 } 772 773 static int 774 check_socket(const struct rte_memseg_list *msl, void *arg) 775 { 776 int *socket_id = arg; 777 778 if (msl->external) 779 return 0; 780 781 return *socket_id == msl->socket_id; 782 } 783 784 static void 785 eal_check_mem_on_local_socket(void) 786 { 787 int socket_id; 788 const struct rte_config *config = rte_eal_get_configuration(); 789 790 socket_id = rte_lcore_to_socket_id(config->main_lcore); 791 792 if (rte_memseg_list_walk(check_socket, &socket_id) == 0) 793 RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n"); 794 } 795 796 static int 797 sync_func(__rte_unused void *arg) 798 { 799 return 0; 800 } 801 802 /* 803 * Request iopl privilege for all RPL, returns 0 on success 804 * iopl() call is mostly for the i386 architecture. For other architectures, 805 * return -1 to indicate IO privilege can't be changed in this way. 806 */ 807 int 808 rte_eal_iopl_init(void) 809 { 810 #if defined(RTE_ARCH_X86) 811 if (iopl(3) != 0) 812 return -1; 813 #endif 814 return 0; 815 } 816 817 #ifdef VFIO_PRESENT 818 static int rte_eal_vfio_setup(void) 819 { 820 if (rte_vfio_enable("vfio")) 821 return -1; 822 823 return 0; 824 } 825 #endif 826 827 static void rte_eal_init_alert(const char *msg) 828 { 829 fprintf(stderr, "EAL: FATAL: %s\n", msg); 830 RTE_LOG(ERR, EAL, "%s\n", msg); 831 } 832 833 /* 834 * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the 835 * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel 836 * IOMMU groups. If IOMMU is not enabled, that path would be empty. 837 * Therefore, checking if the path is empty will tell us if IOMMU is enabled. 838 */ 839 static bool 840 is_iommu_enabled(void) 841 { 842 DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH); 843 struct dirent *d; 844 int n = 0; 845 846 /* if directory doesn't exist, assume IOMMU is not enabled */ 847 if (dir == NULL) 848 return false; 849 850 while ((d = readdir(dir)) != NULL) { 851 /* skip dot and dot-dot */ 852 if (++n > 2) 853 break; 854 } 855 closedir(dir); 856 857 return n > 2; 858 } 859 860 /* Launch threads, called at application init(). */ 861 int 862 rte_eal_init(int argc, char **argv) 863 { 864 int i, fctret, ret; 865 static uint32_t run_once; 866 uint32_t has_run = 0; 867 const char *p; 868 static char logid[PATH_MAX]; 869 char cpuset[RTE_CPU_AFFINITY_STR_LEN]; 870 char thread_name[RTE_MAX_THREAD_NAME_LEN]; 871 bool phys_addrs; 872 const struct rte_config *config = rte_eal_get_configuration(); 873 struct internal_config *internal_conf = 874 eal_get_internal_configuration(); 875 876 /* checks if the machine is adequate */ 877 if (!rte_cpu_is_supported()) { 878 rte_eal_init_alert("unsupported cpu type."); 879 rte_errno = ENOTSUP; 880 return -1; 881 } 882 883 if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0, 884 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 885 rte_eal_init_alert("already called initialization."); 886 rte_errno = EALREADY; 887 return -1; 888 } 889 890 p = strrchr(argv[0], '/'); 891 strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid)); 892 893 eal_reset_internal_config(internal_conf); 894 895 /* set log level as early as possible */ 896 eal_log_level_parse(argc, argv); 897 898 /* clone argv to report out later in telemetry */ 899 eal_save_args(argc, argv); 900 901 if (rte_eal_cpu_init() < 0) { 902 rte_eal_init_alert("Cannot detect lcores."); 903 rte_errno = ENOTSUP; 904 return -1; 905 } 906 907 fctret = eal_parse_args(argc, argv); 908 if (fctret < 0) { 909 rte_eal_init_alert("Invalid 'command line' arguments."); 910 rte_errno = EINVAL; 911 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 912 return -1; 913 } 914 915 if (eal_plugins_init() < 0) { 916 rte_eal_init_alert("Cannot init plugins"); 917 rte_errno = EINVAL; 918 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 919 return -1; 920 } 921 922 if (eal_trace_init() < 0) { 923 rte_eal_init_alert("Cannot init trace"); 924 rte_errno = EFAULT; 925 return -1; 926 } 927 928 if (eal_option_device_parse()) { 929 rte_errno = ENODEV; 930 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 931 return -1; 932 } 933 934 if (rte_config_init() < 0) { 935 rte_eal_init_alert("Cannot init config"); 936 return -1; 937 } 938 939 if (rte_eal_intr_init() < 0) { 940 rte_eal_init_alert("Cannot init interrupt-handling thread"); 941 return -1; 942 } 943 944 if (rte_eal_alarm_init() < 0) { 945 rte_eal_init_alert("Cannot init alarm"); 946 /* rte_eal_alarm_init sets rte_errno on failure. */ 947 return -1; 948 } 949 950 /* Put mp channel init before bus scan so that we can init the vdev 951 * bus through mp channel in the secondary process before the bus scan. 952 */ 953 if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) { 954 rte_eal_init_alert("failed to init mp channel"); 955 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 956 rte_errno = EFAULT; 957 return -1; 958 } 959 } 960 961 /* register multi-process action callbacks for hotplug */ 962 if (eal_mp_dev_hotplug_init() < 0) { 963 rte_eal_init_alert("failed to register mp callback for hotplug"); 964 return -1; 965 } 966 967 if (rte_bus_scan()) { 968 rte_eal_init_alert("Cannot scan the buses for devices"); 969 rte_errno = ENODEV; 970 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 971 return -1; 972 } 973 974 phys_addrs = rte_eal_using_phys_addrs() != 0; 975 976 /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ 977 if (internal_conf->iova_mode == RTE_IOVA_DC) { 978 /* autodetect the IOVA mapping mode */ 979 enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); 980 981 if (iova_mode == RTE_IOVA_DC) { 982 RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n"); 983 984 if (!phys_addrs) { 985 /* if we have no access to physical addresses, 986 * pick IOVA as VA mode. 987 */ 988 iova_mode = RTE_IOVA_VA; 989 RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n"); 990 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) 991 } else if (rte_eal_check_module("rte_kni") == 1) { 992 iova_mode = RTE_IOVA_PA; 993 RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n"); 994 #endif 995 } else if (is_iommu_enabled()) { 996 /* we have an IOMMU, pick IOVA as VA mode */ 997 iova_mode = RTE_IOVA_VA; 998 RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n"); 999 } else { 1000 /* physical addresses available, and no IOMMU 1001 * found, so pick IOVA as PA. 1002 */ 1003 iova_mode = RTE_IOVA_PA; 1004 RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n"); 1005 } 1006 } 1007 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) 1008 /* Workaround for KNI which requires physical address to work 1009 * in kernels < 4.10 1010 */ 1011 if (iova_mode == RTE_IOVA_VA && 1012 rte_eal_check_module("rte_kni") == 1) { 1013 if (phys_addrs) { 1014 iova_mode = RTE_IOVA_PA; 1015 RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n"); 1016 } else { 1017 RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n"); 1018 } 1019 } 1020 #endif 1021 rte_eal_get_configuration()->iova_mode = iova_mode; 1022 } else { 1023 rte_eal_get_configuration()->iova_mode = 1024 internal_conf->iova_mode; 1025 } 1026 1027 if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { 1028 rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); 1029 rte_errno = EINVAL; 1030 return -1; 1031 } 1032 1033 RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", 1034 rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); 1035 1036 if (internal_conf->no_hugetlbfs == 0) { 1037 /* rte_config isn't initialized yet */ 1038 ret = internal_conf->process_type == RTE_PROC_PRIMARY ? 1039 eal_hugepage_info_init() : 1040 eal_hugepage_info_read(); 1041 if (ret < 0) { 1042 rte_eal_init_alert("Cannot get hugepage information."); 1043 rte_errno = EACCES; 1044 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1045 return -1; 1046 } 1047 } 1048 1049 if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) { 1050 if (internal_conf->no_hugetlbfs) 1051 internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE; 1052 } 1053 1054 if (internal_conf->vmware_tsc_map == 1) { 1055 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT 1056 rte_cycles_vmware_tsc_map = 1; 1057 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " 1058 "you must have monitor_control.pseudo_perfctr = TRUE\n"); 1059 #else 1060 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " 1061 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); 1062 #endif 1063 } 1064 1065 if (eal_log_init(logid, internal_conf->syslog_facility) < 0) { 1066 rte_eal_init_alert("Cannot init logging."); 1067 rte_errno = ENOMEM; 1068 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1069 return -1; 1070 } 1071 1072 #ifdef VFIO_PRESENT 1073 if (rte_eal_vfio_setup() < 0) { 1074 rte_eal_init_alert("Cannot init VFIO"); 1075 rte_errno = EAGAIN; 1076 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1077 return -1; 1078 } 1079 #endif 1080 /* in secondary processes, memory init may allocate additional fbarrays 1081 * not present in primary processes, so to avoid any potential issues, 1082 * initialize memzones first. 1083 */ 1084 if (rte_eal_memzone_init() < 0) { 1085 rte_eal_init_alert("Cannot init memzone"); 1086 rte_errno = ENODEV; 1087 return -1; 1088 } 1089 1090 if (rte_eal_memory_init() < 0) { 1091 rte_eal_init_alert("Cannot init memory"); 1092 rte_errno = ENOMEM; 1093 return -1; 1094 } 1095 1096 /* the directories are locked during eal_hugepage_info_init */ 1097 eal_hugedirs_unlock(); 1098 1099 if (rte_eal_malloc_heap_init() < 0) { 1100 rte_eal_init_alert("Cannot init malloc heap"); 1101 rte_errno = ENODEV; 1102 return -1; 1103 } 1104 1105 if (rte_eal_tailqs_init() < 0) { 1106 rte_eal_init_alert("Cannot init tail queues for objects"); 1107 rte_errno = EFAULT; 1108 return -1; 1109 } 1110 1111 if (rte_eal_timer_init() < 0) { 1112 rte_eal_init_alert("Cannot init HPET or TSC timers"); 1113 rte_errno = ENOTSUP; 1114 return -1; 1115 } 1116 1117 eal_check_mem_on_local_socket(); 1118 1119 if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t), 1120 &lcore_config[config->main_lcore].cpuset) != 0) { 1121 rte_eal_init_alert("Cannot set affinity"); 1122 rte_errno = EINVAL; 1123 return -1; 1124 } 1125 __rte_thread_init(config->main_lcore, 1126 &lcore_config[config->main_lcore].cpuset); 1127 1128 ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset)); 1129 RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", 1130 config->main_lcore, (uintptr_t)pthread_self(), cpuset, 1131 ret == 0 ? "" : "..."); 1132 1133 RTE_LCORE_FOREACH_WORKER(i) { 1134 1135 /* 1136 * create communication pipes between main thread 1137 * and children 1138 */ 1139 if (pipe(lcore_config[i].pipe_main2worker) < 0) 1140 rte_panic("Cannot create pipe\n"); 1141 if (pipe(lcore_config[i].pipe_worker2main) < 0) 1142 rte_panic("Cannot create pipe\n"); 1143 1144 lcore_config[i].state = WAIT; 1145 1146 /* create a thread for each lcore */ 1147 ret = pthread_create(&lcore_config[i].thread_id, NULL, 1148 eal_thread_loop, (void *)(uintptr_t)i); 1149 if (ret != 0) 1150 rte_panic("Cannot create thread\n"); 1151 1152 /* Set thread_name for aid in debugging. */ 1153 snprintf(thread_name, sizeof(thread_name), 1154 "lcore-worker-%d", i); 1155 ret = rte_thread_setname(lcore_config[i].thread_id, 1156 thread_name); 1157 if (ret != 0) 1158 RTE_LOG(DEBUG, EAL, 1159 "Cannot set name for lcore thread\n"); 1160 1161 ret = pthread_setaffinity_np(lcore_config[i].thread_id, 1162 sizeof(rte_cpuset_t), &lcore_config[i].cpuset); 1163 if (ret != 0) 1164 rte_panic("Cannot set affinity\n"); 1165 } 1166 1167 /* 1168 * Launch a dummy function on all worker lcores, so that main lcore 1169 * knows they are all ready when this function returns. 1170 */ 1171 rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN); 1172 rte_eal_mp_wait_lcore(); 1173 1174 /* initialize services so vdevs register service during bus_probe. */ 1175 ret = rte_service_init(); 1176 if (ret) { 1177 rte_eal_init_alert("rte_service_init() failed"); 1178 rte_errno = -ret; 1179 return -1; 1180 } 1181 1182 /* Probe all the buses and devices/drivers on them */ 1183 if (rte_bus_probe()) { 1184 rte_eal_init_alert("Cannot probe devices"); 1185 rte_errno = ENOTSUP; 1186 return -1; 1187 } 1188 1189 #ifdef VFIO_PRESENT 1190 /* Register mp action after probe() so that we got enough info */ 1191 if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0) 1192 return -1; 1193 #endif 1194 1195 /* initialize default service/lcore mappings and start running. Ignore 1196 * -ENOTSUP, as it indicates no service coremask passed to EAL. 1197 */ 1198 ret = rte_service_start_with_defaults(); 1199 if (ret < 0 && ret != -ENOTSUP) { 1200 rte_errno = -ret; 1201 return -1; 1202 } 1203 1204 /* 1205 * Clean up unused files in runtime directory. We do this at the end of 1206 * init and not at the beginning because we want to clean stuff up 1207 * whether we are primary or secondary process, but we cannot remove 1208 * primary process' files because secondary should be able to run even 1209 * if primary process is dead. 1210 * 1211 * In no_shconf mode, no runtime directory is created in the first 1212 * place, so no cleanup needed. 1213 */ 1214 if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) { 1215 rte_eal_init_alert("Cannot clear runtime directory"); 1216 return -1; 1217 } 1218 if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) { 1219 int tlog = rte_log_register_type_and_pick_level( 1220 "lib.telemetry", RTE_LOG_WARNING); 1221 if (tlog < 0) 1222 tlog = RTE_LOGTYPE_EAL; 1223 if (rte_telemetry_init(rte_eal_get_runtime_dir(), 1224 rte_version(), 1225 &internal_conf->ctrl_cpuset, rte_log, tlog) != 0) 1226 return -1; 1227 } 1228 1229 eal_mcfg_complete(); 1230 1231 return fctret; 1232 } 1233 1234 static int 1235 mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, 1236 void *arg __rte_unused) 1237 { 1238 /* ms is const, so find this memseg */ 1239 struct rte_memseg *found; 1240 1241 if (msl->external) 1242 return 0; 1243 1244 found = rte_mem_virt2memseg(ms->addr, msl); 1245 1246 found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; 1247 1248 return 0; 1249 } 1250 1251 int 1252 rte_eal_cleanup(void) 1253 { 1254 /* if we're in a primary process, we need to mark hugepages as freeable 1255 * so that finalization can release them back to the system. 1256 */ 1257 struct internal_config *internal_conf = 1258 eal_get_internal_configuration(); 1259 1260 if (rte_eal_process_type() == RTE_PROC_PRIMARY && 1261 internal_conf->hugepage_file.unlink_existing) 1262 rte_memseg_walk(mark_freeable, NULL); 1263 1264 rte_service_finalize(); 1265 #ifdef VFIO_PRESENT 1266 vfio_mp_sync_cleanup(); 1267 #endif 1268 rte_mp_channel_cleanup(); 1269 rte_trace_save(); 1270 eal_trace_fini(); 1271 /* after this point, any DPDK pointers will become dangling */ 1272 rte_eal_memory_detach(); 1273 eal_mp_dev_hotplug_cleanup(); 1274 rte_eal_malloc_heap_cleanup(); 1275 rte_eal_alarm_cleanup(); 1276 eal_cleanup_config(internal_conf); 1277 rte_eal_log_cleanup(); 1278 return 0; 1279 } 1280 1281 int rte_eal_create_uio_dev(void) 1282 { 1283 const struct internal_config *internal_conf = 1284 eal_get_internal_configuration(); 1285 1286 return internal_conf->create_uio_dev; 1287 } 1288 1289 enum rte_intr_mode 1290 rte_eal_vfio_intr_mode(void) 1291 { 1292 const struct internal_config *internal_conf = 1293 eal_get_internal_configuration(); 1294 1295 return internal_conf->vfio_intr_mode; 1296 } 1297 1298 void 1299 rte_eal_vfio_get_vf_token(rte_uuid_t vf_token) 1300 { 1301 struct internal_config *cfg = eal_get_internal_configuration(); 1302 1303 rte_uuid_copy(vf_token, cfg->vfio_vf_token); 1304 } 1305 1306 int 1307 rte_eal_check_module(const char *module_name) 1308 { 1309 char sysfs_mod_name[PATH_MAX]; 1310 struct stat st; 1311 int n; 1312 1313 if (NULL == module_name) 1314 return -1; 1315 1316 /* Check if there is sysfs mounted */ 1317 if (stat("/sys/module", &st) != 0) { 1318 RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n", 1319 errno, strerror(errno)); 1320 return -1; 1321 } 1322 1323 /* A module might be built-in, therefore try sysfs */ 1324 n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); 1325 if (n < 0 || n > PATH_MAX) { 1326 RTE_LOG(DEBUG, EAL, "Could not format module path\n"); 1327 return -1; 1328 } 1329 1330 if (stat(sysfs_mod_name, &st) != 0) { 1331 RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", 1332 sysfs_mod_name, errno, strerror(errno)); 1333 return 0; 1334 } 1335 1336 /* Module has been found */ 1337 return 1; 1338 } 1339