/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation.
 * Copyright(c) 2012-2014 6WIND S.A.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <getopt.h>
#include <sys/file.h>
#include <dirent.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/stat.h>
#if defined(RTE_ARCH_X86)
#include <sys/io.h>
#endif
#include <linux/version.h>

#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_service_component.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_bus.h>
#include <rte_version.h>
#include <malloc_heap.h>
#include <rte_vfio.h>

#include <telemetry_internal.h>
#include "eal_private.h"
#include "eal_thread.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_hugepages.h"
#include "eal_memcfg.h"
#include "eal_trace.h"
#include "eal_log.h"
#include "eal_options.h"
#include "eal_vfio.h"
#include "hotplug_mp.h"

/* default memory size (64 MB) used when hugepages are disabled */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* worst-case length of a --socket-mem/--socket-limit option string */
#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)

/* directory is non-empty iff the kernel has an IOMMU enabled */
#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;

/* write-lock over the memsegs field of the shared config file; holding it
 * is what marks a process as primary (see eal_proc_type_detect()) */
static struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = offsetof(struct rte_mem_config, memsegs),
		.l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs),
};

/* internal configuration (per-core) */
struct
lcore_config lcore_config[RTE_MAX_LCORE]; 75 76 /* used by rte_rdtsc() */ 77 int rte_cycles_vmware_tsc_map; 78 79 80 int 81 eal_clean_runtime_dir(void) 82 { 83 const char *runtime_dir = rte_eal_get_runtime_dir(); 84 DIR *dir; 85 struct dirent *dirent; 86 int dir_fd, fd, lck_result; 87 static const char * const filters[] = { 88 "fbarray_*", 89 "mp_socket_*" 90 }; 91 92 /* open directory */ 93 dir = opendir(runtime_dir); 94 if (!dir) { 95 RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", 96 runtime_dir); 97 goto error; 98 } 99 dir_fd = dirfd(dir); 100 101 /* lock the directory before doing anything, to avoid races */ 102 if (flock(dir_fd, LOCK_EX) < 0) { 103 RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", 104 runtime_dir); 105 goto error; 106 } 107 108 dirent = readdir(dir); 109 if (!dirent) { 110 RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", 111 runtime_dir); 112 goto error; 113 } 114 115 while (dirent != NULL) { 116 unsigned int f_idx; 117 bool skip = true; 118 119 /* skip files that don't match the patterns */ 120 for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { 121 const char *filter = filters[f_idx]; 122 123 if (fnmatch(filter, dirent->d_name, 0) == 0) { 124 skip = false; 125 break; 126 } 127 } 128 if (skip) { 129 dirent = readdir(dir); 130 continue; 131 } 132 133 /* try and lock the file */ 134 fd = openat(dir_fd, dirent->d_name, O_RDONLY); 135 136 /* skip to next file */ 137 if (fd == -1) { 138 dirent = readdir(dir); 139 continue; 140 } 141 142 /* non-blocking lock */ 143 lck_result = flock(fd, LOCK_EX | LOCK_NB); 144 145 /* if lock succeeds, remove the file */ 146 if (lck_result != -1) 147 unlinkat(dir_fd, dirent->d_name, 0); 148 close(fd); 149 dirent = readdir(dir); 150 } 151 152 /* closedir closes dir_fd and drops the lock */ 153 closedir(dir); 154 return 0; 155 156 error: 157 if (dir) 158 closedir(dir); 159 160 RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", 161 strerror(errno)); 162 163 return -1; 164 } 165 
166 167 /* create memory configuration in shared/mmap memory. Take out 168 * a write lock on the memsegs, so we can auto-detect primary/secondary. 169 * This means we never close the file while running (auto-close on exit). 170 * We also don't lock the whole file, so that in future we can use read-locks 171 * on other parts, e.g. memzones, to detect if there are running secondary 172 * processes. */ 173 static int 174 rte_eal_config_create(void) 175 { 176 struct rte_config *config = rte_eal_get_configuration(); 177 size_t page_sz = sysconf(_SC_PAGE_SIZE); 178 size_t cfg_len = sizeof(*config->mem_config); 179 size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); 180 void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; 181 int retval; 182 const struct internal_config *internal_conf = 183 eal_get_internal_configuration(); 184 185 const char *pathname = eal_runtime_config_path(); 186 187 if (internal_conf->no_shconf) 188 return 0; 189 190 /* map the config before hugepage address so that we don't waste a page */ 191 if (internal_conf->base_virtaddr != 0) 192 rte_mem_cfg_addr = (void *) 193 RTE_ALIGN_FLOOR(internal_conf->base_virtaddr - 194 sizeof(struct rte_mem_config), page_sz); 195 else 196 rte_mem_cfg_addr = NULL; 197 198 if (mem_cfg_fd < 0){ 199 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); 200 if (mem_cfg_fd < 0) { 201 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 202 pathname); 203 return -1; 204 } 205 } 206 207 retval = ftruncate(mem_cfg_fd, cfg_len); 208 if (retval < 0){ 209 close(mem_cfg_fd); 210 mem_cfg_fd = -1; 211 RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n", 212 pathname); 213 return -1; 214 } 215 216 retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); 217 if (retval < 0){ 218 close(mem_cfg_fd); 219 mem_cfg_fd = -1; 220 RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. 
Is another primary " 221 "process running?\n", pathname); 222 return -1; 223 } 224 225 /* reserve space for config */ 226 rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, 227 &cfg_len_aligned, page_sz, 0, 0); 228 if (rte_mem_cfg_addr == NULL) { 229 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); 230 close(mem_cfg_fd); 231 mem_cfg_fd = -1; 232 return -1; 233 } 234 235 /* remap the actual file into the space we've just reserved */ 236 mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, 237 cfg_len_aligned, PROT_READ | PROT_WRITE, 238 MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); 239 if (mapped_mem_cfg_addr == MAP_FAILED) { 240 munmap(rte_mem_cfg_addr, cfg_len); 241 close(mem_cfg_fd); 242 mem_cfg_fd = -1; 243 RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); 244 return -1; 245 } 246 247 memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config)); 248 config->mem_config = rte_mem_cfg_addr; 249 250 /* store address of the config in the config itself so that secondary 251 * processes could later map the config into this exact location 252 */ 253 config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; 254 config->mem_config->dma_maskbits = 0; 255 256 return 0; 257 } 258 259 /* attach to an existing shared memory config */ 260 static int 261 rte_eal_config_attach(void) 262 { 263 struct rte_config *config = rte_eal_get_configuration(); 264 struct rte_mem_config *mem_config; 265 const struct internal_config *internal_conf = 266 eal_get_internal_configuration(); 267 268 const char *pathname = eal_runtime_config_path(); 269 270 if (internal_conf->no_shconf) 271 return 0; 272 273 if (mem_cfg_fd < 0){ 274 mem_cfg_fd = open(pathname, O_RDWR); 275 if (mem_cfg_fd < 0) { 276 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 277 pathname); 278 return -1; 279 } 280 } 281 282 /* map it as read-only first */ 283 mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), 284 PROT_READ, MAP_SHARED, mem_cfg_fd, 0); 285 if 
(mem_config == MAP_FAILED) {
		close(mem_cfg_fd);
		mem_cfg_fd = -1;
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

/* reattach the shared config at exact memory location primary process has it */
static int
rte_eal_config_reattach(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	struct rte_mem_config *mem_config;
	void *rte_mem_cfg_addr;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->no_shconf)
		return 0;

	/* save the address primary process has mapped shared config to */
	rte_mem_cfg_addr =
		(void *) (uintptr_t) config->mem_config->mem_cfg_addr;

	/* unmap original config */
	munmap(config->mem_config, sizeof(struct rte_mem_config));

	/* remap the config at proper address */
	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
			mem_cfg_fd, 0);

	/* fd is no longer needed once the final mapping exists */
	close(mem_cfg_fd);
	mem_cfg_fd = -1;

	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
		if (mem_config != MAP_FAILED) {
			/* errno is stale, don't use */
			RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
				" - please use '--" OPT_BASE_VIRTADDR
				"' option\n", rte_mem_cfg_addr, mem_config);
			munmap(mem_config, sizeof(struct rte_mem_config));
			return -1;
		}
		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
			errno, strerror(errno));
		return -1;
	}

	config->mem_config = mem_config;

	return 0;
}

/* Detect if we are a primary or a secondary process */
enum rte_proc_type_t
eal_proc_type_detect(void)
{
	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
	const char *pathname = eal_runtime_config_path();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* if there no shared config, there can be no secondary processes */
	if (!internal_conf->no_shconf) {
		/* if we can open the file but not get a write-lock we are a
		 * secondary process. NOTE: if we get a file handle back, we
		 * keep that open and don't close it to prevent a race condition
		 * between multiple opens.
		 */
		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
			ptype = RTE_PROC_SECONDARY;
	}

	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
		ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");

	return ptype;
}

/* Sets up rte_config structure with the pointer to shared memory config.*/
static int
rte_config_init(void)
{
	struct rte_config *config = rte_eal_get_configuration();
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	config->process_type = internal_conf->process_type;

	switch (config->process_type) {
	case RTE_PROC_PRIMARY:
		/* primary creates the shared config and publishes settings */
		if (rte_eal_config_create() < 0)
			return -1;
		eal_mcfg_update_from_internal();
		break;
	case RTE_PROC_SECONDARY:
		/* secondary attaches, validates, then remaps at the primary's
		 * address */
		if (rte_eal_config_attach() < 0)
			return -1;
		eal_mcfg_wait_complete();
		if (eal_mcfg_check_version() < 0) {
			RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
			return -1;
		}
		if (rte_eal_config_reattach() < 0)
			return -1;
		if (!__rte_mp_enable()) {
			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
			return -1;
		}
		eal_mcfg_update_internal();
		break;
	case RTE_PROC_AUTO:
	case RTE_PROC_INVALID:
		RTE_LOG(ERR, EAL, "Invalid process type %d\n",
			config->process_type);
		return -1;
	}

	return 0;
}

/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
static void
eal_hugedirs_unlock(void)
{
	int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
	{
		/* skip uninitialized */
		if (internal_conf->hugepage_info[i].lock_descriptor < 0)
			continue;
		/* unlock hugepage file */
		flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN);
		close(internal_conf->hugepage_info[i].lock_descriptor);
		/* reset the field */
		internal_conf->hugepage_info[i].lock_descriptor = -1;
	}
}

/* display usage */
static void
eal_usage(const char *prgname)
{
	rte_usage_hook_t hook =
eal_get_application_usage_hook(); 440 441 printf("\nUsage: %s ", prgname); 442 eal_common_usage(); 443 printf("EAL Linux options:\n" 444 " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" 445 " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" 446 " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" 447 " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" 448 " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" 449 " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" 450 " --"OPT_VFIO_VF_TOKEN" VF token (UUID) shared between SR-IOV PF and VFs\n" 451 " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" 452 " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" 453 " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n" 454 "\n"); 455 /* Allow the application to print its usage message too if hook is set */ 456 if (hook) { 457 printf("===== Application Usage =====\n\n"); 458 (hook)(prgname); 459 } 460 } 461 462 static int 463 eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) 464 { 465 char * arg[RTE_MAX_NUMA_NODES]; 466 char *end; 467 int arg_num, i, len; 468 469 len = strnlen(strval, SOCKET_MEM_STRLEN); 470 if (len == SOCKET_MEM_STRLEN) { 471 RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); 472 return -1; 473 } 474 475 /* all other error cases will be caught later */ 476 if (!isdigit(strval[len-1])) 477 return -1; 478 479 /* split the optarg into separate socket values */ 480 arg_num = rte_strsplit(strval, len, 481 arg, RTE_MAX_NUMA_NODES, ','); 482 483 /* if split failed, or 0 arguments */ 484 if (arg_num <= 0) 485 return -1; 486 487 /* parse each defined socket option */ 488 errno = 0; 489 for (i = 0; i < arg_num; i++) { 490 uint64_t val; 491 end = NULL; 492 val = strtoull(arg[i], &end, 10); 493 494 /* check for invalid input */ 495 if ((errno != 0) || 496 (arg[i][0] == '\0') || (end == 
NULL) || (*end != '\0')) 497 return -1; 498 val <<= 20; 499 socket_arg[i] = val; 500 } 501 502 return 0; 503 } 504 505 static int 506 eal_parse_vfio_intr(const char *mode) 507 { 508 struct internal_config *internal_conf = 509 eal_get_internal_configuration(); 510 unsigned i; 511 static struct { 512 const char *name; 513 enum rte_intr_mode value; 514 } map[] = { 515 { "legacy", RTE_INTR_MODE_LEGACY }, 516 { "msi", RTE_INTR_MODE_MSI }, 517 { "msix", RTE_INTR_MODE_MSIX }, 518 }; 519 520 for (i = 0; i < RTE_DIM(map); i++) { 521 if (!strcmp(mode, map[i].name)) { 522 internal_conf->vfio_intr_mode = map[i].value; 523 return 0; 524 } 525 } 526 return -1; 527 } 528 529 static int 530 eal_parse_vfio_vf_token(const char *vf_token) 531 { 532 struct internal_config *cfg = eal_get_internal_configuration(); 533 rte_uuid_t uuid; 534 535 if (!rte_uuid_parse(vf_token, uuid)) { 536 rte_uuid_copy(cfg->vfio_vf_token, uuid); 537 return 0; 538 } 539 540 return -1; 541 } 542 543 /* Parse the arguments for --log-level only */ 544 static void 545 eal_log_level_parse(int argc, char **argv) 546 { 547 int opt; 548 char **argvopt; 549 int option_index; 550 const int old_optind = optind; 551 const int old_optopt = optopt; 552 char * const old_optarg = optarg; 553 struct internal_config *internal_conf = 554 eal_get_internal_configuration(); 555 556 argvopt = argv; 557 optind = 1; 558 559 while ((opt = getopt_long(argc, argvopt, eal_short_options, 560 eal_long_options, &option_index)) != EOF) { 561 562 int ret; 563 564 /* getopt is not happy, stop right now */ 565 if (opt == '?') 566 break; 567 568 ret = (opt == OPT_LOG_LEVEL_NUM) ? 
eal_parse_common_option(opt, optarg, internal_conf) : 0;

		/* common parser is not happy */
		if (ret < 0)
			break;
	}

	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;
}

/* Parse the argument given in the command line of the application */
static int
eal_parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	/* save getopt state so it can be restored on exit */
	const int old_optind = optind;
	const int old_optopt = optopt;
	char * const old_optarg = optarg;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	argvopt = argv;
	optind = 1;

	while ((opt = getopt_long(argc, argvopt, eal_short_options,
				  eal_long_options, &option_index)) != EOF) {

		/* getopt didn't recognise the option */
		if (opt == '?') {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}

		/* eal_log_level_parse() already handled this option */
		if (opt == OPT_LOG_LEVEL_NUM)
			continue;

		ret = eal_parse_common_option(opt, optarg, internal_conf);
		/* common parser is not happy */
		if (ret < 0) {
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
		/* common parser handled this option */
		if (ret == 0)
			continue;

		/* Linux-specific options follow */
		switch (opt) {
		case 'h':
			eal_usage(prgname);
			exit(EXIT_SUCCESS);

		case OPT_HUGE_DIR_NUM:
		{
			char *hdir = strdup(optarg);
			if (hdir == NULL)
				RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
			else {
				/* free old hugepage dir */
				free(internal_conf->hugepage_dir);
				internal_conf->hugepage_dir = hdir;
			}
			break;
		}
		case OPT_FILE_PREFIX_NUM:
		{
			char *prefix = strdup(optarg);
			if (prefix == NULL)
				RTE_LOG(ERR, EAL, "Could not store file prefix\n");
			else {
				/* free old prefix */
				free(internal_conf->hugefile_prefix);
				internal_conf->hugefile_prefix = prefix;
			}
			break;
		}
		case OPT_SOCKET_MEM_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_mem) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_MEM "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_sockets = 1;
			break;

		case OPT_SOCKET_LIMIT_NUM:
			if (eal_parse_socket_arg(optarg,
					internal_conf->socket_limit) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_SOCKET_LIMIT "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			internal_conf->force_socket_limits = 1;
			break;

		case OPT_VFIO_INTR_NUM:
			if (eal_parse_vfio_intr(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_INTR "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_VFIO_VF_TOKEN_NUM:
			if (eal_parse_vfio_vf_token(optarg) < 0) {
				RTE_LOG(ERR, EAL, "invalid parameters for --"
						OPT_VFIO_VF_TOKEN "\n");
				eal_usage(prgname);
				ret = -1;
				goto out;
			}
			break;

		case OPT_CREATE_UIO_DEV_NUM:
			internal_conf->create_uio_dev = 1;
			break;

		case OPT_MBUF_POOL_OPS_NAME_NUM:
		{
			char *ops_name = strdup(optarg);
			if (ops_name == NULL)
				RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
			else {
				/* free old ops name */
				free(internal_conf->user_mbuf_pool_ops_name);

				internal_conf->user_mbuf_pool_ops_name =
						ops_name;
			}
			break;
		}
		case OPT_MATCH_ALLOCATIONS_NUM:
			internal_conf->match_allocations = 1;
			break;

		default:
			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
				RTE_LOG(ERR, EAL, "Option %c is not supported "
					"on Linux\n", opt);
			} else if (opt >= OPT_LONG_MIN_NUM &&
				   opt < OPT_LONG_MAX_NUM) {
				RTE_LOG(ERR, EAL, "Option %s is not supported "
					"on Linux\n",
					eal_long_options[option_index].name);
			} else {
				RTE_LOG(ERR, EAL, "Option %d is not supported "
					"on Linux\n", opt);
			}
			eal_usage(prgname);
			ret = -1;
			goto out;
		}
	}

	/* create runtime data directory. In no_shconf mode, skip any errors */
	if (eal_create_runtime_dir() < 0) {
		if (internal_conf->no_shconf == 0) {
			RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
			ret = -1;
			goto out;
		} else
			RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n");
	}

	if (eal_adjust_config(internal_conf) != 0) {
		ret = -1;
		goto out;
	}

	/* sanity checks */
	if (eal_check_common_options(internal_conf) != 0) {
		eal_usage(prgname);
		ret = -1;
		goto out;
	}

	/* leave prgname in place so the app sees a conventional argv */
	if (optind >= 0)
		argv[optind-1] = prgname;
	ret = optind-1;

out:
	/* restore getopt lib */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;

	return ret;
}

/* memseg-list walk callback: non-zero when the list is on *socket_id */
static int
check_socket(const struct rte_memseg_list *msl, void *arg)
{
	int *socket_id = arg;

	if (msl->external)
		return 0;

	return *socket_id == msl->socket_id;
}

/* Warn when the main lcore's NUMA socket has no memory segments. */
static void
eal_check_mem_on_local_socket(void)
{
	int socket_id;
	const struct rte_config *config = rte_eal_get_configuration();

	socket_id = rte_lcore_to_socket_id(config->main_lcore);

	if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
		RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n");
}

/* no-op launched on workers so the main lcore knows they are ready */
static int
sync_func(__rte_unused void *arg)
{
	return 0;
}

/*
 * Request iopl privilege for all RPL, returns 0 on success
 * iopl() call is mostly for the i386 architecture. For other architectures,
 * return -1 to indicate IO privilege can't be changed in this way.
806 */ 807 int 808 rte_eal_iopl_init(void) 809 { 810 #if defined(RTE_ARCH_X86) 811 if (iopl(3) != 0) 812 return -1; 813 #endif 814 return 0; 815 } 816 817 #ifdef VFIO_PRESENT 818 static int rte_eal_vfio_setup(void) 819 { 820 if (rte_vfio_enable("vfio")) 821 return -1; 822 823 return 0; 824 } 825 #endif 826 827 static void rte_eal_init_alert(const char *msg) 828 { 829 fprintf(stderr, "EAL: FATAL: %s\n", msg); 830 RTE_LOG(ERR, EAL, "%s\n", msg); 831 } 832 833 /* 834 * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the 835 * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel 836 * IOMMU groups. If IOMMU is not enabled, that path would be empty. 837 * Therefore, checking if the path is empty will tell us if IOMMU is enabled. 838 */ 839 static bool 840 is_iommu_enabled(void) 841 { 842 DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH); 843 struct dirent *d; 844 int n = 0; 845 846 /* if directory doesn't exist, assume IOMMU is not enabled */ 847 if (dir == NULL) 848 return false; 849 850 while ((d = readdir(dir)) != NULL) { 851 /* skip dot and dot-dot */ 852 if (++n > 2) 853 break; 854 } 855 closedir(dir); 856 857 return n > 2; 858 } 859 860 /* Launch threads, called at application init(). 
 */
int
rte_eal_init(int argc, char **argv)
{
	int i, fctret, ret;
	pthread_t thread_id;
	/* guards against concurrent/repeated initialization */
	static uint32_t run_once;
	uint32_t has_run = 0;
	const char *p;
	static char logid[PATH_MAX];
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
	char thread_name[RTE_MAX_THREAD_NAME_LEN];
	bool phys_addrs;
	const struct rte_config *config = rte_eal_get_configuration();
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* checks if the machine is adequate */
	if (!rte_cpu_is_supported()) {
		rte_eal_init_alert("unsupported cpu type.");
		rte_errno = ENOTSUP;
		return -1;
	}

	if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		rte_eal_init_alert("already called initialization.");
		rte_errno = EALREADY;
		return -1;
	}

	/* use the program's basename as the log id */
	p = strrchr(argv[0], '/');
	strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));
	thread_id = pthread_self();

	eal_reset_internal_config(internal_conf);

	/* set log level as early as possible */
	eal_log_level_parse(argc, argv);

	/* clone argv to report out later in telemetry */
	eal_save_args(argc, argv);

	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}

	fctret = eal_parse_args(argc, argv);
	if (fctret < 0) {
		rte_eal_init_alert("Invalid 'command line' arguments.");
		rte_errno = EINVAL;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_plugins_init() < 0) {
		rte_eal_init_alert("Cannot init plugins");
		rte_errno = EINVAL;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (eal_trace_init() < 0) {
		rte_eal_init_alert("Cannot init trace");
		rte_errno = EFAULT;
		return -1;
	}

	if (eal_option_device_parse()) {
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	if (rte_config_init() < 0) {
		rte_eal_init_alert("Cannot init config");
		return -1;
	}

	if (rte_eal_intr_init() < 0) {
		rte_eal_init_alert("Cannot init interrupt-handling thread");
		return -1;
	}

	if (rte_eal_alarm_init() < 0) {
		rte_eal_init_alert("Cannot init alarm");
		/* rte_eal_alarm_init sets rte_errno on failure. */
		return -1;
	}

	/* Put mp channel init before bus scan so that we can init the vdev
	 * bus through mp channel in the secondary process before the bus scan.
	 */
	if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
		rte_eal_init_alert("failed to init mp channel");
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			rte_errno = EFAULT;
			return -1;
		}
	}

	/* register multi-process action callbacks for hotplug */
	if (eal_mp_dev_hotplug_init() < 0) {
		rte_eal_init_alert("failed to register mp callback for hotplug");
		return -1;
	}

	if (rte_bus_scan()) {
		rte_eal_init_alert("Cannot scan the buses for devices");
		rte_errno = ENODEV;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

	phys_addrs = rte_eal_using_phys_addrs() != 0;

	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
	if (internal_conf->iova_mode == RTE_IOVA_DC) {
		/* autodetect the IOVA mapping mode */
		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();

		if (iova_mode == RTE_IOVA_DC) {
			RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");

			if (!phys_addrs) {
				/* if we have no access to physical addresses,
				 * pick IOVA as VA mode.
				 */
				iova_mode = RTE_IOVA_VA;
				RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
#if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
			} else if (rte_eal_check_module("rte_kni") == 1) {
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n");
#endif
			} else if (is_iommu_enabled()) {
				/* we have an IOMMU, pick IOVA as VA mode */
				iova_mode = RTE_IOVA_VA;
				RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
			} else {
				/* physical addresses available, and no IOMMU
				 * found, so pick IOVA as PA.
				 */
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
			}
		}
#if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
		/* Workaround for KNI which requires physical address to work
		 * in kernels < 4.10
		 */
		if (iova_mode == RTE_IOVA_VA &&
				rte_eal_check_module("rte_kni") == 1) {
			if (phys_addrs) {
				iova_mode = RTE_IOVA_PA;
				RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
			} else {
				RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
			}
		}
#endif
		rte_eal_get_configuration()->iova_mode = iova_mode;
	} else {
		rte_eal_get_configuration()->iova_mode =
			internal_conf->iova_mode;
	}

	if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
		rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
		rte_errno = EINVAL;
		return -1;
	}

	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");

	if (internal_conf->no_hugetlbfs == 0) {
		/* rte_config isn't initialized yet */
		ret = internal_conf->process_type == RTE_PROC_PRIMARY ?
				eal_hugepage_info_init() :
				eal_hugepage_info_read();
		if (ret < 0) {
			rte_eal_init_alert("Cannot get hugepage information.");
			rte_errno = EACCES;
			__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
			return -1;
		}
	}

	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) {
		if (internal_conf->no_hugetlbfs)
			internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE;
	}

	if (internal_conf->vmware_tsc_map == 1) {
#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
		rte_cycles_vmware_tsc_map = 1;
		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
				"you must have monitor_control.pseudo_perfctr = TRUE\n");
#else
		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
#endif
	}

	if (eal_log_init(logid, internal_conf->syslog_facility) < 0) {
		rte_eal_init_alert("Cannot init logging.");
		rte_errno = ENOMEM;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}

#ifdef VFIO_PRESENT
	if (rte_eal_vfio_setup() < 0) {
		rte_eal_init_alert("Cannot init VFIO");
		rte_errno = EAGAIN;
		__atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
		return -1;
	}
#endif
	/* in secondary processes, memory init may allocate additional fbarrays
	 * not present in primary processes, so to avoid any potential issues,
	 * initialize memzones first.
	 */
	if (rte_eal_memzone_init() < 0) {
		rte_eal_init_alert("Cannot init memzone");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_memory_init() < 0) {
		rte_eal_init_alert("Cannot init memory");
		rte_errno = ENOMEM;
		return -1;
	}

	/* the directories are locked during eal_hugepage_info_init */
	eal_hugedirs_unlock();

	if (rte_eal_malloc_heap_init() < 0) {
		rte_eal_init_alert("Cannot init malloc heap");
		rte_errno = ENODEV;
		return -1;
	}

	if (rte_eal_tailqs_init() < 0) {
		rte_eal_init_alert("Cannot init tail queues for objects");
		rte_errno = EFAULT;
		return -1;
	}

	if (rte_eal_timer_init() < 0) {
		rte_eal_init_alert("Cannot init HPET or TSC timers");
		rte_errno = ENOTSUP;
		return -1;
	}

	eal_check_mem_on_local_socket();

	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
			&lcore_config[config->main_lcore].cpuset) != 0) {
		rte_eal_init_alert("Cannot set affinity");
		rte_errno = EINVAL;
		return -1;
	}
	__rte_thread_init(config->main_lcore,
		&lcore_config[config->main_lcore].cpuset);

	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
	RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
		config->main_lcore, (uintptr_t)thread_id, cpuset,
		ret == 0 ? "" : "...");

	RTE_LCORE_FOREACH_WORKER(i) {

		/*
		 * create communication pipes between main thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_main2worker) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_worker2main) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = pthread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, NULL);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, sizeof(thread_name),
			"lcore-worker-%d", i);
		ret = rte_thread_setname(lcore_config[i].thread_id,
						thread_name);
		if (ret != 0)
			RTE_LOG(DEBUG, EAL,
				"Cannot set name for lcore thread\n");

		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
		if (ret != 0)
			rte_panic("Cannot set affinity\n");
	}

	/*
	 * Launch a dummy function on all worker lcores, so that main lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
	rte_eal_mp_wait_lcore();

	/* initialize services so vdevs register service during bus_probe. */
	ret = rte_service_init();
	if (ret) {
		rte_eal_init_alert("rte_service_init() failed");
		rte_errno = -ret;
		return -1;
	}

	/* Probe all the buses and devices/drivers on them */
	if (rte_bus_probe()) {
		rte_eal_init_alert("Cannot probe devices");
		rte_errno = ENOTSUP;
		return -1;
	}

#ifdef VFIO_PRESENT
	/* Register mp action after probe() so that we got enough info */
	if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
		return -1;
#endif

	/* initialize default service/lcore mappings and start running.
Ignore 1198 * -ENOTSUP, as it indicates no service coremask passed to EAL. 1199 */ 1200 ret = rte_service_start_with_defaults(); 1201 if (ret < 0 && ret != -ENOTSUP) { 1202 rte_errno = -ret; 1203 return -1; 1204 } 1205 1206 /* 1207 * Clean up unused files in runtime directory. We do this at the end of 1208 * init and not at the beginning because we want to clean stuff up 1209 * whether we are primary or secondary process, but we cannot remove 1210 * primary process' files because secondary should be able to run even 1211 * if primary process is dead. 1212 * 1213 * In no_shconf mode, no runtime directory is created in the first 1214 * place, so no cleanup needed. 1215 */ 1216 if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) { 1217 rte_eal_init_alert("Cannot clear runtime directory"); 1218 return -1; 1219 } 1220 if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) { 1221 int tlog = rte_log_register_type_and_pick_level( 1222 "lib.telemetry", RTE_LOG_WARNING); 1223 if (tlog < 0) 1224 tlog = RTE_LOGTYPE_EAL; 1225 if (rte_telemetry_init(rte_eal_get_runtime_dir(), 1226 rte_version(), 1227 &internal_conf->ctrl_cpuset, rte_log, tlog) != 0) 1228 return -1; 1229 } 1230 1231 eal_mcfg_complete(); 1232 1233 return fctret; 1234 } 1235 1236 static int 1237 mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, 1238 void *arg __rte_unused) 1239 { 1240 /* ms is const, so find this memseg */ 1241 struct rte_memseg *found; 1242 1243 if (msl->external) 1244 return 0; 1245 1246 found = rte_mem_virt2memseg(ms->addr, msl); 1247 1248 found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; 1249 1250 return 0; 1251 } 1252 1253 int 1254 rte_eal_cleanup(void) 1255 { 1256 /* if we're in a primary process, we need to mark hugepages as freeable 1257 * so that finalization can release them back to the system. 
1258 */ 1259 struct internal_config *internal_conf = 1260 eal_get_internal_configuration(); 1261 1262 if (rte_eal_process_type() == RTE_PROC_PRIMARY && 1263 internal_conf->hugepage_file.unlink_existing) 1264 rte_memseg_walk(mark_freeable, NULL); 1265 1266 rte_service_finalize(); 1267 #ifdef VFIO_PRESENT 1268 vfio_mp_sync_cleanup(); 1269 #endif 1270 rte_mp_channel_cleanup(); 1271 /* after this point, any DPDK pointers will become dangling */ 1272 rte_eal_memory_detach(); 1273 eal_mp_dev_hotplug_cleanup(); 1274 rte_eal_malloc_heap_cleanup(); 1275 rte_eal_alarm_cleanup(); 1276 rte_trace_save(); 1277 eal_trace_fini(); 1278 eal_cleanup_config(internal_conf); 1279 rte_eal_log_cleanup(); 1280 return 0; 1281 } 1282 1283 int rte_eal_create_uio_dev(void) 1284 { 1285 const struct internal_config *internal_conf = 1286 eal_get_internal_configuration(); 1287 1288 return internal_conf->create_uio_dev; 1289 } 1290 1291 enum rte_intr_mode 1292 rte_eal_vfio_intr_mode(void) 1293 { 1294 const struct internal_config *internal_conf = 1295 eal_get_internal_configuration(); 1296 1297 return internal_conf->vfio_intr_mode; 1298 } 1299 1300 void 1301 rte_eal_vfio_get_vf_token(rte_uuid_t vf_token) 1302 { 1303 struct internal_config *cfg = eal_get_internal_configuration(); 1304 1305 rte_uuid_copy(vf_token, cfg->vfio_vf_token); 1306 } 1307 1308 int 1309 rte_eal_check_module(const char *module_name) 1310 { 1311 char sysfs_mod_name[PATH_MAX]; 1312 struct stat st; 1313 int n; 1314 1315 if (NULL == module_name) 1316 return -1; 1317 1318 /* Check if there is sysfs mounted */ 1319 if (stat("/sys/module", &st) != 0) { 1320 RTE_LOG(DEBUG, EAL, "sysfs is not mounted! 
error %i (%s)\n", 1321 errno, strerror(errno)); 1322 return -1; 1323 } 1324 1325 /* A module might be built-in, therefore try sysfs */ 1326 n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); 1327 if (n < 0 || n > PATH_MAX) { 1328 RTE_LOG(DEBUG, EAL, "Could not format module path\n"); 1329 return -1; 1330 } 1331 1332 if (stat(sysfs_mod_name, &st) != 0) { 1333 RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", 1334 sysfs_mod_name, errno, strerror(errno)); 1335 return 0; 1336 } 1337 1338 /* Module has been found */ 1339 return 1; 1340 } 1341