1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation. 3 * Copyright(c) 2012-2014 6WIND S.A. 4 */ 5 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <stdint.h> 9 #include <string.h> 10 #include <stdarg.h> 11 #include <unistd.h> 12 #include <pthread.h> 13 #include <syslog.h> 14 #include <getopt.h> 15 #include <sys/file.h> 16 #include <dirent.h> 17 #include <fcntl.h> 18 #include <fnmatch.h> 19 #include <stddef.h> 20 #include <errno.h> 21 #include <limits.h> 22 #include <sys/mman.h> 23 #include <sys/queue.h> 24 #include <sys/stat.h> 25 #if defined(RTE_ARCH_X86) 26 #include <sys/io.h> 27 #endif 28 #include <linux/version.h> 29 30 #include <rte_compat.h> 31 #include <rte_common.h> 32 #include <rte_debug.h> 33 #include <rte_memory.h> 34 #include <rte_launch.h> 35 #include <rte_eal.h> 36 #include <rte_errno.h> 37 #include <rte_per_lcore.h> 38 #include <rte_lcore.h> 39 #include <rte_service_component.h> 40 #include <rte_log.h> 41 #include <rte_random.h> 42 #include <rte_cycles.h> 43 #include <rte_string_fns.h> 44 #include <rte_cpuflags.h> 45 #include <rte_interrupts.h> 46 #include <rte_bus.h> 47 #include <rte_dev.h> 48 #include <rte_devargs.h> 49 #include <rte_version.h> 50 #include <malloc_heap.h> 51 #include <rte_vfio.h> 52 53 #include <telemetry_internal.h> 54 #include "eal_private.h" 55 #include "eal_thread.h" 56 #include "eal_internal_cfg.h" 57 #include "eal_filesystem.h" 58 #include "eal_hugepages.h" 59 #include "eal_memcfg.h" 60 #include "eal_trace.h" 61 #include "eal_log.h" 62 #include "eal_options.h" 63 #include "eal_vfio.h" 64 #include "hotplug_mp.h" 65 66 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) 67 68 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) 69 70 #define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups" 71 72 /* define fd variable here, because file needs to be kept open for the 73 * duration of the program, as we hold a write lock on it in the primary proc */ 74 static int mem_cfg_fd = -1; 
75 76 static struct flock wr_lock = { 77 .l_type = F_WRLCK, 78 .l_whence = SEEK_SET, 79 .l_start = offsetof(struct rte_mem_config, memsegs), 80 .l_len = RTE_SIZEOF_FIELD(struct rte_mem_config, memsegs), 81 }; 82 83 /* internal configuration (per-core) */ 84 struct lcore_config lcore_config[RTE_MAX_LCORE]; 85 86 /* used by rte_rdtsc() */ 87 int rte_cycles_vmware_tsc_map; 88 89 90 int 91 eal_clean_runtime_dir(void) 92 { 93 const char *runtime_dir = rte_eal_get_runtime_dir(); 94 DIR *dir; 95 struct dirent *dirent; 96 int dir_fd, fd, lck_result; 97 static const char * const filters[] = { 98 "fbarray_*", 99 "mp_socket_*" 100 }; 101 102 /* open directory */ 103 dir = opendir(runtime_dir); 104 if (!dir) { 105 RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", 106 runtime_dir); 107 goto error; 108 } 109 dir_fd = dirfd(dir); 110 111 /* lock the directory before doing anything, to avoid races */ 112 if (flock(dir_fd, LOCK_EX) < 0) { 113 RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", 114 runtime_dir); 115 goto error; 116 } 117 118 dirent = readdir(dir); 119 if (!dirent) { 120 RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", 121 runtime_dir); 122 goto error; 123 } 124 125 while (dirent != NULL) { 126 unsigned int f_idx; 127 bool skip = true; 128 129 /* skip files that don't match the patterns */ 130 for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { 131 const char *filter = filters[f_idx]; 132 133 if (fnmatch(filter, dirent->d_name, 0) == 0) { 134 skip = false; 135 break; 136 } 137 } 138 if (skip) { 139 dirent = readdir(dir); 140 continue; 141 } 142 143 /* try and lock the file */ 144 fd = openat(dir_fd, dirent->d_name, O_RDONLY); 145 146 /* skip to next file */ 147 if (fd == -1) { 148 dirent = readdir(dir); 149 continue; 150 } 151 152 /* non-blocking lock */ 153 lck_result = flock(fd, LOCK_EX | LOCK_NB); 154 155 /* if lock succeeds, remove the file */ 156 if (lck_result != -1) 157 unlinkat(dir_fd, dirent->d_name, 0); 158 close(fd); 159 dirent = 
readdir(dir); 160 } 161 162 /* closedir closes dir_fd and drops the lock */ 163 closedir(dir); 164 return 0; 165 166 error: 167 if (dir) 168 closedir(dir); 169 170 RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", 171 strerror(errno)); 172 173 return -1; 174 } 175 176 177 /* create memory configuration in shared/mmap memory. Take out 178 * a write lock on the memsegs, so we can auto-detect primary/secondary. 179 * This means we never close the file while running (auto-close on exit). 180 * We also don't lock the whole file, so that in future we can use read-locks 181 * on other parts, e.g. memzones, to detect if there are running secondary 182 * processes. */ 183 static int 184 rte_eal_config_create(void) 185 { 186 struct rte_config *config = rte_eal_get_configuration(); 187 size_t page_sz = sysconf(_SC_PAGE_SIZE); 188 size_t cfg_len = sizeof(*config->mem_config); 189 size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz); 190 void *rte_mem_cfg_addr, *mapped_mem_cfg_addr; 191 int retval; 192 const struct internal_config *internal_conf = 193 eal_get_internal_configuration(); 194 195 const char *pathname = eal_runtime_config_path(); 196 197 if (internal_conf->no_shconf) 198 return 0; 199 200 /* map the config before hugepage address so that we don't waste a page */ 201 if (internal_conf->base_virtaddr != 0) 202 rte_mem_cfg_addr = (void *) 203 RTE_ALIGN_FLOOR(internal_conf->base_virtaddr - 204 sizeof(struct rte_mem_config), page_sz); 205 else 206 rte_mem_cfg_addr = NULL; 207 208 if (mem_cfg_fd < 0){ 209 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); 210 if (mem_cfg_fd < 0) { 211 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 212 pathname); 213 return -1; 214 } 215 } 216 217 retval = ftruncate(mem_cfg_fd, cfg_len); 218 if (retval < 0){ 219 close(mem_cfg_fd); 220 mem_cfg_fd = -1; 221 RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n", 222 pathname); 223 return -1; 224 } 225 226 retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); 227 if 
(retval < 0){ 228 close(mem_cfg_fd); 229 mem_cfg_fd = -1; 230 RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary " 231 "process running?\n", pathname); 232 return -1; 233 } 234 235 /* reserve space for config */ 236 rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr, 237 &cfg_len_aligned, page_sz, 0, 0); 238 if (rte_mem_cfg_addr == NULL) { 239 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n"); 240 close(mem_cfg_fd); 241 mem_cfg_fd = -1; 242 return -1; 243 } 244 245 /* remap the actual file into the space we've just reserved */ 246 mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr, 247 cfg_len_aligned, PROT_READ | PROT_WRITE, 248 MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0); 249 if (mapped_mem_cfg_addr == MAP_FAILED) { 250 munmap(rte_mem_cfg_addr, cfg_len); 251 close(mem_cfg_fd); 252 mem_cfg_fd = -1; 253 RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n"); 254 return -1; 255 } 256 257 memcpy(rte_mem_cfg_addr, config->mem_config, sizeof(struct rte_mem_config)); 258 config->mem_config = rte_mem_cfg_addr; 259 260 /* store address of the config in the config itself so that secondary 261 * processes could later map the config into this exact location 262 */ 263 config->mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; 264 config->mem_config->dma_maskbits = 0; 265 266 return 0; 267 } 268 269 /* attach to an existing shared memory config */ 270 static int 271 rte_eal_config_attach(void) 272 { 273 struct rte_config *config = rte_eal_get_configuration(); 274 struct rte_mem_config *mem_config; 275 const struct internal_config *internal_conf = 276 eal_get_internal_configuration(); 277 278 const char *pathname = eal_runtime_config_path(); 279 280 if (internal_conf->no_shconf) 281 return 0; 282 283 if (mem_cfg_fd < 0){ 284 mem_cfg_fd = open(pathname, O_RDWR); 285 if (mem_cfg_fd < 0) { 286 RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", 287 pathname); 288 return -1; 289 } 290 } 291 292 /* map it as read-only first */ 293 mem_config = 
(struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), 294 PROT_READ, MAP_SHARED, mem_cfg_fd, 0); 295 if (mem_config == MAP_FAILED) { 296 close(mem_cfg_fd); 297 mem_cfg_fd = -1; 298 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n", 299 errno, strerror(errno)); 300 return -1; 301 } 302 303 config->mem_config = mem_config; 304 305 return 0; 306 } 307 308 /* reattach the shared config at exact memory location primary process has it */ 309 static int 310 rte_eal_config_reattach(void) 311 { 312 struct rte_config *config = rte_eal_get_configuration(); 313 struct rte_mem_config *mem_config; 314 void *rte_mem_cfg_addr; 315 const struct internal_config *internal_conf = 316 eal_get_internal_configuration(); 317 318 if (internal_conf->no_shconf) 319 return 0; 320 321 /* save the address primary process has mapped shared config to */ 322 rte_mem_cfg_addr = 323 (void *) (uintptr_t) config->mem_config->mem_cfg_addr; 324 325 /* unmap original config */ 326 munmap(config->mem_config, sizeof(struct rte_mem_config)); 327 328 /* remap the config at proper address */ 329 mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, 330 sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, 331 mem_cfg_fd, 0); 332 333 close(mem_cfg_fd); 334 mem_cfg_fd = -1; 335 336 if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { 337 if (mem_config != MAP_FAILED) { 338 /* errno is stale, don't use */ 339 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]" 340 " - please use '--" OPT_BASE_VIRTADDR 341 "' option\n", rte_mem_cfg_addr, mem_config); 342 munmap(mem_config, sizeof(struct rte_mem_config)); 343 return -1; 344 } 345 RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! 
error %i (%s)\n", 346 errno, strerror(errno)); 347 return -1; 348 } 349 350 config->mem_config = mem_config; 351 352 return 0; 353 } 354 355 /* Detect if we are a primary or a secondary process */ 356 enum rte_proc_type_t 357 eal_proc_type_detect(void) 358 { 359 enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; 360 const char *pathname = eal_runtime_config_path(); 361 const struct internal_config *internal_conf = 362 eal_get_internal_configuration(); 363 364 /* if there no shared config, there can be no secondary processes */ 365 if (!internal_conf->no_shconf) { 366 /* if we can open the file but not get a write-lock we are a 367 * secondary process. NOTE: if we get a file handle back, we 368 * keep that open and don't close it to prevent a race condition 369 * between multiple opens. 370 */ 371 if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && 372 (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) 373 ptype = RTE_PROC_SECONDARY; 374 } 375 376 RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", 377 ptype == RTE_PROC_PRIMARY ? 
"PRIMARY" : "SECONDARY"); 378 379 return ptype; 380 } 381 382 /* Sets up rte_config structure with the pointer to shared memory config.*/ 383 static int 384 rte_config_init(void) 385 { 386 struct rte_config *config = rte_eal_get_configuration(); 387 const struct internal_config *internal_conf = 388 eal_get_internal_configuration(); 389 390 config->process_type = internal_conf->process_type; 391 392 switch (config->process_type) { 393 case RTE_PROC_PRIMARY: 394 if (rte_eal_config_create() < 0) 395 return -1; 396 eal_mcfg_update_from_internal(); 397 break; 398 case RTE_PROC_SECONDARY: 399 if (rte_eal_config_attach() < 0) 400 return -1; 401 eal_mcfg_wait_complete(); 402 if (eal_mcfg_check_version() < 0) { 403 RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n"); 404 return -1; 405 } 406 if (rte_eal_config_reattach() < 0) 407 return -1; 408 if (!__rte_mp_enable()) { 409 RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n"); 410 return -1; 411 } 412 eal_mcfg_update_internal(); 413 break; 414 case RTE_PROC_AUTO: 415 case RTE_PROC_INVALID: 416 RTE_LOG(ERR, EAL, "Invalid process type %d\n", 417 config->process_type); 418 return -1; 419 } 420 421 return 0; 422 } 423 424 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ 425 static void 426 eal_hugedirs_unlock(void) 427 { 428 int i; 429 struct internal_config *internal_conf = 430 eal_get_internal_configuration(); 431 432 for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) 433 { 434 /* skip uninitialized */ 435 if (internal_conf->hugepage_info[i].lock_descriptor < 0) 436 continue; 437 /* unlock hugepage file */ 438 flock(internal_conf->hugepage_info[i].lock_descriptor, LOCK_UN); 439 close(internal_conf->hugepage_info[i].lock_descriptor); 440 /* reset the field */ 441 internal_conf->hugepage_info[i].lock_descriptor = -1; 442 } 443 } 444 445 /* display usage */ 446 static void 447 eal_usage(const char *prgname) 448 { 449 rte_usage_hook_t hook = 
eal_get_application_usage_hook(); 450 451 printf("\nUsage: %s ", prgname); 452 eal_common_usage(); 453 printf("EAL Linux options:\n" 454 " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" 455 " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" 456 " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" 457 " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" 458 " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" 459 " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" 460 " --"OPT_VFIO_VF_TOKEN" VF token (UUID) shared between SR-IOV PF and VFs\n" 461 " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" 462 " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" 463 " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n" 464 "\n"); 465 /* Allow the application to print its usage message too if hook is set */ 466 if (hook) { 467 printf("===== Application Usage =====\n\n"); 468 (hook)(prgname); 469 } 470 } 471 472 static int 473 eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) 474 { 475 char * arg[RTE_MAX_NUMA_NODES]; 476 char *end; 477 int arg_num, i, len; 478 479 len = strnlen(strval, SOCKET_MEM_STRLEN); 480 if (len == SOCKET_MEM_STRLEN) { 481 RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); 482 return -1; 483 } 484 485 /* all other error cases will be caught later */ 486 if (!isdigit(strval[len-1])) 487 return -1; 488 489 /* split the optarg into separate socket values */ 490 arg_num = rte_strsplit(strval, len, 491 arg, RTE_MAX_NUMA_NODES, ','); 492 493 /* if split failed, or 0 arguments */ 494 if (arg_num <= 0) 495 return -1; 496 497 /* parse each defined socket option */ 498 errno = 0; 499 for (i = 0; i < arg_num; i++) { 500 uint64_t val; 501 end = NULL; 502 val = strtoull(arg[i], &end, 10); 503 504 /* check for invalid input */ 505 if ((errno != 0) || 506 (arg[i][0] == '\0') || (end == 
NULL) || (*end != '\0')) 507 return -1; 508 val <<= 20; 509 socket_arg[i] = val; 510 } 511 512 return 0; 513 } 514 515 static int 516 eal_parse_vfio_intr(const char *mode) 517 { 518 struct internal_config *internal_conf = 519 eal_get_internal_configuration(); 520 unsigned i; 521 static struct { 522 const char *name; 523 enum rte_intr_mode value; 524 } map[] = { 525 { "legacy", RTE_INTR_MODE_LEGACY }, 526 { "msi", RTE_INTR_MODE_MSI }, 527 { "msix", RTE_INTR_MODE_MSIX }, 528 }; 529 530 for (i = 0; i < RTE_DIM(map); i++) { 531 if (!strcmp(mode, map[i].name)) { 532 internal_conf->vfio_intr_mode = map[i].value; 533 return 0; 534 } 535 } 536 return -1; 537 } 538 539 static int 540 eal_parse_vfio_vf_token(const char *vf_token) 541 { 542 struct internal_config *cfg = eal_get_internal_configuration(); 543 rte_uuid_t uuid; 544 545 if (!rte_uuid_parse(vf_token, uuid)) { 546 rte_uuid_copy(cfg->vfio_vf_token, uuid); 547 return 0; 548 } 549 550 return -1; 551 } 552 553 /* Parse the arguments for --log-level only */ 554 static void 555 eal_log_level_parse(int argc, char **argv) 556 { 557 int opt; 558 char **argvopt; 559 int option_index; 560 const int old_optind = optind; 561 const int old_optopt = optopt; 562 char * const old_optarg = optarg; 563 struct internal_config *internal_conf = 564 eal_get_internal_configuration(); 565 566 argvopt = argv; 567 optind = 1; 568 569 while ((opt = getopt_long(argc, argvopt, eal_short_options, 570 eal_long_options, &option_index)) != EOF) { 571 572 int ret; 573 574 /* getopt is not happy, stop right now */ 575 if (opt == '?') 576 break; 577 578 ret = (opt == OPT_LOG_LEVEL_NUM) ? 
579 eal_parse_common_option(opt, optarg, internal_conf) : 0; 580 581 /* common parser is not happy */ 582 if (ret < 0) 583 break; 584 } 585 586 /* restore getopt lib */ 587 optind = old_optind; 588 optopt = old_optopt; 589 optarg = old_optarg; 590 } 591 592 /* Parse the argument given in the command line of the application */ 593 static int 594 eal_parse_args(int argc, char **argv) 595 { 596 int opt, ret; 597 char **argvopt; 598 int option_index; 599 char *prgname = argv[0]; 600 const int old_optind = optind; 601 const int old_optopt = optopt; 602 char * const old_optarg = optarg; 603 struct internal_config *internal_conf = 604 eal_get_internal_configuration(); 605 606 argvopt = argv; 607 optind = 1; 608 609 while ((opt = getopt_long(argc, argvopt, eal_short_options, 610 eal_long_options, &option_index)) != EOF) { 611 612 /* getopt didn't recognise the option */ 613 if (opt == '?') { 614 eal_usage(prgname); 615 ret = -1; 616 goto out; 617 } 618 619 /* eal_log_level_parse() already handled this option */ 620 if (opt == OPT_LOG_LEVEL_NUM) 621 continue; 622 623 ret = eal_parse_common_option(opt, optarg, internal_conf); 624 /* common parser is not happy */ 625 if (ret < 0) { 626 eal_usage(prgname); 627 ret = -1; 628 goto out; 629 } 630 /* common parser handled this option */ 631 if (ret == 0) 632 continue; 633 634 switch (opt) { 635 case 'h': 636 eal_usage(prgname); 637 exit(EXIT_SUCCESS); 638 639 case OPT_HUGE_DIR_NUM: 640 { 641 char *hdir = strdup(optarg); 642 if (hdir == NULL) 643 RTE_LOG(ERR, EAL, "Could not store hugepage directory\n"); 644 else { 645 /* free old hugepage dir */ 646 if (internal_conf->hugepage_dir != NULL) 647 free(internal_conf->hugepage_dir); 648 internal_conf->hugepage_dir = hdir; 649 } 650 break; 651 } 652 case OPT_FILE_PREFIX_NUM: 653 { 654 char *prefix = strdup(optarg); 655 if (prefix == NULL) 656 RTE_LOG(ERR, EAL, "Could not store file prefix\n"); 657 else { 658 /* free old prefix */ 659 if (internal_conf->hugefile_prefix != NULL) 660 
free(internal_conf->hugefile_prefix); 661 internal_conf->hugefile_prefix = prefix; 662 } 663 break; 664 } 665 case OPT_SOCKET_MEM_NUM: 666 if (eal_parse_socket_arg(optarg, 667 internal_conf->socket_mem) < 0) { 668 RTE_LOG(ERR, EAL, "invalid parameters for --" 669 OPT_SOCKET_MEM "\n"); 670 eal_usage(prgname); 671 ret = -1; 672 goto out; 673 } 674 internal_conf->force_sockets = 1; 675 break; 676 677 case OPT_SOCKET_LIMIT_NUM: 678 if (eal_parse_socket_arg(optarg, 679 internal_conf->socket_limit) < 0) { 680 RTE_LOG(ERR, EAL, "invalid parameters for --" 681 OPT_SOCKET_LIMIT "\n"); 682 eal_usage(prgname); 683 ret = -1; 684 goto out; 685 } 686 internal_conf->force_socket_limits = 1; 687 break; 688 689 case OPT_VFIO_INTR_NUM: 690 if (eal_parse_vfio_intr(optarg) < 0) { 691 RTE_LOG(ERR, EAL, "invalid parameters for --" 692 OPT_VFIO_INTR "\n"); 693 eal_usage(prgname); 694 ret = -1; 695 goto out; 696 } 697 break; 698 699 case OPT_VFIO_VF_TOKEN_NUM: 700 if (eal_parse_vfio_vf_token(optarg) < 0) { 701 RTE_LOG(ERR, EAL, "invalid parameters for --" 702 OPT_VFIO_VF_TOKEN "\n"); 703 eal_usage(prgname); 704 ret = -1; 705 goto out; 706 } 707 break; 708 709 case OPT_CREATE_UIO_DEV_NUM: 710 internal_conf->create_uio_dev = 1; 711 break; 712 713 case OPT_MBUF_POOL_OPS_NAME_NUM: 714 { 715 char *ops_name = strdup(optarg); 716 if (ops_name == NULL) 717 RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); 718 else { 719 /* free old ops name */ 720 if (internal_conf->user_mbuf_pool_ops_name != 721 NULL) 722 free(internal_conf->user_mbuf_pool_ops_name); 723 724 internal_conf->user_mbuf_pool_ops_name = 725 ops_name; 726 } 727 break; 728 } 729 case OPT_MATCH_ALLOCATIONS_NUM: 730 internal_conf->match_allocations = 1; 731 break; 732 733 default: 734 if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { 735 RTE_LOG(ERR, EAL, "Option %c is not supported " 736 "on Linux\n", opt); 737 } else if (opt >= OPT_LONG_MIN_NUM && 738 opt < OPT_LONG_MAX_NUM) { 739 RTE_LOG(ERR, EAL, "Option %s is not supported " 
740 "on Linux\n", 741 eal_long_options[option_index].name); 742 } else { 743 RTE_LOG(ERR, EAL, "Option %d is not supported " 744 "on Linux\n", opt); 745 } 746 eal_usage(prgname); 747 ret = -1; 748 goto out; 749 } 750 } 751 752 /* create runtime data directory. In no_shconf mode, skip any errors */ 753 if (eal_create_runtime_dir() < 0) { 754 if (internal_conf->no_shconf == 0) { 755 RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); 756 ret = -1; 757 goto out; 758 } else 759 RTE_LOG(WARNING, EAL, "No DPDK runtime directory created\n"); 760 } 761 762 if (eal_adjust_config(internal_conf) != 0) { 763 ret = -1; 764 goto out; 765 } 766 767 /* sanity checks */ 768 if (eal_check_common_options(internal_conf) != 0) { 769 eal_usage(prgname); 770 ret = -1; 771 goto out; 772 } 773 774 if (optind >= 0) 775 argv[optind-1] = prgname; 776 ret = optind-1; 777 778 out: 779 /* restore getopt lib */ 780 optind = old_optind; 781 optopt = old_optopt; 782 optarg = old_optarg; 783 784 return ret; 785 } 786 787 static int 788 check_socket(const struct rte_memseg_list *msl, void *arg) 789 { 790 int *socket_id = arg; 791 792 if (msl->external) 793 return 0; 794 795 return *socket_id == msl->socket_id; 796 } 797 798 static void 799 eal_check_mem_on_local_socket(void) 800 { 801 int socket_id; 802 const struct rte_config *config = rte_eal_get_configuration(); 803 804 socket_id = rte_lcore_to_socket_id(config->main_lcore); 805 806 if (rte_memseg_list_walk(check_socket, &socket_id) == 0) 807 RTE_LOG(WARNING, EAL, "WARNING: Main core has no memory on local socket!\n"); 808 } 809 810 static int 811 sync_func(__rte_unused void *arg) 812 { 813 return 0; 814 } 815 816 /* 817 * Request iopl privilege for all RPL, returns 0 on success 818 * iopl() call is mostly for the i386 architecture. For other architectures, 819 * return -1 to indicate IO privilege can't be changed in this way. 
820 */ 821 int 822 rte_eal_iopl_init(void) 823 { 824 #if defined(RTE_ARCH_X86) 825 if (iopl(3) != 0) 826 return -1; 827 #endif 828 return 0; 829 } 830 831 #ifdef VFIO_PRESENT 832 static int rte_eal_vfio_setup(void) 833 { 834 if (rte_vfio_enable("vfio")) 835 return -1; 836 837 return 0; 838 } 839 #endif 840 841 static void rte_eal_init_alert(const char *msg) 842 { 843 fprintf(stderr, "EAL: FATAL: %s\n", msg); 844 RTE_LOG(ERR, EAL, "%s\n", msg); 845 } 846 847 /* 848 * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the 849 * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel 850 * IOMMU groups. If IOMMU is not enabled, that path would be empty. 851 * Therefore, checking if the path is empty will tell us if IOMMU is enabled. 852 */ 853 static bool 854 is_iommu_enabled(void) 855 { 856 DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH); 857 struct dirent *d; 858 int n = 0; 859 860 /* if directory doesn't exist, assume IOMMU is not enabled */ 861 if (dir == NULL) 862 return false; 863 864 while ((d = readdir(dir)) != NULL) { 865 /* skip dot and dot-dot */ 866 if (++n > 2) 867 break; 868 } 869 closedir(dir); 870 871 return n > 2; 872 } 873 874 /* Launch threads, called at application init(). 
*/ 875 int 876 rte_eal_init(int argc, char **argv) 877 { 878 int i, fctret, ret; 879 pthread_t thread_id; 880 static uint32_t run_once; 881 uint32_t has_run = 0; 882 const char *p; 883 static char logid[PATH_MAX]; 884 char cpuset[RTE_CPU_AFFINITY_STR_LEN]; 885 char thread_name[RTE_MAX_THREAD_NAME_LEN]; 886 bool phys_addrs; 887 const struct rte_config *config = rte_eal_get_configuration(); 888 struct internal_config *internal_conf = 889 eal_get_internal_configuration(); 890 891 /* checks if the machine is adequate */ 892 if (!rte_cpu_is_supported()) { 893 rte_eal_init_alert("unsupported cpu type."); 894 rte_errno = ENOTSUP; 895 return -1; 896 } 897 898 if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0, 899 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 900 rte_eal_init_alert("already called initialization."); 901 rte_errno = EALREADY; 902 return -1; 903 } 904 905 p = strrchr(argv[0], '/'); 906 strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid)); 907 thread_id = pthread_self(); 908 909 eal_reset_internal_config(internal_conf); 910 911 /* set log level as early as possible */ 912 eal_log_level_parse(argc, argv); 913 914 /* clone argv to report out later in telemetry */ 915 eal_save_args(argc, argv); 916 917 if (rte_eal_cpu_init() < 0) { 918 rte_eal_init_alert("Cannot detect lcores."); 919 rte_errno = ENOTSUP; 920 return -1; 921 } 922 923 fctret = eal_parse_args(argc, argv); 924 if (fctret < 0) { 925 rte_eal_init_alert("Invalid 'command line' arguments."); 926 rte_errno = EINVAL; 927 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 928 return -1; 929 } 930 931 if (eal_plugins_init() < 0) { 932 rte_eal_init_alert("Cannot init plugins"); 933 rte_errno = EINVAL; 934 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 935 return -1; 936 } 937 938 if (eal_trace_init() < 0) { 939 rte_eal_init_alert("Cannot init trace"); 940 rte_errno = EFAULT; 941 return -1; 942 } 943 944 if (eal_option_device_parse()) { 945 rte_errno = ENODEV; 946 __atomic_store_n(&run_once, 0, 
__ATOMIC_RELAXED); 947 return -1; 948 } 949 950 if (rte_config_init() < 0) { 951 rte_eal_init_alert("Cannot init config"); 952 return -1; 953 } 954 955 if (rte_eal_intr_init() < 0) { 956 rte_eal_init_alert("Cannot init interrupt-handling thread"); 957 return -1; 958 } 959 960 if (rte_eal_alarm_init() < 0) { 961 rte_eal_init_alert("Cannot init alarm"); 962 /* rte_eal_alarm_init sets rte_errno on failure. */ 963 return -1; 964 } 965 966 /* Put mp channel init before bus scan so that we can init the vdev 967 * bus through mp channel in the secondary process before the bus scan. 968 */ 969 if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) { 970 rte_eal_init_alert("failed to init mp channel"); 971 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 972 rte_errno = EFAULT; 973 return -1; 974 } 975 } 976 977 /* register multi-process action callbacks for hotplug */ 978 if (eal_mp_dev_hotplug_init() < 0) { 979 rte_eal_init_alert("failed to register mp callback for hotplug"); 980 return -1; 981 } 982 983 if (rte_bus_scan()) { 984 rte_eal_init_alert("Cannot scan the buses for devices"); 985 rte_errno = ENODEV; 986 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 987 return -1; 988 } 989 990 phys_addrs = rte_eal_using_phys_addrs() != 0; 991 992 /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ 993 if (internal_conf->iova_mode == RTE_IOVA_DC) { 994 /* autodetect the IOVA mapping mode */ 995 enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); 996 997 if (iova_mode == RTE_IOVA_DC) { 998 RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n"); 999 1000 if (!phys_addrs) { 1001 /* if we have no access to physical addresses, 1002 * pick IOVA as VA mode. 
1003 */ 1004 iova_mode = RTE_IOVA_VA; 1005 RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n"); 1006 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) 1007 } else if (rte_eal_check_module("rte_kni") == 1) { 1008 iova_mode = RTE_IOVA_PA; 1009 RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n"); 1010 #endif 1011 } else if (is_iommu_enabled()) { 1012 /* we have an IOMMU, pick IOVA as VA mode */ 1013 iova_mode = RTE_IOVA_VA; 1014 RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n"); 1015 } else { 1016 /* physical addresses available, and no IOMMU 1017 * found, so pick IOVA as PA. 1018 */ 1019 iova_mode = RTE_IOVA_PA; 1020 RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n"); 1021 } 1022 } 1023 #if defined(RTE_LIB_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) 1024 /* Workaround for KNI which requires physical address to work 1025 * in kernels < 4.10 1026 */ 1027 if (iova_mode == RTE_IOVA_VA && 1028 rte_eal_check_module("rte_kni") == 1) { 1029 if (phys_addrs) { 1030 iova_mode = RTE_IOVA_PA; 1031 RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n"); 1032 } else { 1033 RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n"); 1034 } 1035 } 1036 #endif 1037 rte_eal_get_configuration()->iova_mode = iova_mode; 1038 } else { 1039 rte_eal_get_configuration()->iova_mode = 1040 internal_conf->iova_mode; 1041 } 1042 1043 if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { 1044 rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); 1045 rte_errno = EINVAL; 1046 return -1; 1047 } 1048 1049 RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", 1050 rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); 1051 1052 if (internal_conf->no_hugetlbfs == 0) { 1053 /* rte_config isn't initialized yet */ 1054 ret = internal_conf->process_type == RTE_PROC_PRIMARY ? 
1055 eal_hugepage_info_init() : 1056 eal_hugepage_info_read(); 1057 if (ret < 0) { 1058 rte_eal_init_alert("Cannot get hugepage information."); 1059 rte_errno = EACCES; 1060 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1061 return -1; 1062 } 1063 } 1064 1065 if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) { 1066 if (internal_conf->no_hugetlbfs) 1067 internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE; 1068 } 1069 1070 if (internal_conf->vmware_tsc_map == 1) { 1071 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT 1072 rte_cycles_vmware_tsc_map = 1; 1073 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " 1074 "you must have monitor_control.pseudo_perfctr = TRUE\n"); 1075 #else 1076 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " 1077 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); 1078 #endif 1079 } 1080 1081 if (eal_log_init(logid, internal_conf->syslog_facility) < 0) { 1082 rte_eal_init_alert("Cannot init logging."); 1083 rte_errno = ENOMEM; 1084 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1085 return -1; 1086 } 1087 1088 #ifdef VFIO_PRESENT 1089 if (rte_eal_vfio_setup() < 0) { 1090 rte_eal_init_alert("Cannot init VFIO"); 1091 rte_errno = EAGAIN; 1092 __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED); 1093 return -1; 1094 } 1095 #endif 1096 /* in secondary processes, memory init may allocate additional fbarrays 1097 * not present in primary processes, so to avoid any potential issues, 1098 * initialize memzones first. 
1099 */ 1100 if (rte_eal_memzone_init() < 0) { 1101 rte_eal_init_alert("Cannot init memzone"); 1102 rte_errno = ENODEV; 1103 return -1; 1104 } 1105 1106 if (rte_eal_memory_init() < 0) { 1107 rte_eal_init_alert("Cannot init memory"); 1108 rte_errno = ENOMEM; 1109 return -1; 1110 } 1111 1112 /* the directories are locked during eal_hugepage_info_init */ 1113 eal_hugedirs_unlock(); 1114 1115 if (rte_eal_malloc_heap_init() < 0) { 1116 rte_eal_init_alert("Cannot init malloc heap"); 1117 rte_errno = ENODEV; 1118 return -1; 1119 } 1120 1121 if (rte_eal_tailqs_init() < 0) { 1122 rte_eal_init_alert("Cannot init tail queues for objects"); 1123 rte_errno = EFAULT; 1124 return -1; 1125 } 1126 1127 if (rte_eal_timer_init() < 0) { 1128 rte_eal_init_alert("Cannot init HPET or TSC timers"); 1129 rte_errno = ENOTSUP; 1130 return -1; 1131 } 1132 1133 eal_check_mem_on_local_socket(); 1134 1135 if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t), 1136 &lcore_config[config->main_lcore].cpuset) != 0) { 1137 rte_eal_init_alert("Cannot set affinity"); 1138 rte_errno = EINVAL; 1139 return -1; 1140 } 1141 __rte_thread_init(config->main_lcore, 1142 &lcore_config[config->main_lcore].cpuset); 1143 1144 ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset)); 1145 RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", 1146 config->main_lcore, (uintptr_t)thread_id, cpuset, 1147 ret == 0 ? 
"" : "..."); 1148 1149 RTE_LCORE_FOREACH_WORKER(i) { 1150 1151 /* 1152 * create communication pipes between main thread 1153 * and children 1154 */ 1155 if (pipe(lcore_config[i].pipe_main2worker) < 0) 1156 rte_panic("Cannot create pipe\n"); 1157 if (pipe(lcore_config[i].pipe_worker2main) < 0) 1158 rte_panic("Cannot create pipe\n"); 1159 1160 lcore_config[i].state = WAIT; 1161 1162 /* create a thread for each lcore */ 1163 ret = pthread_create(&lcore_config[i].thread_id, NULL, 1164 eal_thread_loop, NULL); 1165 if (ret != 0) 1166 rte_panic("Cannot create thread\n"); 1167 1168 /* Set thread_name for aid in debugging. */ 1169 snprintf(thread_name, sizeof(thread_name), 1170 "lcore-worker-%d", i); 1171 ret = rte_thread_setname(lcore_config[i].thread_id, 1172 thread_name); 1173 if (ret != 0) 1174 RTE_LOG(DEBUG, EAL, 1175 "Cannot set name for lcore thread\n"); 1176 1177 ret = pthread_setaffinity_np(lcore_config[i].thread_id, 1178 sizeof(rte_cpuset_t), &lcore_config[i].cpuset); 1179 if (ret != 0) 1180 rte_panic("Cannot set affinity\n"); 1181 } 1182 1183 /* 1184 * Launch a dummy function on all worker lcores, so that main lcore 1185 * knows they are all ready when this function returns. 1186 */ 1187 rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN); 1188 rte_eal_mp_wait_lcore(); 1189 1190 /* initialize services so vdevs register service during bus_probe. */ 1191 ret = rte_service_init(); 1192 if (ret) { 1193 rte_eal_init_alert("rte_service_init() failed"); 1194 rte_errno = -ret; 1195 return -1; 1196 } 1197 1198 /* Probe all the buses and devices/drivers on them */ 1199 if (rte_bus_probe()) { 1200 rte_eal_init_alert("Cannot probe devices"); 1201 rte_errno = ENOTSUP; 1202 return -1; 1203 } 1204 1205 #ifdef VFIO_PRESENT 1206 /* Register mp action after probe() so that we got enough info */ 1207 if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0) 1208 return -1; 1209 #endif 1210 1211 /* initialize default service/lcore mappings and start running. 
Ignore
	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
	 */
	ret = rte_service_start_with_defaults();
	if (ret < 0 && ret != -ENOTSUP) {
		/* ret is a negative errno value; rte_errno is positive */
		rte_errno = -ret;
		return -1;
	}

	/*
	 * Clean up unused files in runtime directory. We do this at the end of
	 * init and not at the beginning because we want to clean stuff up
	 * whether we are primary or secondary process, but we cannot remove
	 * primary process' files because secondary should be able to run even
	 * if primary process is dead.
	 *
	 * In no_shconf mode, no runtime directory is created in the first
	 * place, so no cleanup needed.
	 */
	if (!internal_conf->no_shconf && eal_clean_runtime_dir() < 0) {
		rte_eal_init_alert("Cannot clear runtime directory");
		return -1;
	}
	if (rte_eal_process_type() == RTE_PROC_PRIMARY && !internal_conf->no_telemetry) {
		/* register a dedicated log type for telemetry; fall back to
		 * the generic EAL log type if registration fails.
		 */
		int tlog = rte_log_register_type_and_pick_level(
				"lib.telemetry", RTE_LOG_WARNING);
		if (tlog < 0)
			tlog = RTE_LOGTYPE_EAL;
		if (rte_telemetry_init(rte_eal_get_runtime_dir(),
				rte_version(),
				&internal_conf->ctrl_cpuset, rte_log, tlog) != 0)
			return -1;
	}

	/* mark init as complete in the shared memory config */
	eal_mcfg_complete();

	return fctret;
}

/*
 * rte_memseg_walk() callback: clear the DO_NOT_FREE flag on one memseg so
 * that memory finalization may release its hugepages back to the system.
 * External (user-registered) segment lists are left untouched.
 * Always returns 0 so the walk visits every segment.
 */
static int
mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg __rte_unused)
{
	/* ms is const, so find this memseg */
	struct rte_memseg *found;

	if (msl->external)
		return 0;

	found = rte_mem_virt2memseg(ms->addr, msl);

	found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;

	return 0;
}

int
rte_eal_cleanup(void)
{
	/* if we're in a primary process, we need to mark hugepages as freeable
	 * so that finalization can release them back to the system.
1272 */ 1273 struct internal_config *internal_conf = 1274 eal_get_internal_configuration(); 1275 1276 if (rte_eal_process_type() == RTE_PROC_PRIMARY && 1277 internal_conf->hugepage_file.unlink_existing) 1278 rte_memseg_walk(mark_freeable, NULL); 1279 1280 rte_service_finalize(); 1281 #ifdef VFIO_PRESENT 1282 vfio_mp_sync_cleanup(); 1283 #endif 1284 rte_mp_channel_cleanup(); 1285 /* after this point, any DPDK pointers will become dangling */ 1286 rte_eal_memory_detach(); 1287 eal_mp_dev_hotplug_cleanup(); 1288 rte_eal_alarm_cleanup(); 1289 rte_trace_save(); 1290 eal_trace_fini(); 1291 eal_cleanup_config(internal_conf); 1292 rte_eal_log_cleanup(); 1293 return 0; 1294 } 1295 1296 int rte_eal_create_uio_dev(void) 1297 { 1298 const struct internal_config *internal_conf = 1299 eal_get_internal_configuration(); 1300 1301 return internal_conf->create_uio_dev; 1302 } 1303 1304 enum rte_intr_mode 1305 rte_eal_vfio_intr_mode(void) 1306 { 1307 const struct internal_config *internal_conf = 1308 eal_get_internal_configuration(); 1309 1310 return internal_conf->vfio_intr_mode; 1311 } 1312 1313 void 1314 rte_eal_vfio_get_vf_token(rte_uuid_t vf_token) 1315 { 1316 struct internal_config *cfg = eal_get_internal_configuration(); 1317 1318 rte_uuid_copy(vf_token, cfg->vfio_vf_token); 1319 } 1320 1321 int 1322 rte_eal_check_module(const char *module_name) 1323 { 1324 char sysfs_mod_name[PATH_MAX]; 1325 struct stat st; 1326 int n; 1327 1328 if (NULL == module_name) 1329 return -1; 1330 1331 /* Check if there is sysfs mounted */ 1332 if (stat("/sys/module", &st) != 0) { 1333 RTE_LOG(DEBUG, EAL, "sysfs is not mounted! 
error %i (%s)\n", 1334 errno, strerror(errno)); 1335 return -1; 1336 } 1337 1338 /* A module might be built-in, therefore try sysfs */ 1339 n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); 1340 if (n < 0 || n > PATH_MAX) { 1341 RTE_LOG(DEBUG, EAL, "Could not format module path\n"); 1342 return -1; 1343 } 1344 1345 if (stat(sysfs_mod_name, &st) != 0) { 1346 RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", 1347 sysfs_mod_name, errno, strerror(errno)); 1348 return 0; 1349 } 1350 1351 /* Module has been found */ 1352 return 1; 1353 } 1354