1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2017 Intel Corporation. 318d26e42SBen Walker * All rights reserved. 418d26e42SBen Walker */ 518d26e42SBen Walker 6b961d9ccSBen Walker #include "spdk/stdinc.h" 718d26e42SBen Walker 8b961d9ccSBen Walker #include "env_internal.h" 918d26e42SBen Walker 1053d5499eSDaniel Verkamp #include "spdk/version.h" 1181551144SXiaodong Liu #include "spdk/env_dpdk.h" 12d8190d02SVitaliy Mysak #include "spdk/log.h" 139e7217abSKrzysztof Karas #include "spdk/config.h" 149e7217abSKrzysztof Karas 159e7217abSKrzysztof Karas #include <openssl/ssl.h> 169e7217abSKrzysztof Karas #include <openssl/err.h> 1753d5499eSDaniel Verkamp 1818d26e42SBen Walker #include <rte_config.h> 1918d26e42SBen Walker #include <rte_eal.h> 2037c0a02eSJim Harris #include <rte_errno.h> 21a9c79c33SRichael Zhuang #include <rte_vfio.h> 2218d26e42SBen Walker 2318d26e42SBen Walker #define SPDK_ENV_DPDK_DEFAULT_NAME "spdk" 2418d26e42SBen Walker #define SPDK_ENV_DPDK_DEFAULT_SHM_ID -1 2518d26e42SBen Walker #define SPDK_ENV_DPDK_DEFAULT_MEM_SIZE -1 26fe137c89SJim Harris #define SPDK_ENV_DPDK_DEFAULT_MAIN_CORE -1 2718d26e42SBen Walker #define SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL -1 2818d26e42SBen Walker #define SPDK_ENV_DPDK_DEFAULT_CORE_MASK "0x1" 29b3767a23SJacek Kalwas #define SPDK_ENV_DPDK_DEFAULT_BASE_VIRTADDR 0x200000000000 3018d26e42SBen Walker 314a6a2824SJim Harris #define DPDK_ALLOW_PARAM "--allow" 324a6a2824SJim Harris #define DPDK_BLOCK_PARAM "--block" 33fe137c89SJim Harris #define DPDK_MAIN_CORE_PARAM "--main-lcore" 34ddd71f93SJim Harris 35d631b855SJim Harris static char **g_eal_cmdline; 36d631b855SJim Harris static int g_eal_cmdline_argcount; 37725f9de3SJim Harris static bool g_external_init = true; 38cc3c7906SPawel Wodkowski 3918d26e42SBen Walker static char * 4018d26e42SBen Walker _sprintf_alloc(const char *format, ...) 4118d26e42SBen Walker { 4218d26e42SBen Walker va_list args; 4318d26e42SBen Walker va_list args_copy; 4418d26e42SBen Walker char *buf; 4518d26e42SBen Walker size_t bufsize; 4618d26e42SBen Walker int rc; 4718d26e42SBen Walker 4818d26e42SBen Walker va_start(args, format); 4918d26e42SBen Walker 5018d26e42SBen Walker /* Try with a small buffer first. */ 5118d26e42SBen Walker bufsize = 32; 5218d26e42SBen Walker 5318d26e42SBen Walker /* Limit maximum buffer size to something reasonable so we don't loop forever. */ 5418d26e42SBen Walker while (bufsize <= 1024 * 1024) { 5518d26e42SBen Walker buf = malloc(bufsize); 5618d26e42SBen Walker if (buf == NULL) { 5718d26e42SBen Walker va_end(args); 5818d26e42SBen Walker return NULL; 5918d26e42SBen Walker } 6018d26e42SBen Walker 6118d26e42SBen Walker va_copy(args_copy, args); 6218d26e42SBen Walker rc = vsnprintf(buf, bufsize, format, args_copy); 6318d26e42SBen Walker va_end(args_copy); 6418d26e42SBen Walker 6518d26e42SBen Walker /* 6618d26e42SBen Walker * If vsnprintf() returned a count within our current buffer size, we are done. 6718d26e42SBen Walker * The count does not include the \0 terminator, so rc == bufsize is not OK. 6818d26e42SBen Walker */ 6918d26e42SBen Walker if (rc >= 0 && (size_t)rc < bufsize) { 7018d26e42SBen Walker va_end(args); 7118d26e42SBen Walker return buf; 7218d26e42SBen Walker } 7318d26e42SBen Walker 7418d26e42SBen Walker /* 7518d26e42SBen Walker * vsnprintf() should return the required space, but some libc versions do not 7618d26e42SBen Walker * implement this correctly, so just double the buffer size and try again. 7718d26e42SBen Walker * 7818d26e42SBen Walker * We don't need the data in buf, so rather than realloc(), use free() and malloc() 7918d26e42SBen Walker * again to avoid a copy. 8018d26e42SBen Walker */ 8118d26e42SBen Walker free(buf); 8218d26e42SBen Walker bufsize *= 2; 8318d26e42SBen Walker } 8418d26e42SBen Walker 8518d26e42SBen Walker va_end(args); 8618d26e42SBen Walker return NULL; 8718d26e42SBen Walker } 8818d26e42SBen Walker 8918d26e42SBen Walker void 9018d26e42SBen Walker spdk_env_opts_init(struct spdk_env_opts *opts) 9118d26e42SBen Walker { 927c739692SJim Harris size_t opts_size; 937c739692SJim Harris 9418d26e42SBen Walker if (!opts) { 9518d26e42SBen Walker return; 9618d26e42SBen Walker } 9718d26e42SBen Walker 987c739692SJim Harris opts_size = opts->opts_size; 9918d26e42SBen Walker memset(opts, 0, sizeof(*opts)); 1007c739692SJim Harris opts->opts_size = opts_size; 10118d26e42SBen Walker 10218d26e42SBen Walker opts->name = SPDK_ENV_DPDK_DEFAULT_NAME; 10318d26e42SBen Walker opts->core_mask = SPDK_ENV_DPDK_DEFAULT_CORE_MASK; 10418d26e42SBen Walker opts->shm_id = SPDK_ENV_DPDK_DEFAULT_SHM_ID; 105d939572aSZiye Yang opts->mem_size = SPDK_ENV_DPDK_DEFAULT_MEM_SIZE; 106fe137c89SJim Harris opts->main_core = SPDK_ENV_DPDK_DEFAULT_MAIN_CORE; 107d939572aSZiye Yang opts->mem_channel = SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL; 108b3767a23SJacek Kalwas opts->base_virtaddr = SPDK_ENV_DPDK_DEFAULT_BASE_VIRTADDR; 10957fd99b9SJim Harris 11057fd99b9SJim Harris #define SET_FIELD(field, value) \ 11157fd99b9SJim Harris if (offsetof(struct spdk_env_opts, field) + sizeof(opts->field) <= opts->opts_size) { \ 11257fd99b9SJim Harris opts->field = value; \ 11357fd99b9SJim Harris } 11457fd99b9SJim Harris 11541ff6dceSJim Harris SET_FIELD(enforce_numa, false); 11641ff6dceSJim Harris 11757fd99b9SJim Harris #undef SET_FIELD 11818d26e42SBen Walker } 11918d26e42SBen Walker 12018d26e42SBen Walker static void 1213456377bSSeth Howell free_args(char **args, int argcount) 12218d26e42SBen Walker { 12318d26e42SBen Walker int i; 12418d26e42SBen Walker 125ef7c128aSXiaodong Liu if (args == NULL) { 126ef7c128aSXiaodong Liu return; 127ef7c128aSXiaodong Liu } 128ef7c128aSXiaodong Liu 12918d26e42SBen Walker for (i = 0; i < argcount; i++) { 13018d26e42SBen Walker free(args[i]); 13118d26e42SBen Walker } 13218d26e42SBen Walker 133095f4254SLance Hartmann if (argcount) { 13418d26e42SBen Walker free(args); 13518d26e42SBen Walker } 136095f4254SLance Hartmann } 13718d26e42SBen Walker 13818d26e42SBen Walker static char ** 1393456377bSSeth Howell push_arg(char *args[], int *argcount, char *arg) 14018d26e42SBen Walker { 14118d26e42SBen Walker char **tmp; 14218d26e42SBen Walker 14318d26e42SBen Walker if (arg == NULL) { 144d8190d02SVitaliy Mysak SPDK_ERRLOG("%s: NULL arg supplied\n", __func__); 1453456377bSSeth Howell free_args(args, *argcount); 14618d26e42SBen Walker return NULL; 14718d26e42SBen Walker } 14818d26e42SBen Walker 14918d26e42SBen Walker tmp = realloc(args, sizeof(char *) * (*argcount + 1)); 15018d26e42SBen Walker if (tmp == NULL) { 1519858408bSDarek Stojaczyk free(arg); 1523456377bSSeth Howell free_args(args, *argcount); 15318d26e42SBen Walker return NULL; 15418d26e42SBen Walker } 15518d26e42SBen Walker 15618d26e42SBen Walker tmp[*argcount] = arg; 15718d26e42SBen Walker (*argcount)++; 15818d26e42SBen Walker 15918d26e42SBen Walker return tmp; 16018d26e42SBen Walker } 16118d26e42SBen Walker 16297b0f773SBen Walker #if defined(__linux__) && defined(__x86_64__) 16397b0f773SBen Walker 16497b0f773SBen Walker /* TODO: Can likely get this value from rlimits in the future */ 16597b0f773SBen Walker #define SPDK_IOMMU_VA_REQUIRED_WIDTH 48 16697b0f773SBen Walker #define VTD_CAP_MGAW_SHIFT 16 16797b0f773SBen Walker #define VTD_CAP_MGAW_MASK (0x3F << VTD_CAP_MGAW_SHIFT) 1689ffb0497SMichael Piszczek #define RD_AMD_CAP_VASIZE_SHIFT 15 1699ffb0497SMichael Piszczek #define RD_AMD_CAP_VASIZE_MASK (0x7F << RD_AMD_CAP_VASIZE_SHIFT) 1709ffb0497SMichael Piszczek 1719ffb0497SMichael Piszczek static int 1723456377bSSeth Howell get_iommu_width(void) 17397b0f773SBen Walker { 1741473d3b8SMichael Piszczek int width = 0; 1751473d3b8SMichael Piszczek glob_t glob_results = {}; 1769ffb0497SMichael Piszczek 1771473d3b8SMichael Piszczek /* Break * and / into separate strings to appease check_format.sh comment style check. */ 1781473d3b8SMichael Piszczek glob("/sys/devices/virtual/iommu/dmar*" "/intel-iommu/cap", 0, NULL, &glob_results); 1791473d3b8SMichael Piszczek glob("/sys/class/iommu/ivhd*" "/amd-iommu/cap", GLOB_APPEND, NULL, &glob_results); 1801473d3b8SMichael Piszczek 1811473d3b8SMichael Piszczek for (size_t i = 0; i < glob_results.gl_pathc; i++) { 1821473d3b8SMichael Piszczek const char *filename = glob_results.gl_pathv[0]; 1831473d3b8SMichael Piszczek FILE *file = fopen(filename, "r"); 1841473d3b8SMichael Piszczek uint64_t cap_reg = 0; 1851473d3b8SMichael Piszczek 1864f4bf8c4SGangCao if (file == NULL) { 1874f4bf8c4SGangCao continue; 1884f4bf8c4SGangCao } 1894f4bf8c4SGangCao 1904f4bf8c4SGangCao if (fscanf(file, "%" PRIx64, &cap_reg) == 1) { 1911473d3b8SMichael Piszczek if (strstr(filename, "intel-iommu") != NULL) { 1921473d3b8SMichael Piszczek /* We have an Intel IOMMU */ 1931473d3b8SMichael Piszczek int mgaw = ((cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; 1941473d3b8SMichael Piszczek 1951473d3b8SMichael Piszczek if (width == 0 || (mgaw > 0 && mgaw < width)) { 1961473d3b8SMichael Piszczek width = mgaw; 1979ffb0497SMichael Piszczek } 1981473d3b8SMichael Piszczek } else if (strstr(filename, "amd-iommu") != NULL) { 1991473d3b8SMichael Piszczek /* We have an AMD IOMMU */ 2001473d3b8SMichael Piszczek int mgaw = ((cap_reg & RD_AMD_CAP_VASIZE_MASK) >> RD_AMD_CAP_VASIZE_SHIFT) + 1; 20197b0f773SBen Walker 2021473d3b8SMichael Piszczek if (width == 0 || (mgaw > 0 && mgaw < width)) { 2031473d3b8SMichael Piszczek width = mgaw; 20497b0f773SBen Walker } 20597b0f773SBen Walker } 20697b0f773SBen Walker } 20797b0f773SBen Walker 20897b0f773SBen Walker fclose(file); 20997b0f773SBen Walker } 21097b0f773SBen Walker 2111473d3b8SMichael Piszczek globfree(&glob_results); 21297b0f773SBen Walker return width; 21397b0f773SBen Walker } 21497b0f773SBen Walker 21597b0f773SBen Walker #endif 21697b0f773SBen Walker 21718d26e42SBen Walker static int 2183456377bSSeth Howell build_eal_cmdline(const struct spdk_env_opts *opts) 21918d26e42SBen Walker { 22018d26e42SBen Walker int argcount = 0; 22118d26e42SBen Walker char **args; 222a6658c54SSarvesh Lanke bool no_huge; 22318d26e42SBen Walker 22418d26e42SBen Walker args = NULL; 225a6658c54SSarvesh Lanke no_huge = opts->no_huge || (opts->env_context && strstr(opts->env_context, "--no-huge") != NULL); 22618d26e42SBen Walker 22718d26e42SBen Walker /* set the program name */ 2283456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("%s", opts->name)); 22918d26e42SBen Walker if (args == NULL) { 23018d26e42SBen Walker return -1; 23118d26e42SBen Walker } 23218d26e42SBen Walker 233194b8ecaSSeth Howell /* disable shared configuration files when in single process mode. This allows for cleaner shutdown */ 234194b8ecaSSeth Howell if (opts->shm_id < 0) { 2353456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("%s", "--no-shconf")); 236194b8ecaSSeth Howell if (args == NULL) { 237194b8ecaSSeth Howell return -1; 238194b8ecaSSeth Howell } 239194b8ecaSSeth Howell } 240194b8ecaSSeth Howell 241b3a57634SMarcin Spiewak /* Either lcore_map or core_mask must be set. If both, or none specified, fail */ 242b3a57634SMarcin Spiewak if ((opts->core_mask == NULL) == (opts->lcore_map == NULL)) { 243b3a57634SMarcin Spiewak if (opts->core_mask && opts->lcore_map) { 244b3a57634SMarcin Spiewak fprintf(stderr, 245b3a57634SMarcin Spiewak "Both, lcore map and core mask are provided, while only one can be set\n"); 246b3a57634SMarcin Spiewak } else { 247b3a57634SMarcin Spiewak fprintf(stderr, "Core mask or lcore map must be specified\n"); 248b3a57634SMarcin Spiewak } 249b3a57634SMarcin Spiewak free_args(args, argcount); 250b3a57634SMarcin Spiewak return -1; 251b3a57634SMarcin Spiewak } 252b3a57634SMarcin Spiewak 253b3a57634SMarcin Spiewak if (opts->lcore_map) { 254b3a57634SMarcin Spiewak /* If lcore list is set, generate --lcores parameter */ 255b3a57634SMarcin Spiewak args = push_arg(args, &argcount, _sprintf_alloc("--lcores=%s", opts->lcore_map)); 256b3a57634SMarcin Spiewak } else if (opts->core_mask[0] == '-') { 257342001e1SJohn Levon /* 258342001e1SJohn Levon * Set the coremask: 259342001e1SJohn Levon * 260342001e1SJohn Levon * - if it starts with '-', we presume it's literal EAL arguments such 261342001e1SJohn Levon * as --lcores. 262342001e1SJohn Levon * 263342001e1SJohn Levon * - if it starts with '[', we presume it's a core list to use with the 264342001e1SJohn Levon * -l option. 265342001e1SJohn Levon * 266342001e1SJohn Levon * - otherwise, it's a CPU mask of the form "0xff.." as expected by the 267342001e1SJohn Levon * -c option. 268601bcbcfSTomasz Kulasek */ 269342001e1SJohn Levon args = push_arg(args, &argcount, _sprintf_alloc("%s", opts->core_mask)); 270342001e1SJohn Levon } else if (opts->core_mask[0] == '[') { 271601bcbcfSTomasz Kulasek char *l_arg = _sprintf_alloc("-l %s", opts->core_mask + 1); 2723ac9ba25SDarek Stojaczyk 2733ac9ba25SDarek Stojaczyk if (l_arg != NULL) { 274601bcbcfSTomasz Kulasek int len = strlen(l_arg); 2753ac9ba25SDarek Stojaczyk 276601bcbcfSTomasz Kulasek if (l_arg[len - 1] == ']') { 277601bcbcfSTomasz Kulasek l_arg[len - 1] = '\0'; 278601bcbcfSTomasz Kulasek } 2793ac9ba25SDarek Stojaczyk } 2803456377bSSeth Howell args = push_arg(args, &argcount, l_arg); 281601bcbcfSTomasz Kulasek } else { 2823456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("-c %s", opts->core_mask)); 283601bcbcfSTomasz Kulasek } 284601bcbcfSTomasz Kulasek 28518d26e42SBen Walker if (args == NULL) { 28618d26e42SBen Walker return -1; 28718d26e42SBen Walker } 28818d26e42SBen Walker 28918d26e42SBen Walker /* set the memory channel number */ 290d939572aSZiye Yang if (opts->mem_channel > 0) { 2913456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("-n %d", opts->mem_channel)); 29218d26e42SBen Walker if (args == NULL) { 29318d26e42SBen Walker return -1; 29418d26e42SBen Walker } 29518d26e42SBen Walker } 29618d26e42SBen Walker 29718d26e42SBen Walker /* set the memory size */ 29875327bc6SDariusz Stojaczyk if (opts->mem_size >= 0) { 2993456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("-m %d", opts->mem_size)); 30018d26e42SBen Walker if (args == NULL) { 30118d26e42SBen Walker return -1; 30218d26e42SBen Walker } 30318d26e42SBen Walker } 30418d26e42SBen Walker 305a6658c54SSarvesh Lanke /* set no huge pages */ 306a6658c54SSarvesh Lanke if (opts->no_huge) { 307a6658c54SSarvesh Lanke mem_disable_huge_pages(); 308a6658c54SSarvesh Lanke } 309a6658c54SSarvesh Lanke 31041ff6dceSJim Harris if (opts->enforce_numa) { 31141ff6dceSJim Harris mem_enforce_numa(); 31241ff6dceSJim Harris } 31341ff6dceSJim Harris 314fe137c89SJim Harris /* set the main core */ 315fe137c89SJim Harris if (opts->main_core > 0) { 316fe137c89SJim Harris args = push_arg(args, &argcount, _sprintf_alloc("%s=%d", 317fe137c89SJim Harris DPDK_MAIN_CORE_PARAM, opts->main_core)); 31818d26e42SBen Walker if (args == NULL) { 31918d26e42SBen Walker return -1; 32018d26e42SBen Walker } 32118d26e42SBen Walker } 32218d26e42SBen Walker 323fb6c541dSZiye Yang /* set no pci if enabled */ 324fb6c541dSZiye Yang if (opts->no_pci) { 3253456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--no-pci")); 326fb6c541dSZiye Yang if (args == NULL) { 327fb6c541dSZiye Yang return -1; 328fb6c541dSZiye Yang } 329fb6c541dSZiye Yang } 330fb6c541dSZiye Yang 331a6658c54SSarvesh Lanke if (no_huge) { 332db6297b5SJim Harris if (opts->hugepage_single_segments || opts->unlink_hugepage || opts->hugedir) { 333db6297b5SJim Harris fprintf(stderr, "--no-huge invalid with other hugepage options\n"); 334db6297b5SJim Harris free_args(args, argcount); 335db6297b5SJim Harris return -1; 336db6297b5SJim Harris } 337a6658c54SSarvesh Lanke 338a6658c54SSarvesh Lanke if (opts->mem_size < 0) { 339a6658c54SSarvesh Lanke fprintf(stderr, 340a6658c54SSarvesh Lanke "Disabling hugepages requires specifying how much memory " 341a6658c54SSarvesh Lanke "will be allocated using -s parameter\n"); 342a6658c54SSarvesh Lanke free_args(args, argcount); 343a6658c54SSarvesh Lanke return -1; 344a6658c54SSarvesh Lanke } 345a6658c54SSarvesh Lanke 346a6658c54SSarvesh Lanke /* iova-mode=pa is incompatible with no_huge */ 347a6658c54SSarvesh Lanke if (opts->iova_mode && 348a6658c54SSarvesh Lanke (strcmp(opts->iova_mode, "pa") == 0)) { 349a6658c54SSarvesh Lanke fprintf(stderr, "iova-mode=pa is incompatible with specified " 350a6658c54SSarvesh Lanke "no-huge parameter\n"); 351a6658c54SSarvesh Lanke free_args(args, argcount); 352a6658c54SSarvesh Lanke return -1; 353a6658c54SSarvesh Lanke } 354a6658c54SSarvesh Lanke 355a6658c54SSarvesh Lanke args = push_arg(args, &argcount, _sprintf_alloc("--no-huge")); 356a6658c54SSarvesh Lanke args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=va")); 357a6658c54SSarvesh Lanke 358db6297b5SJim Harris } else { 359aa8e7002SDariusz Stojaczyk /* create just one hugetlbfs file */ 360aa8e7002SDariusz Stojaczyk if (opts->hugepage_single_segments) { 3613456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--single-file-segments")); 362aa8e7002SDariusz Stojaczyk if (args == NULL) { 363aa8e7002SDariusz Stojaczyk return -1; 364aa8e7002SDariusz Stojaczyk } 365aa8e7002SDariusz Stojaczyk } 366aa8e7002SDariusz Stojaczyk 36701831056SChangpeng Liu /* unlink hugepages after initialization */ 368c833f6aaSJim Harris /* Note: Automatically unlink hugepage when shm_id < 0, since it means we're not using 369c833f6aaSJim Harris * multi-process so we don't need the hugepage links anymore. But we need to make sure 370c833f6aaSJim Harris * we don't specify --huge-unlink implicitly if --single-file-segments was specified since 371c833f6aaSJim Harris * DPDK doesn't support that. 372c833f6aaSJim Harris */ 373c833f6aaSJim Harris if (opts->unlink_hugepage || 374c833f6aaSJim Harris (opts->shm_id < 0 && !opts->hugepage_single_segments)) { 3753456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--huge-unlink")); 37601831056SChangpeng Liu if (args == NULL) { 37701831056SChangpeng Liu return -1; 37801831056SChangpeng Liu } 37901831056SChangpeng Liu } 38001831056SChangpeng Liu 3813e75e90aSDarek Stojaczyk /* use a specific hugetlbfs mount */ 3823e75e90aSDarek Stojaczyk if (opts->hugedir) { 3833456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--huge-dir=%s", opts->hugedir)); 3843e75e90aSDarek Stojaczyk if (args == NULL) { 3853e75e90aSDarek Stojaczyk return -1; 3863e75e90aSDarek Stojaczyk } 3873e75e90aSDarek Stojaczyk } 388db6297b5SJim Harris } 3893e75e90aSDarek Stojaczyk 390d546e3d9SYoung Tack Jin if (opts->num_pci_addr) { 391d546e3d9SYoung Tack Jin size_t i; 392d546e3d9SYoung Tack Jin char bdf[32]; 393d546e3d9SYoung Tack Jin struct spdk_pci_addr *pci_addr = 3944a6a2824SJim Harris opts->pci_blocked ? opts->pci_blocked : opts->pci_allowed; 395d546e3d9SYoung Tack Jin 396d546e3d9SYoung Tack Jin for (i = 0; i < opts->num_pci_addr; i++) { 397d546e3d9SYoung Tack Jin spdk_pci_addr_fmt(bdf, 32, &pci_addr[i]); 3983456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("%s=%s", 3994a6a2824SJim Harris (opts->pci_blocked ? DPDK_BLOCK_PARAM : DPDK_ALLOW_PARAM), 4006082e756SShuhei Matsumoto bdf)); 401d546e3d9SYoung Tack Jin if (args == NULL) { 402d546e3d9SYoung Tack Jin return -1; 403d546e3d9SYoung Tack Jin } 404d546e3d9SYoung Tack Jin } 405d546e3d9SYoung Tack Jin } 406d546e3d9SYoung Tack Jin 407441ad56eSTomasz Zawadzki /* Disable DPDK telemetry information by default, can be modified with env_context. 408441ad56eSTomasz Zawadzki * Prevents creation of dpdk_telemetry socket and additional pthread for it. 409441ad56eSTomasz Zawadzki */ 410441ad56eSTomasz Zawadzki args = push_arg(args, &argcount, _sprintf_alloc("--no-telemetry")); 411441ad56eSTomasz Zawadzki if (args == NULL) { 412441ad56eSTomasz Zawadzki return -1; 413441ad56eSTomasz Zawadzki } 414441ad56eSTomasz Zawadzki 415b42cf6eaSDarek Stojaczyk /* Lower default EAL loglevel to RTE_LOG_NOTICE - normal, but significant messages. 416b42cf6eaSDarek Stojaczyk * This can be overridden by specifying the same option in opts->env_context 417b42cf6eaSDarek Stojaczyk */ 4183456377bSSeth Howell args = push_arg(args, &argcount, strdup("--log-level=lib.eal:6")); 419b42cf6eaSDarek Stojaczyk if (args == NULL) { 420b42cf6eaSDarek Stojaczyk return -1; 421b42cf6eaSDarek Stojaczyk } 422b42cf6eaSDarek Stojaczyk 4233daf1f00SJim Harris /* Lower default CRYPTO loglevel to RTE_LOG_WARNING to avoid a ton of init msgs. 4248ac86b24Spaul luse * This can be overridden by specifying the same option in opts->env_context 4258ac86b24Spaul luse */ 4263456377bSSeth Howell args = push_arg(args, &argcount, strdup("--log-level=lib.cryptodev:5")); 4278ac86b24Spaul luse if (args == NULL) { 4288ac86b24Spaul luse return -1; 4298ac86b24Spaul luse } 4308ac86b24Spaul luse 4313daf1f00SJim Harris /* Lower default POWER loglevel to RTE_LOG_WARNING to avoid a ton of init msgs. 4323daf1f00SJim Harris * This can be overridden by specifying the same option in opts->env_context 4333daf1f00SJim Harris */ 4343daf1f00SJim Harris args = push_arg(args, &argcount, strdup("--log-level=lib.power:5")); 4353daf1f00SJim Harris if (args == NULL) { 4363daf1f00SJim Harris return -1; 4373daf1f00SJim Harris } 4383daf1f00SJim Harris 439e560d53cSDariusz Stojaczyk /* `user1` log type is used by rte_vhost, which prints an INFO log for each received 440e560d53cSDariusz Stojaczyk * vhost user message. We don't want that. The same log type is also used by a couple 441e560d53cSDariusz Stojaczyk * of other DPDK libs, but none of which we make use right now. If necessary, this can 442e560d53cSDariusz Stojaczyk * be overridden via opts->env_context. 443e560d53cSDariusz Stojaczyk */ 4443456377bSSeth Howell args = push_arg(args, &argcount, strdup("--log-level=user1:6")); 445e560d53cSDariusz Stojaczyk if (args == NULL) { 446e560d53cSDariusz Stojaczyk return -1; 447e560d53cSDariusz Stojaczyk } 448e560d53cSDariusz Stojaczyk 44918d26e42SBen Walker #ifdef __linux__ 45007ca0221SBen Walker 451eb76afe7SJin Yu if (opts->iova_mode) { 452a6658c54SSarvesh Lanke /* iova-mode=pa is incompatible with no_huge */ 453eb76afe7SJin Yu args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=%s", opts->iova_mode)); 454eb76afe7SJin Yu if (args == NULL) { 455eb76afe7SJin Yu return -1; 456eb76afe7SJin Yu } 457eb76afe7SJin Yu } else { 458a9c79c33SRichael Zhuang /* When using vfio with enable_unsafe_noiommu_mode=Y, we need iova-mode=pa, 459a9c79c33SRichael Zhuang * but DPDK guesses it should be iova-mode=va. Add a check and force 460a9c79c33SRichael Zhuang * iova-mode=pa here. */ 461a6658c54SSarvesh Lanke if (!no_huge && rte_vfio_noiommu_is_enabled()) { 4623456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); 463a9c79c33SRichael Zhuang if (args == NULL) { 464a9c79c33SRichael Zhuang return -1; 465a9c79c33SRichael Zhuang } 466a9c79c33SRichael Zhuang } 467a9c79c33SRichael Zhuang 46897b0f773SBen Walker #if defined(__x86_64__) 46997b0f773SBen Walker /* DPDK by default guesses that it should be using iova-mode=va so that it can 47097b0f773SBen Walker * support running as an unprivileged user. However, some systems (especially 47197b0f773SBen Walker * virtual machines) don't have an IOMMU capable of handling the full virtual 47297b0f773SBen Walker * address space and DPDK doesn't currently catch that. Add a check in SPDK 47397b0f773SBen Walker * and force iova-mode=pa here. */ 474a6658c54SSarvesh Lanke if (!no_huge && get_iommu_width() < SPDK_IOMMU_VA_REQUIRED_WIDTH) { 4753456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); 47697b0f773SBen Walker if (args == NULL) { 47797b0f773SBen Walker return -1; 47897b0f773SBen Walker } 47997b0f773SBen Walker } 48097b0f773SBen Walker #elif defined(__PPC64__) 48107ca0221SBen Walker /* On Linux + PowerPC, DPDK doesn't support VA mode at all. Unfortunately, it doesn't correctly 48207ca0221SBen Walker * auto-detect at the moment, so we'll just force it here. */ 4833456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); 48407ca0221SBen Walker if (args == NULL) { 48507ca0221SBen Walker return -1; 48607ca0221SBen Walker } 48707ca0221SBen Walker #endif 488eb76afe7SJin Yu } 48907ca0221SBen Walker 49097b0f773SBen Walker 491161af0b5SDarek Stojaczyk /* Set the base virtual address - it must be an address that is not in the 492161af0b5SDarek Stojaczyk * ASAN shadow region, otherwise ASAN-enabled builds will ignore the 493161af0b5SDarek Stojaczyk * mmap hint. 494161af0b5SDarek Stojaczyk * 495161af0b5SDarek Stojaczyk * Ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm 496161af0b5SDarek Stojaczyk */ 497b3767a23SJacek Kalwas args = push_arg(args, &argcount, _sprintf_alloc("--base-virtaddr=0x%" PRIx64, opts->base_virtaddr)); 498161af0b5SDarek Stojaczyk if (args == NULL) { 499161af0b5SDarek Stojaczyk return -1; 500161af0b5SDarek Stojaczyk } 501161af0b5SDarek Stojaczyk 50200986873SSeth Howell /* --match-allocation prevents DPDK from merging or splitting system memory allocations under the hood. 50300986873SSeth Howell * This is critical for RDMA when attempting to use an rte_mempool based buffer pool. If DPDK merges two 50400986873SSeth Howell * physically or IOVA contiguous memory regions, then when we go to allocate a buffer pool, it can split 50500986873SSeth Howell * the memory for a buffer over two allocations meaning the buffer will be split over a memory region. 50600986873SSeth Howell */ 507a6658c54SSarvesh Lanke 508a6658c54SSarvesh Lanke /* --no-huge is incompatible with --match-allocations 509a6658c54SSarvesh Lanke * Ref: https://doc.dpdk.org/guides/prog_guide/env_abstraction_layer.html#hugepage-allocation-matching 510a6658c54SSarvesh Lanke */ 511a6658c54SSarvesh Lanke if (!no_huge && 512a6658c54SSarvesh Lanke (!opts->env_context || strstr(opts->env_context, "--legacy-mem") == NULL)) { 5133456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("%s", "--match-allocations")); 51400986873SSeth Howell if (args == NULL) { 51500986873SSeth Howell return -1; 51600986873SSeth Howell } 5173c4199d6SShuhei Matsumoto } 51800986873SSeth Howell 51918d26e42SBen Walker if (opts->shm_id < 0) { 5203456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk_pid%d", 52118d26e42SBen Walker getpid())); 52218d26e42SBen Walker if (args == NULL) { 52318d26e42SBen Walker return -1; 52418d26e42SBen Walker } 52518d26e42SBen Walker } else { 5263456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk%d", 52718d26e42SBen Walker opts->shm_id)); 52818d26e42SBen Walker if (args == NULL) { 52918d26e42SBen Walker return -1; 53018d26e42SBen Walker } 53118d26e42SBen Walker 53218d26e42SBen Walker /* set the process type */ 5333456377bSSeth Howell args = push_arg(args, &argcount, _sprintf_alloc("--proc-type=auto")); 53418d26e42SBen Walker if (args == NULL) { 53518d26e42SBen Walker return -1; 53618d26e42SBen Walker } 5372f10ea11SDaniel Verkamp } 53886431df1SJun Zeng 53986431df1SJun Zeng /* --vfio-vf-token used for VF initialized by vfio_pci driver. */ 54086431df1SJun Zeng if (opts->vf_token) { 54186431df1SJun Zeng args = push_arg(args, &argcount, _sprintf_alloc("--vfio-vf-token=%s", 54286431df1SJun Zeng opts->vf_token)); 54386431df1SJun Zeng if (args == NULL) { 54486431df1SJun Zeng return -1; 54586431df1SJun Zeng } 54686431df1SJun Zeng } 54718d26e42SBen Walker #endif 54818d26e42SBen Walker 549687da749SJim Harris if (opts->env_context) { 550*b37db069SXuQi char *sp = NULL; 551687da749SJim Harris char *ptr = strdup(opts->env_context); 552*b37db069SXuQi char *tok = strtok_r(ptr, " \t", &sp); 553687da749SJim Harris 554687da749SJim Harris /* DPDK expects each argument as a separate string in the argv 555687da749SJim Harris * array, so we need to tokenize here in case the caller 556687da749SJim Harris * passed multiple arguments in the env_context string. 557687da749SJim Harris */ 558687da749SJim Harris while (tok != NULL) { 559687da749SJim Harris args = push_arg(args, &argcount, strdup(tok)); 560*b37db069SXuQi tok = strtok_r(NULL, " \t", &sp); 561687da749SJim Harris } 562687da749SJim Harris 563687da749SJim Harris free(ptr); 564687da749SJim Harris } 565687da749SJim Harris 566d631b855SJim Harris g_eal_cmdline = args; 567d631b855SJim Harris g_eal_cmdline_argcount = argcount; 56818d26e42SBen Walker return argcount; 56918d26e42SBen Walker } 57018d26e42SBen Walker 57181551144SXiaodong Liu int 572396c445cSJim Harris spdk_env_dpdk_post_init(bool legacy_mem) 57381551144SXiaodong Liu { 57437c0a02eSJim Harris int rc; 57537c0a02eSJim Harris 57652c674d2SJim Harris rc = pci_env_init(); 57752c674d2SJim Harris if (rc < 0) { 57852c674d2SJim Harris SPDK_ERRLOG("pci_env_init() failed\n"); 57952c674d2SJim Harris return rc; 58052c674d2SJim Harris } 58181551144SXiaodong Liu 58215d0ae62SSeth Howell rc = mem_map_init(legacy_mem); 58337c0a02eSJim Harris if (rc < 0) { 584d8190d02SVitaliy Mysak SPDK_ERRLOG("Failed to allocate mem_map\n"); 58537c0a02eSJim Harris return rc; 58681551144SXiaodong Liu } 58737c0a02eSJim Harris 58815d0ae62SSeth Howell rc = vtophys_init(); 58937c0a02eSJim Harris if (rc < 0) { 590d8190d02SVitaliy Mysak SPDK_ERRLOG("Failed to initialize vtophys\n"); 59137c0a02eSJim Harris return rc; 59281551144SXiaodong Liu } 59381551144SXiaodong Liu 59481551144SXiaodong Liu return 0; 59581551144SXiaodong Liu } 59681551144SXiaodong Liu 597f373369aSDarek Stojaczyk void 598f373369aSDarek Stojaczyk spdk_env_dpdk_post_fini(void) 599f373369aSDarek Stojaczyk { 6007b8964c5STomasz Zawadzki pci_env_fini(); 601fb51565aSDarek Stojaczyk 6023456377bSSeth Howell free_args(g_eal_cmdline, g_eal_cmdline_argcount); 603ef7c128aSXiaodong Liu g_eal_cmdline = NULL; 604ef7c128aSXiaodong Liu g_eal_cmdline_argcount = 0; 605f373369aSDarek Stojaczyk } 606f373369aSDarek Stojaczyk 60757fd99b9SJim Harris static void 60857fd99b9SJim Harris env_copy_opts(struct spdk_env_opts *opts, const struct spdk_env_opts *opts_user, 60957fd99b9SJim Harris size_t user_opts_size) 61018d26e42SBen Walker { 61157fd99b9SJim Harris opts->opts_size = sizeof(*opts); 61257fd99b9SJim Harris spdk_env_opts_init(opts); 61357fd99b9SJim Harris memcpy(opts, opts_user, offsetof(struct spdk_env_opts, opts_size)); 61457fd99b9SJim Harris 61557fd99b9SJim Harris #define SET_FIELD(field) \ 61657fd99b9SJim Harris if (offsetof(struct spdk_env_opts, field) + sizeof(opts->field) <= user_opts_size) { \ 61757fd99b9SJim Harris opts->field = opts_user->field; \ 61857fd99b9SJim Harris } 61957fd99b9SJim Harris 62041ff6dceSJim Harris SET_FIELD(enforce_numa); 62141ff6dceSJim Harris 62257fd99b9SJim Harris #undef SET_FIELD 62357fd99b9SJim Harris } 62457fd99b9SJim Harris 62557fd99b9SJim Harris int 62657fd99b9SJim Harris spdk_env_init(const struct spdk_env_opts *opts_user) 62757fd99b9SJim Harris { 62857fd99b9SJim Harris struct spdk_env_opts opts_local = {}; 62957fd99b9SJim Harris struct spdk_env_opts *opts = &opts_local; 63018d26e42SBen Walker char **dpdk_args = NULL; 63141c16a6dSTomasz Zawadzki char *args_print = NULL, *args_tmp = NULL; 6329e7217abSKrzysztof Karas OPENSSL_INIT_SETTINGS *settings; 633cc3c7906SPawel Wodkowski int i, rc; 6349f237eacSDaniel Verkamp int orig_optind; 635396c445cSJim Harris bool legacy_mem; 63657fd99b9SJim Harris size_t min_opts_size, user_opts_size; 63718d26e42SBen Walker 638ef7c128aSXiaodong Liu /* If SPDK env has been initialized before, then only pci env requires 639ef7c128aSXiaodong Liu * reinitialization. 640ef7c128aSXiaodong Liu */ 641ef7c128aSXiaodong Liu if (g_external_init == false) { 64257fd99b9SJim Harris if (opts_user != NULL) { 643ef7c128aSXiaodong Liu fprintf(stderr, "Invalid arguments to reinitialize SPDK env\n"); 644ef7c128aSXiaodong Liu return -EINVAL; 645ef7c128aSXiaodong Liu } 646ef7c128aSXiaodong Liu 647ef7c128aSXiaodong Liu printf("Starting %s / %s reinitialization...\n", SPDK_VERSION_STRING, rte_version()); 648ef7c128aSXiaodong Liu pci_env_reinit(); 649ef7c128aSXiaodong Liu 650ef7c128aSXiaodong Liu return 0; 651ef7c128aSXiaodong Liu } 652ef7c128aSXiaodong Liu 65357fd99b9SJim Harris if (opts_user == NULL) { 654ef7c128aSXiaodong Liu fprintf(stderr, "NULL arguments to initialize DPDK\n"); 655ef7c128aSXiaodong Liu return -EINVAL; 656ef7c128aSXiaodong Liu } 657ef7c128aSXiaodong Liu 65857fd99b9SJim Harris min_opts_size = offsetof(struct spdk_env_opts, opts_size) + sizeof(opts->opts_size); 65957fd99b9SJim Harris user_opts_size = opts_user->opts_size; 66057fd99b9SJim Harris if (user_opts_size < min_opts_size) { 66157fd99b9SJim Harris fprintf(stderr, "Invalid opts->opts_size %d too small, please set opts_size correctly\n", 66257fd99b9SJim Harris (int)opts_user->opts_size); 66357fd99b9SJim Harris user_opts_size = min_opts_size; 66457fd99b9SJim Harris } 66557fd99b9SJim Harris 66657fd99b9SJim Harris env_copy_opts(opts, opts_user, user_opts_size); 66757fd99b9SJim Harris 6689e7217abSKrzysztof Karas settings = OPENSSL_INIT_new(); 6699e7217abSKrzysztof Karas if (!settings) { 6709e7217abSKrzysztof Karas fprintf(stderr, "Failed to create openssl settings object\n"); 6719e7217abSKrzysztof Karas ERR_print_errors_fp(stderr); 6729e7217abSKrzysztof Karas return -ENOMEM; 6739e7217abSKrzysztof Karas } 6749e7217abSKrzysztof Karas 675387dbedcSNathan Claudel #if OPENSSL_VERSION_NUMBER >= 0x30000000 /* OPENSSL 3.0.0 */ 6769e7217abSKrzysztof Karas OPENSSL_INIT_set_config_file_flags(settings, 0); 677387dbedcSNathan Claudel #endif 6789e7217abSKrzysztof Karas rc = OPENSSL_init_ssl(OPENSSL_INIT_LOAD_CONFIG, settings); 6799e7217abSKrzysztof Karas if (rc != 1) { 6809e7217abSKrzysztof Karas fprintf(stderr, "Failed to initialize OpenSSL\n"); 6819e7217abSKrzysztof Karas ERR_print_errors_fp(stderr); 6829e7217abSKrzysztof Karas return -EINVAL; 6839e7217abSKrzysztof Karas } 6849e7217abSKrzysztof Karas OPENSSL_INIT_free(settings); 6859e7217abSKrzysztof Karas 6863456377bSSeth Howell rc = build_eal_cmdline(opts); 687cc3c7906SPawel Wodkowski if (rc < 0) { 688d8190d02SVitaliy Mysak SPDK_ERRLOG("Invalid arguments to initialize DPDK\n"); 68937c0a02eSJim Harris return -EINVAL; 69018d26e42SBen Walker } 69118d26e42SBen Walker 692d8190d02SVitaliy Mysak SPDK_PRINTF("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); 69341c16a6dSTomasz Zawadzki 69441c16a6dSTomasz Zawadzki args_print = _sprintf_alloc("[ DPDK EAL parameters: "); 69541c16a6dSTomasz Zawadzki if (args_print == NULL) { 69641c16a6dSTomasz Zawadzki return -ENOMEM; 69718d26e42SBen Walker } 69841c16a6dSTomasz Zawadzki for (i = 0; i < g_eal_cmdline_argcount; i++) { 69941c16a6dSTomasz Zawadzki args_tmp = args_print; 70041c16a6dSTomasz Zawadzki args_print = _sprintf_alloc("%s%s ", args_tmp, g_eal_cmdline[i]); 70141c16a6dSTomasz Zawadzki if (args_print == NULL) { 70241c16a6dSTomasz Zawadzki free(args_tmp); 70341c16a6dSTomasz Zawadzki return -ENOMEM; 70441c16a6dSTomasz Zawadzki } 70541c16a6dSTomasz Zawadzki free(args_tmp); 70641c16a6dSTomasz Zawadzki } 70741c16a6dSTomasz Zawadzki SPDK_PRINTF("%s]\n", args_print); 70841c16a6dSTomasz Zawadzki free(args_print); 70918d26e42SBen Walker 71018d26e42SBen Walker /* DPDK rearranges the array we pass to it, so make a copy 71118d26e42SBen Walker * before passing so we can still free the individual strings 71218d26e42SBen Walker * correctly. 71318d26e42SBen Walker */ 714d631b855SJim Harris dpdk_args = calloc(g_eal_cmdline_argcount, sizeof(char *)); 715118c0815SCunyin Chang if (dpdk_args == NULL) { 716d8190d02SVitaliy Mysak SPDK_ERRLOG("Failed to allocate dpdk_args\n"); 71737c0a02eSJim Harris return -ENOMEM; 718118c0815SCunyin Chang } 719d631b855SJim Harris memcpy(dpdk_args, g_eal_cmdline, sizeof(char *) * g_eal_cmdline_argcount); 72018d26e42SBen Walker 72118d26e42SBen Walker fflush(stdout); 7229f237eacSDaniel Verkamp orig_optind = optind; 7239f237eacSDaniel Verkamp optind = 1; 724d631b855SJim Harris rc = rte_eal_init(g_eal_cmdline_argcount, dpdk_args); 7259f237eacSDaniel Verkamp optind = orig_optind; 72618d26e42SBen Walker 72718d26e42SBen Walker free(dpdk_args); 72818d26e42SBen Walker 72918d26e42SBen Walker if (rc < 0) { 730570d89a2SJim Harris if (rte_errno == EALREADY) { 731d8190d02SVitaliy Mysak SPDK_ERRLOG("DPDK already initialized\n"); 732570d89a2SJim Harris } else { 733d8190d02SVitaliy Mysak SPDK_ERRLOG("Failed to initialize DPDK\n"); 734570d89a2SJim Harris } 73537c0a02eSJim Harris return -rte_errno; 73618d26e42SBen Walker } 737b49de91eSDaniel Verkamp 738396c445cSJim Harris legacy_mem = false; 739396c445cSJim Harris if (opts->env_context && strstr(opts->env_context, "--legacy-mem") != NULL) { 740396c445cSJim Harris legacy_mem = true; 741396c445cSJim Harris } 742396c445cSJim Harris 74330ff3042SXiaodong Liu rc = spdk_env_dpdk_post_init(legacy_mem); 74430ff3042SXiaodong Liu if (rc == 0) { 74530ff3042SXiaodong Liu g_external_init = false; 74630ff3042SXiaodong Liu } 74730ff3042SXiaodong Liu 74830ff3042SXiaodong Liu return rc; 74918d26e42SBen Walker } 750725f9de3SJim Harris 751af6fd29fSJim Harris /* We use priority 101 which is the highest priority level available 752af6fd29fSJim Harris * to applications (the toolchains reserve 1 to 100 for internal usage). 753af6fd29fSJim Harris * This ensures this destructor runs last, after any other destructors 754af6fd29fSJim Harris * that might still need the environment up and running. 755af6fd29fSJim Harris */ 756af6fd29fSJim Harris __attribute__((destructor(101))) static void 7578b81801eSJim Harris dpdk_cleanup(void) 7588b81801eSJim Harris { 7598b81801eSJim Harris /* Only call rte_eal_cleanup if the SPDK env library called rte_eal_init. */ 7608b81801eSJim Harris if (!g_external_init) { 7618b81801eSJim Harris rte_eal_cleanup(); 7628b81801eSJim Harris } 7638b81801eSJim Harris } 7648b81801eSJim Harris 765f373369aSDarek Stojaczyk void 766f373369aSDarek Stojaczyk spdk_env_fini(void) 767f373369aSDarek Stojaczyk { 768f373369aSDarek Stojaczyk spdk_env_dpdk_post_fini(); 769f373369aSDarek Stojaczyk } 770f373369aSDarek Stojaczyk 771725f9de3SJim Harris bool 772725f9de3SJim Harris spdk_env_dpdk_external_init(void) 773725f9de3SJim Harris { 774725f9de3SJim Harris return g_external_init; 775725f9de3SJim Harris } 776