1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "env_internal.h" 37 38 #include "spdk/version.h" 39 #include "spdk/env_dpdk.h" 40 41 #include <rte_config.h> 42 #include <rte_eal.h> 43 #include <rte_errno.h> 44 45 #define SPDK_ENV_DPDK_DEFAULT_NAME "spdk" 46 #define SPDK_ENV_DPDK_DEFAULT_SHM_ID -1 47 #define SPDK_ENV_DPDK_DEFAULT_MEM_SIZE -1 48 #define SPDK_ENV_DPDK_DEFAULT_MASTER_CORE -1 49 #define SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL -1 50 #define SPDK_ENV_DPDK_DEFAULT_CORE_MASK "0x1" 51 52 static char **g_eal_cmdline; 53 static int g_eal_cmdline_argcount; 54 static bool g_external_init = true; 55 56 static char * 57 _sprintf_alloc(const char *format, ...) 58 { 59 va_list args; 60 va_list args_copy; 61 char *buf; 62 size_t bufsize; 63 int rc; 64 65 va_start(args, format); 66 67 /* Try with a small buffer first. */ 68 bufsize = 32; 69 70 /* Limit maximum buffer size to something reasonable so we don't loop forever. */ 71 while (bufsize <= 1024 * 1024) { 72 buf = malloc(bufsize); 73 if (buf == NULL) { 74 va_end(args); 75 return NULL; 76 } 77 78 va_copy(args_copy, args); 79 rc = vsnprintf(buf, bufsize, format, args_copy); 80 va_end(args_copy); 81 82 /* 83 * If vsnprintf() returned a count within our current buffer size, we are done. 84 * The count does not include the \0 terminator, so rc == bufsize is not OK. 85 */ 86 if (rc >= 0 && (size_t)rc < bufsize) { 87 va_end(args); 88 return buf; 89 } 90 91 /* 92 * vsnprintf() should return the required space, but some libc versions do not 93 * implement this correctly, so just double the buffer size and try again. 94 * 95 * We don't need the data in buf, so rather than realloc(), use free() and malloc() 96 * again to avoid a copy. 97 */ 98 free(buf); 99 bufsize *= 2; 100 } 101 102 va_end(args); 103 return NULL; 104 } 105 106 static void 107 spdk_env_unlink_shared_files(void) 108 { 109 /* Starting with DPDK 18.05, there are more files with unpredictable paths 110 * and filenames. The --no-shconf option prevents from creating them, but 111 * only for DPDK 18.08+. For DPDK 18.05 we just leave them be. 112 */ 113 #if RTE_VERSION < RTE_VERSION_NUM(18, 05, 0, 0) 114 char buffer[PATH_MAX]; 115 116 snprintf(buffer, PATH_MAX, "/var/run/.spdk_pid%d_hugepage_info", getpid()); 117 if (unlink(buffer)) { 118 fprintf(stderr, "Unable to unlink shared memory file: %s. Error code: %d\n", buffer, errno); 119 } 120 #endif 121 } 122 123 void 124 spdk_env_opts_init(struct spdk_env_opts *opts) 125 { 126 if (!opts) { 127 return; 128 } 129 130 memset(opts, 0, sizeof(*opts)); 131 132 opts->name = SPDK_ENV_DPDK_DEFAULT_NAME; 133 opts->core_mask = SPDK_ENV_DPDK_DEFAULT_CORE_MASK; 134 opts->shm_id = SPDK_ENV_DPDK_DEFAULT_SHM_ID; 135 opts->mem_size = SPDK_ENV_DPDK_DEFAULT_MEM_SIZE; 136 opts->master_core = SPDK_ENV_DPDK_DEFAULT_MASTER_CORE; 137 opts->mem_channel = SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL; 138 } 139 140 static void 141 spdk_free_args(char **args, int argcount) 142 { 143 int i; 144 145 for (i = 0; i < argcount; i++) { 146 free(args[i]); 147 } 148 149 if (argcount) { 150 free(args); 151 } 152 } 153 154 static char ** 155 spdk_push_arg(char *args[], int *argcount, char *arg) 156 { 157 char **tmp; 158 159 if (arg == NULL) { 160 fprintf(stderr, "%s: NULL arg supplied\n", __func__); 161 spdk_free_args(args, *argcount); 162 return NULL; 163 } 164 165 tmp = realloc(args, sizeof(char *) * (*argcount + 1)); 166 if (tmp == NULL) { 167 free(arg); 168 spdk_free_args(args, *argcount); 169 return NULL; 170 } 171 172 tmp[*argcount] = arg; 173 (*argcount)++; 174 175 return tmp; 176 } 177 178 #if defined(__linux__) && defined(__x86_64__) 179 180 /* TODO: Can likely get this value from rlimits in the future */ 181 #define SPDK_IOMMU_VA_REQUIRED_WIDTH 48 182 #define VTD_CAP_MGAW_SHIFT 16 183 #define VTD_CAP_MGAW_MASK (0x3F << VTD_CAP_MGAW_SHIFT) 184 185 static int 186 spdk_get_iommu_width(void) 187 { 188 DIR *dir; 189 FILE *file; 190 struct dirent *entry; 191 char mgaw_path[64]; 192 char buf[64]; 193 char *end; 194 long long int val; 195 int width, tmp; 196 197 dir = opendir("/sys/devices/virtual/iommu/"); 198 if (dir == NULL) { 199 return -EINVAL; 200 } 201 202 width = 0; 203 204 while ((entry = readdir(dir)) != NULL) { 205 /* Find directories named "dmar0", "dmar1", etc */ 206 if (strncmp(entry->d_name, "dmar", sizeof("dmar") - 1) != 0) { 207 continue; 208 } 209 210 tmp = snprintf(mgaw_path, sizeof(mgaw_path), "/sys/devices/virtual/iommu/%s/intel-iommu/cap", 211 entry->d_name); 212 if ((unsigned)tmp >= sizeof(mgaw_path)) { 213 continue; 214 } 215 216 file = fopen(mgaw_path, "r"); 217 if (file == NULL) { 218 continue; 219 } 220 221 if (fgets(buf, sizeof(buf), file) == NULL) { 222 fclose(file); 223 continue; 224 } 225 226 val = strtoll(buf, &end, 16); 227 if (val == LLONG_MIN || val == LLONG_MAX) { 228 fclose(file); 229 continue; 230 } 231 232 tmp = ((val & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; 233 if (width == 0 || tmp < width) { 234 width = tmp; 235 } 236 237 fclose(file); 238 } 239 240 closedir(dir); 241 242 return width; 243 } 244 245 #endif 246 247 static int 248 spdk_build_eal_cmdline(const struct spdk_env_opts *opts) 249 { 250 int argcount = 0; 251 char **args; 252 253 args = NULL; 254 255 /* set the program name */ 256 args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", opts->name)); 257 if (args == NULL) { 258 return -1; 259 } 260 261 /* disable shared configuration files when in single process mode. This allows for cleaner shutdown */ 262 if (opts->shm_id < 0) { 263 args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", "--no-shconf")); 264 if (args == NULL) { 265 return -1; 266 } 267 } 268 269 /* set the coremask */ 270 /* NOTE: If coremask starts with '[' and ends with ']' it is a core list 271 */ 272 if (opts->core_mask[0] == '[') { 273 char *l_arg = _sprintf_alloc("-l %s", opts->core_mask + 1); 274 275 if (l_arg != NULL) { 276 int len = strlen(l_arg); 277 278 if (l_arg[len - 1] == ']') { 279 l_arg[len - 1] = '\0'; 280 } 281 } 282 args = spdk_push_arg(args, &argcount, l_arg); 283 } else { 284 args = spdk_push_arg(args, &argcount, _sprintf_alloc("-c %s", opts->core_mask)); 285 } 286 287 if (args == NULL) { 288 return -1; 289 } 290 291 /* set the memory channel number */ 292 if (opts->mem_channel > 0) { 293 args = spdk_push_arg(args, &argcount, _sprintf_alloc("-n %d", opts->mem_channel)); 294 if (args == NULL) { 295 return -1; 296 } 297 } 298 299 /* set the memory size */ 300 if (opts->mem_size >= 0) { 301 args = spdk_push_arg(args, &argcount, _sprintf_alloc("-m %d", opts->mem_size)); 302 if (args == NULL) { 303 return -1; 304 } 305 } 306 307 /* set the master core */ 308 if (opts->master_core > 0) { 309 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--master-lcore=%d", 310 opts->master_core)); 311 if (args == NULL) { 312 return -1; 313 } 314 } 315 316 /* set no pci if enabled */ 317 if (opts->no_pci) { 318 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--no-pci")); 319 if (args == NULL) { 320 return -1; 321 } 322 } 323 324 /* create just one hugetlbfs file */ 325 if (opts->hugepage_single_segments) { 326 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--single-file-segments")); 327 if (args == NULL) { 328 return -1; 329 } 330 } 331 332 /* unlink hugepages after initialization */ 333 if (opts->unlink_hugepage) { 334 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--huge-unlink")); 335 if (args == NULL) { 336 return -1; 337 } 338 } 339 340 /* use a specific hugetlbfs mount */ 341 if (opts->hugedir) { 342 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--huge-dir=%s", opts->hugedir)); 343 if (args == NULL) { 344 return -1; 345 } 346 } 347 348 #if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) && RTE_VERSION < RTE_VERSION_NUM(18, 5, 1, 0) 349 /* Dynamic memory management is buggy in DPDK 18.05.0. Don't use it. */ 350 if (!opts->env_context || strstr(opts->env_context, "--legacy-mem") == NULL) { 351 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--legacy-mem")); 352 if (args == NULL) { 353 return -1; 354 } 355 } 356 #endif 357 358 if (opts->num_pci_addr) { 359 size_t i; 360 char bdf[32]; 361 struct spdk_pci_addr *pci_addr = 362 opts->pci_blacklist ? opts->pci_blacklist : opts->pci_whitelist; 363 364 for (i = 0; i < opts->num_pci_addr; i++) { 365 spdk_pci_addr_fmt(bdf, 32, &pci_addr[i]); 366 args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s=%s", 367 (opts->pci_blacklist ? "--pci-blacklist" : "--pci-whitelist"), 368 bdf)); 369 if (args == NULL) { 370 return -1; 371 } 372 } 373 } 374 375 /* The following log-level options are not understood by older DPDKs */ 376 #if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) 377 /* Lower default EAL loglevel to RTE_LOG_NOTICE - normal, but significant messages. 378 * This can be overridden by specifying the same option in opts->env_context 379 */ 380 args = spdk_push_arg(args, &argcount, strdup("--log-level=lib.eal:6")); 381 if (args == NULL) { 382 return -1; 383 } 384 385 /* Lower default CRYPTO loglevel to RTE_LOG_ERR to avoid a ton of init msgs. 386 * This can be overridden by specifying the same option in opts->env_context 387 */ 388 args = spdk_push_arg(args, &argcount, strdup("--log-level=lib.cryptodev:5")); 389 if (args == NULL) { 390 return -1; 391 } 392 393 /* `user1` log type is used by rte_vhost, which prints an INFO log for each received 394 * vhost user message. We don't want that. The same log type is also used by a couple 395 * of other DPDK libs, but none of which we make use right now. If necessary, this can 396 * be overridden via opts->env_context. 397 */ 398 args = spdk_push_arg(args, &argcount, strdup("--log-level=user1:6")); 399 if (args == NULL) { 400 return -1; 401 } 402 #endif 403 404 if (opts->env_context) { 405 args = spdk_push_arg(args, &argcount, strdup(opts->env_context)); 406 if (args == NULL) { 407 return -1; 408 } 409 } 410 411 #ifdef __linux__ 412 413 #if defined(__x86_64__) 414 /* DPDK by default guesses that it should be using iova-mode=va so that it can 415 * support running as an unprivileged user. However, some systems (especially 416 * virtual machines) don't have an IOMMU capable of handling the full virtual 417 * address space and DPDK doesn't currently catch that. Add a check in SPDK 418 * and force iova-mode=pa here. */ 419 if (spdk_get_iommu_width() < SPDK_IOMMU_VA_REQUIRED_WIDTH) { 420 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); 421 if (args == NULL) { 422 return -1; 423 } 424 } 425 #elif defined(__PPC64__) 426 /* On Linux + PowerPC, DPDK doesn't support VA mode at all. Unfortunately, it doesn't correctly 427 * auto-detect at the moment, so we'll just force it here. */ 428 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa")); 429 if (args == NULL) { 430 return -1; 431 } 432 #endif 433 434 435 /* Set the base virtual address - it must be an address that is not in the 436 * ASAN shadow region, otherwise ASAN-enabled builds will ignore the 437 * mmap hint. 438 * 439 * Ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm 440 */ 441 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--base-virtaddr=0x200000000000")); 442 if (args == NULL) { 443 return -1; 444 } 445 446 /* --match-allocation prevents DPDK from merging or splitting system memory allocations under the hood. 447 * This is critical for RDMA when attempting to use an rte_mempool based buffer pool. If DPDK merges two 448 * physically or IOVA contiguous memory regions, then when we go to allocate a buffer pool, it can split 449 * the memory for a buffer over two allocations meaning the buffer will be split over a memory region. 450 */ 451 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0) 452 if (!opts->env_context || strstr(opts->env_context, "--legacy-mem") == NULL) { 453 args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", "--match-allocations")); 454 if (args == NULL) { 455 return -1; 456 } 457 } 458 #endif 459 460 if (opts->shm_id < 0) { 461 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk_pid%d", 462 getpid())); 463 if (args == NULL) { 464 return -1; 465 } 466 } else { 467 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk%d", 468 opts->shm_id)); 469 if (args == NULL) { 470 return -1; 471 } 472 473 /* set the process type */ 474 args = spdk_push_arg(args, &argcount, _sprintf_alloc("--proc-type=auto")); 475 if (args == NULL) { 476 return -1; 477 } 478 } 479 #endif 480 481 g_eal_cmdline = args; 482 g_eal_cmdline_argcount = argcount; 483 return argcount; 484 } 485 486 int 487 spdk_env_dpdk_post_init(bool legacy_mem) 488 { 489 int rc; 490 491 spdk_pci_init(); 492 493 rc = spdk_mem_map_init(legacy_mem); 494 if (rc < 0) { 495 fprintf(stderr, "Failed to allocate mem_map\n"); 496 return rc; 497 } 498 499 rc = spdk_vtophys_init(); 500 if (rc < 0) { 501 fprintf(stderr, "Failed to initialize vtophys\n"); 502 return rc; 503 } 504 505 return 0; 506 } 507 508 void 509 spdk_env_dpdk_post_fini(void) 510 { 511 spdk_pci_fini(); 512 513 spdk_free_args(g_eal_cmdline, g_eal_cmdline_argcount); 514 } 515 516 int 517 spdk_env_init(const struct spdk_env_opts *opts) 518 { 519 char **dpdk_args = NULL; 520 int i, rc; 521 int orig_optind; 522 bool legacy_mem; 523 524 g_external_init = false; 525 526 rc = spdk_build_eal_cmdline(opts); 527 if (rc < 0) { 528 fprintf(stderr, "Invalid arguments to initialize DPDK\n"); 529 return -EINVAL; 530 } 531 532 printf("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); 533 printf("[ DPDK EAL parameters: "); 534 for (i = 0; i < g_eal_cmdline_argcount; i++) { 535 printf("%s ", g_eal_cmdline[i]); 536 } 537 printf("]\n"); 538 539 /* DPDK rearranges the array we pass to it, so make a copy 540 * before passing so we can still free the individual strings 541 * correctly. 542 */ 543 dpdk_args = calloc(g_eal_cmdline_argcount, sizeof(char *)); 544 if (dpdk_args == NULL) { 545 fprintf(stderr, "Failed to allocate dpdk_args\n"); 546 return -ENOMEM; 547 } 548 memcpy(dpdk_args, g_eal_cmdline, sizeof(char *) * g_eal_cmdline_argcount); 549 550 fflush(stdout); 551 orig_optind = optind; 552 optind = 1; 553 rc = rte_eal_init(g_eal_cmdline_argcount, dpdk_args); 554 optind = orig_optind; 555 556 free(dpdk_args); 557 558 if (rc < 0) { 559 if (rte_errno == EALREADY) { 560 fprintf(stderr, "DPDK already initialized\n"); 561 } else { 562 fprintf(stderr, "Failed to initialize DPDK\n"); 563 } 564 return -rte_errno; 565 } 566 567 if (opts->shm_id < 0 && !opts->hugepage_single_segments) { 568 /* 569 * Unlink hugepage and config info files after init. This will ensure they get 570 * deleted on app exit, even if the app crashes and does not exit normally. 571 * Only do this when not in multi-process mode, since for multi-process other 572 * apps will need to open these files. These files are not created for 573 * "single file segments". 574 */ 575 spdk_env_unlink_shared_files(); 576 } 577 578 legacy_mem = false; 579 if (opts->env_context && strstr(opts->env_context, "--legacy-mem") != NULL) { 580 legacy_mem = true; 581 } 582 583 return spdk_env_dpdk_post_init(legacy_mem); 584 } 585 586 void 587 spdk_env_fini(void) 588 { 589 spdk_env_dpdk_post_fini(); 590 } 591 592 bool 593 spdk_env_dpdk_external_init(void) 594 { 595 return g_external_init; 596 } 597