xref: /spdk/lib/env_dpdk/init.c (revision ae7b5890ef728af40bd233a5011b924c482603bf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "env_internal.h"
37 
38 #include "spdk/version.h"
39 #include "spdk/env_dpdk.h"
40 
41 #include <rte_config.h>
42 #include <rte_eal.h>
43 
/*
 * Defaults installed by spdk_env_opts_init().
 *
 * The numeric defaults are negative sentinels: spdk_build_eal_cmdline() only
 * emits the matching EAL option when the field holds a usable value, and a
 * negative shm_id selects single process mode. They are parenthesized so the
 * macros expand safely inside any expression.
 */
#define SPDK_ENV_DPDK_DEFAULT_NAME		"spdk"
#define SPDK_ENV_DPDK_DEFAULT_SHM_ID		(-1)
#define SPDK_ENV_DPDK_DEFAULT_MEM_SIZE		(-1)
#define SPDK_ENV_DPDK_DEFAULT_MASTER_CORE	(-1)
#define SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL	(-1)
#define SPDK_ENV_DPDK_DEFAULT_CORE_MASK		"0x1"

/* EAL argument vector produced by spdk_build_eal_cmdline(); each entry is heap-allocated. */
static char **g_eal_cmdline;
/* Number of entries in g_eal_cmdline. */
static int g_eal_cmdline_argcount;
/* Stays true until spdk_env_init() runs; reported by spdk_env_dpdk_external_init(). */
static bool g_external_init = true;
54 
/*
 * printf-style formatting into a freshly malloc()ed, NUL-terminated string.
 *
 * Returns the formatted string, which the caller must free(), or NULL when
 * an allocation fails or the output does not fit in the largest buffer
 * tried (1 MiB).
 */
static char *
_sprintf_alloc(const char *format, ...)
{
	/* Cap on the buffer size so the retry loop below always terminates. */
	const size_t max_size = 1024 * 1024;
	va_list ap;
	char *result = NULL;
	size_t size;

	va_start(ap, format);

	/*
	 * Begin with a small buffer and double it until the output fits.
	 * vsnprintf() is supposed to report the required length, but some libc
	 * versions get that wrong, so its return value is only used as a
	 * "did it fit" test rather than as the next size to allocate.
	 */
	for (size = 32; size <= max_size; size *= 2) {
		va_list ap_attempt;
		char *candidate;
		int written;

		candidate = malloc(size);
		if (candidate == NULL) {
			break;
		}

		/* Every attempt formats from its own copy of the argument list. */
		va_copy(ap_attempt, ap);
		written = vsnprintf(candidate, size, format, ap_attempt);
		va_end(ap_attempt);

		/* The count excludes the terminating \0, so written == size means truncation. */
		if (written >= 0 && (size_t)written < size) {
			result = candidate;
			break;
		}

		/* Nothing in the buffer is worth keeping; drop it instead of realloc()ing. */
		free(candidate);
	}

	va_end(ap);
	return result;
}
104 
105 static void
106 spdk_env_unlink_shared_files(void)
107 {
108 	/* Starting with DPDK 18.05, there are more files with unpredictable paths
109 	 * and filenames. The --no-shconf option prevents from creating them, but
110 	 * only for DPDK 18.08+. For DPDK 18.05 we just leave them be.
111 	 */
112 #if RTE_VERSION < RTE_VERSION_NUM(18, 05, 0, 0)
113 	char buffer[PATH_MAX];
114 
115 	snprintf(buffer, PATH_MAX, "/var/run/.spdk_pid%d_hugepage_info", getpid());
116 	if (unlink(buffer)) {
117 		fprintf(stderr, "Unable to unlink shared memory file: %s. Error code: %d\n", buffer, errno);
118 	}
119 #endif
120 }
121 
122 void
123 spdk_env_opts_init(struct spdk_env_opts *opts)
124 {
125 	if (!opts) {
126 		return;
127 	}
128 
129 	memset(opts, 0, sizeof(*opts));
130 
131 	opts->name = SPDK_ENV_DPDK_DEFAULT_NAME;
132 	opts->core_mask = SPDK_ENV_DPDK_DEFAULT_CORE_MASK;
133 	opts->shm_id = SPDK_ENV_DPDK_DEFAULT_SHM_ID;
134 	opts->mem_size = SPDK_ENV_DPDK_DEFAULT_MEM_SIZE;
135 	opts->master_core = SPDK_ENV_DPDK_DEFAULT_MASTER_CORE;
136 	opts->mem_channel = SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL;
137 }
138 
/*
 * Release an argument vector: every string in it, then the vector itself.
 *
 * With argcount == 0 nothing is freed, not even args.
 */
static void
spdk_free_args(char **args, int argcount)
{
	int idx = 0;

	if (argcount == 0) {
		return;
	}

	while (idx < argcount) {
		free(args[idx]);
		idx++;
	}

	free(args);
}
152 
/*
 * Append arg to the argument vector args, growing the vector by one slot.
 *
 * On success the (possibly moved) vector is returned, *argcount is
 * incremented and the vector takes ownership of arg.
 *
 * On failure - arg is NULL or the vector cannot be grown - NULL is returned
 * after releasing arg and the whole existing vector, so the caller has
 * nothing left to clean up. *argcount is left unchanged in that case.
 */
static char **
spdk_push_arg(char *args[], int *argcount, char *arg)
{
	const int count = *argcount;
	char **grown;

	if (arg == NULL) {
		fprintf(stderr, "%s: NULL arg supplied\n", __func__);
		spdk_free_args(args, count);
		return NULL;
	}

	grown = realloc(args, sizeof(char *) * (count + 1));
	if (grown == NULL) {
		/* realloc() left the old vector intact; release it together with arg. */
		free(arg);
		spdk_free_args(args, count);
		return NULL;
	}

	grown[count] = arg;
	*argcount = count + 1;

	return grown;
}
176 
177 static int
178 spdk_build_eal_cmdline(const struct spdk_env_opts *opts)
179 {
180 	int argcount = 0;
181 	char **args;
182 
183 	args = NULL;
184 
185 	/* set the program name */
186 	args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", opts->name));
187 	if (args == NULL) {
188 		return -1;
189 	}
190 
191 	/* disable shared configuration files when in single process mode. This allows for cleaner shutdown */
192 	if (opts->shm_id < 0) {
193 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", "--no-shconf"));
194 		if (args == NULL) {
195 			return -1;
196 		}
197 	}
198 
199 	/* set the coremask */
200 	/* NOTE: If coremask starts with '[' and ends with ']' it is a core list
201 	 */
202 	if (opts->core_mask[0] == '[') {
203 		char *l_arg = _sprintf_alloc("-l %s", opts->core_mask + 1);
204 		int len = strlen(l_arg);
205 		if (l_arg[len - 1] == ']') {
206 			l_arg[len - 1] = '\0';
207 		}
208 		args = spdk_push_arg(args, &argcount, l_arg);
209 	} else {
210 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("-c %s", opts->core_mask));
211 	}
212 
213 	if (args == NULL) {
214 		return -1;
215 	}
216 
217 	/* set the memory channel number */
218 	if (opts->mem_channel > 0) {
219 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("-n %d", opts->mem_channel));
220 		if (args == NULL) {
221 			return -1;
222 		}
223 	}
224 
225 	/* set the memory size */
226 	if (opts->mem_size >= 0) {
227 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("-m %d", opts->mem_size));
228 		if (args == NULL) {
229 			return -1;
230 		}
231 	}
232 
233 	/* set the master core */
234 	if (opts->master_core > 0) {
235 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--master-lcore=%d",
236 				     opts->master_core));
237 		if (args == NULL) {
238 			return -1;
239 		}
240 	}
241 
242 	/* set no pci  if enabled */
243 	if (opts->no_pci) {
244 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--no-pci"));
245 		if (args == NULL) {
246 			return -1;
247 		}
248 	}
249 
250 	/* create just one hugetlbfs file */
251 	if (opts->hugepage_single_segments) {
252 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--single-file-segments"));
253 		if (args == NULL) {
254 			return -1;
255 		}
256 	}
257 
258 	/* unlink hugepages after initialization */
259 	if (opts->unlink_hugepage) {
260 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--huge-unlink"));
261 		if (args == NULL) {
262 			return -1;
263 		}
264 	}
265 
266 	/* use a specific hugetlbfs mount */
267 	if (opts->hugedir) {
268 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--huge-dir=%s", opts->hugedir));
269 		if (args == NULL) {
270 			return -1;
271 		}
272 	}
273 
274 #if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) && RTE_VERSION < RTE_VERSION_NUM(18, 5, 1, 0)
275 	/* Dynamic memory management is buggy in DPDK 18.05.0. Don't use it. */
276 	if (!opts->env_context || strcmp(opts->env_context, "--legacy-mem") != 0) {
277 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--legacy-mem"));
278 		if (args == NULL) {
279 			return -1;
280 		}
281 	}
282 #endif
283 
284 	if (opts->num_pci_addr) {
285 		size_t i;
286 		char bdf[32];
287 		struct spdk_pci_addr *pci_addr =
288 				opts->pci_blacklist ? opts->pci_blacklist : opts->pci_whitelist;
289 
290 		for (i = 0; i < opts->num_pci_addr; i++) {
291 			spdk_pci_addr_fmt(bdf, 32, &pci_addr[i]);
292 			args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s=%s",
293 					     (opts->pci_blacklist ? "--pci-blacklist" : "--pci-whitelist"),
294 					     bdf));
295 			if (args == NULL) {
296 				return -1;
297 			}
298 		}
299 	}
300 
301 	/* Lower default EAL loglevel to RTE_LOG_NOTICE - normal, but significant messages.
302 	 * This can be overridden by specifying the same option in opts->env_context
303 	 */
304 	args = spdk_push_arg(args, &argcount, strdup("--log-level=lib.eal:6"));
305 	if (args == NULL) {
306 		return -1;
307 	}
308 
309 	/* Lower default CRYPTO loglevel to RTE_LOG_ERR to avoid a ton of init msgs.
310 	 * This can be overridden by specifying the same option in opts->env_context
311 	 */
312 	args = spdk_push_arg(args, &argcount, strdup("--log-level=lib.cryptodev:5"));
313 	if (args == NULL) {
314 		return -1;
315 	}
316 
317 	/* `user1` log type is used by rte_vhost, which prints an INFO log for each received
318 	 * vhost user message. We don't want that. The same log type is also used by a couple
319 	 * of other DPDK libs, but none of which we make use right now. If necessary, this can
320 	 * be overridden via opts->env_context.
321 	 */
322 	args = spdk_push_arg(args, &argcount, strdup("--log-level=user1:6"));
323 	if (args == NULL) {
324 		return -1;
325 	}
326 
327 	if (opts->env_context) {
328 		args = spdk_push_arg(args, &argcount, strdup(opts->env_context));
329 		if (args == NULL) {
330 			return -1;
331 		}
332 	}
333 
334 #ifdef __linux__
335 	/* Set the base virtual address - it must be an address that is not in the
336 	 * ASAN shadow region, otherwise ASAN-enabled builds will ignore the
337 	 * mmap hint.
338 	 *
339 	 * Ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
340 	 */
341 	args = spdk_push_arg(args, &argcount, _sprintf_alloc("--base-virtaddr=0x200000000000"));
342 	if (args == NULL) {
343 		return -1;
344 	}
345 
346 	/* --match-allocation prevents DPDK from merging or splitting system memory allocations under the hood.
347 	 * This is critical for RDMA when attempting to use an rte_mempool based buffer pool. If DPDK merges two
348 	 * physically or IOVA contiguous memory regions, then when we go to allocate a buffer pool, it can split
349 	 * the memory for a buffer over two allocations meaning the buffer will be split over a memory region.
350 	 */
351 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
352 	if (!opts->env_context || strcmp(opts->env_context, "--legacy-mem") != 0) {
353 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", "--match-allocations"));
354 		if (args == NULL) {
355 			return -1;
356 		}
357 	}
358 #endif
359 
360 	if (opts->shm_id < 0) {
361 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk_pid%d",
362 				     getpid()));
363 		if (args == NULL) {
364 			return -1;
365 		}
366 	} else {
367 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk%d",
368 				     opts->shm_id));
369 		if (args == NULL) {
370 			return -1;
371 		}
372 
373 		/* set the process type */
374 		args = spdk_push_arg(args, &argcount, _sprintf_alloc("--proc-type=auto"));
375 		if (args == NULL) {
376 			return -1;
377 		}
378 	}
379 #endif
380 
381 	g_eal_cmdline = args;
382 	g_eal_cmdline_argcount = argcount;
383 	return argcount;
384 }
385 
/*
 * Bring up the SPDK-side environment pieces: the PCI layer, the memory map
 * and vtophys translation, in that order.
 *
 * Returns 0 on success, or -1 after printing a message to stderr when the
 * memory map or vtophys step fails. A failing step skips the ones after it.
 */
int
spdk_env_dpdk_post_init(void)
{
	const char *failure = NULL;

	spdk_pci_init();

	if (spdk_mem_map_init() < 0) {
		failure = "Failed to allocate mem_map\n";
	} else if (spdk_vtophys_init() < 0) {
		failure = "Failed to initialize vtophys\n";
	}

	if (failure != NULL) {
		fprintf(stderr, "%s", failure);
		return -1;
	}

	return 0;
}
402 
403 void
404 spdk_env_dpdk_post_fini(void)
405 {
406 	spdk_pci_fini();
407 
408 	spdk_free_args(g_eal_cmdline, g_eal_cmdline_argcount);
409 }
410 
411 int
412 spdk_env_init(const struct spdk_env_opts *opts)
413 {
414 	char **dpdk_args = NULL;
415 	int i, rc;
416 	int orig_optind;
417 
418 	g_external_init = false;
419 
420 	rc = spdk_build_eal_cmdline(opts);
421 	if (rc < 0) {
422 		fprintf(stderr, "Invalid arguments to initialize DPDK\n");
423 		return -1;
424 	}
425 
426 	printf("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version());
427 	printf("[ DPDK EAL parameters: ");
428 	for (i = 0; i < g_eal_cmdline_argcount; i++) {
429 		printf("%s ", g_eal_cmdline[i]);
430 	}
431 	printf("]\n");
432 
433 	/* DPDK rearranges the array we pass to it, so make a copy
434 	 * before passing so we can still free the individual strings
435 	 * correctly.
436 	 */
437 	dpdk_args = calloc(g_eal_cmdline_argcount, sizeof(char *));
438 	if (dpdk_args == NULL) {
439 		fprintf(stderr, "Failed to allocate dpdk_args\n");
440 		return -1;
441 	}
442 	memcpy(dpdk_args, g_eal_cmdline, sizeof(char *) * g_eal_cmdline_argcount);
443 
444 	fflush(stdout);
445 	orig_optind = optind;
446 	optind = 1;
447 	rc = rte_eal_init(g_eal_cmdline_argcount, dpdk_args);
448 	optind = orig_optind;
449 
450 	free(dpdk_args);
451 
452 	if (rc < 0) {
453 		fprintf(stderr, "Failed to initialize DPDK\n");
454 		return -1;
455 	}
456 
457 	if (opts->shm_id < 0 && !opts->hugepage_single_segments) {
458 		/*
459 		 * Unlink hugepage and config info files after init.  This will ensure they get
460 		 *  deleted on app exit, even if the app crashes and does not exit normally.
461 		 *  Only do this when not in multi-process mode, since for multi-process other
462 		 *  apps will need to open these files. These files are not created for
463 		 *  "single file segments".
464 		 */
465 		spdk_env_unlink_shared_files();
466 	}
467 
468 	return spdk_env_dpdk_post_init();
469 }
470 
/*
 * Public teardown entry point. All of the work is done by
 * spdk_env_dpdk_post_fini().
 */
void
spdk_env_fini(void)
{
	spdk_env_dpdk_post_fini();
}
476 
477 bool
478 spdk_env_dpdk_external_init(void)
479 {
480 	return g_external_init;
481 }
482