xref: /dpdk/lib/eal/linux/eal_memory.c (revision 30a1de105a5f40d77b344a891c4a68f79e815c43)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation.
3  * Copyright(c) 2013 6WIND S.A.
4  */
5 
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <stdbool.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <stdint.h>
12 #include <inttypes.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <sys/file.h>
17 #include <sys/resource.h>
18 #include <unistd.h>
19 #include <limits.h>
20 #include <signal.h>
21 #include <setjmp.h>
22 #ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
23 #define MEMFD_SUPPORTED
24 #endif
25 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
26 #include <numa.h>
27 #include <numaif.h>
28 #endif
29 
30 #include <rte_errno.h>
31 #include <rte_log.h>
32 #include <rte_memory.h>
33 #include <rte_eal.h>
34 #include <rte_lcore.h>
35 #include <rte_common.h>
36 
37 #include "eal_private.h"
38 #include "eal_memalloc.h"
39 #include "eal_memcfg.h"
40 #include "eal_internal_cfg.h"
41 #include "eal_filesystem.h"
42 #include "eal_hugepages.h"
43 #include "eal_options.h"
44 
45 #define PFN_MASK_SIZE	8
46 
47 /**
48  * @file
49  * Huge page mapping under Linux
50  *
51  * To reserve a large contiguous amount of memory, we use the hugepage
52  * feature of Linux. For that, we need to have hugetlbfs mounted. This
53  * code will create many files in that directory (one per page) and
54  * map them into virtual memory. For each page, we will retrieve its
55  * physical address and remap it in order to obtain a virtually
56  * contiguous zone that is also physically contiguous.
57  */
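/*
 * A minimal sketch of the per-page flow implemented below (hypothetical
 * hugetlbfs mount point and file name; error handling omitted):
 *
 *	int fd = open("/mnt/huge/rtemap_0", O_CREAT | O_RDWR, 0600);
 *	void *va = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE,
 *			MAP_SHARED | MAP_POPULATE, fd, 0);
 *	phys_addr_t pa = rte_mem_virt2phy(va);
 *
 * Pages are then sorted by physical address and remapped into one
 * contiguous virtual area by remap_segment().
 */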
58 
59 static int phys_addrs_available = -1;
60 
61 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
62 
63 uint64_t eal_get_baseaddr(void)
64 {
65 	/*
66 	 * The Linux kernel uses a very high address as the starting address
67 	 * for serving mmap calls. If there are addressing limitations and IOVA
68 	 * mode is VA, this starting address is likely too high for those
69 	 * devices. However, it is possible to use a lower address in the
70 	 * process virtual address space, as with 64 bits there is a lot of
71 	 * available space.
72 	 *
73 	 * Currently known limitations are 39 or 40 bits. Setting the starting
74 	 * address at 4GB implies there are 508GB or 1020GB for mapping the
75 	 * available hugepages. This is likely enough for most systems, although
76 	 * a device with addressing limitations should call
77 	 * rte_mem_check_dma_mask to ensure all memory is within the supported
78 	 * range.
79 	 */
80 	return 0x100000000ULL;
81 }
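/*
 * Illustrative sketch of the check mentioned above, for a device limited
 * to, say, 40 address bits:
 *
 *	if (rte_mem_check_dma_mask(40) != 0)
 *		rte_panic("memory not reachable with a 40-bit DMA mask\n");
 *
 * (rte_panic() is used only for brevity; a real driver would typically
 * fail its probe instead.)
 */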
82 
83 /*
84  * Get physical address of any mapped virtual address in the current process.
85  */
86 phys_addr_t
87 rte_mem_virt2phy(const void *virtaddr)
88 {
89 	int fd, retval;
90 	uint64_t page, physaddr;
91 	unsigned long virt_pfn;
92 	int page_size;
93 	off_t offset;
94 
95 	if (phys_addrs_available == 0)
96 		return RTE_BAD_IOVA;
97 
98 	/* standard page size */
99 	page_size = getpagesize();
100 
101 	fd = open("/proc/self/pagemap", O_RDONLY);
102 	if (fd < 0) {
103 		RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
104 			__func__, strerror(errno));
105 		return RTE_BAD_IOVA;
106 	}
107 
108 	virt_pfn = (unsigned long)virtaddr / page_size;
109 	offset = sizeof(uint64_t) * virt_pfn;
110 	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
111 		RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
112 				__func__, strerror(errno));
113 		close(fd);
114 		return RTE_BAD_IOVA;
115 	}
116 
117 	retval = read(fd, &page, PFN_MASK_SIZE);
118 	close(fd);
119 	if (retval < 0) {
120 		RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
121 				__func__, strerror(errno));
122 		return RTE_BAD_IOVA;
123 	} else if (retval != PFN_MASK_SIZE) {
124 		RTE_LOG(INFO, EAL, "%s(): read %d bytes from /proc/self/pagemap "
125 				"but expected %d:\n",
126 				__func__, retval, PFN_MASK_SIZE);
127 		return RTE_BAD_IOVA;
128 	}
129 
130 	/*
131 	 * the pfn (page frame number) is stored in bits 0-54 (see
132 	 * pagemap.txt in the Linux kernel Documentation)
133 	 */
134 	if ((page & 0x7fffffffffffffULL) == 0)
135 		return RTE_BAD_IOVA;
136 
137 	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
138 		+ ((unsigned long)virtaddr % page_size);
139 
140 	return physaddr;
141 }
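/*
 * Worked example of the arithmetic above, assuming a 4 kB base page size:
 * for virtaddr 0x7f5c40001234, virt_pfn = 0x7f5c40001, so the 8-byte
 * pagemap entry is read at offset 8 * 0x7f5c40001. If that entry contains
 * pfn 0x123456, then:
 *
 *	physaddr = 0x123456 * 4096 + (0x7f5c40001234 % 4096)
 *	         = 0x123456000 + 0x234
 *	         = 0x123456234
 */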
142 
143 rte_iova_t
144 rte_mem_virt2iova(const void *virtaddr)
145 {
146 	if (rte_eal_iova_mode() == RTE_IOVA_VA)
147 		return (uintptr_t)virtaddr;
148 	return rte_mem_virt2phy(virtaddr);
149 }
150 
151 /*
152  * For each hugepage in hugepg_tbl, fill the physaddr value. We find
153  * it by browsing the /proc/self/pagemap special file.
154  */
155 static int
156 find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
157 {
158 	unsigned int i;
159 	phys_addr_t addr;
160 
161 	for (i = 0; i < hpi->num_pages[0]; i++) {
162 		addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
163 		if (addr == RTE_BAD_PHYS_ADDR)
164 			return -1;
165 		hugepg_tbl[i].physaddr = addr;
166 	}
167 	return 0;
168 }
169 
170 /*
171  * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
172  */
173 static int
174 set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
175 {
176 	unsigned int i;
177 	static phys_addr_t addr;
178 
179 	for (i = 0; i < hpi->num_pages[0]; i++) {
180 		hugepg_tbl[i].physaddr = addr;
181 		addr += hugepg_tbl[i].size;
182 	}
183 	return 0;
184 }
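/*
 * This variant is used when real physical addresses are unavailable or not
 * needed (see eal_legacy_hugepage_init()): pages are assigned increasing
 * addresses from a static counter, so that they still look contiguous to
 * the sorting and remapping code below.
 */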
185 
186 /*
187  * Check whether address-space layout randomization is enabled in
188  * the kernel. This is important for multi-process, as ASLR can prevent
189  * two processes from mapping data at the same virtual address.
190  * Returns:
191  *    0 - address space randomization disabled
192  *    1/2 - address space randomization enabled
193  *    negative error code on error
194  */
195 static int
196 aslr_enabled(void)
197 {
198 	char c;
199 	int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
200 	if (fd < 0)
201 		return -errno;
202 	retval = read(fd, &c, 1);
203 	close(fd);
204 	if (retval < 0)
205 		return -errno;
206 	if (retval == 0)
207 		return -EIO;
208 	switch (c) {
209 		case '0' : return 0;
210 		case '1' : return 1;
211 		case '2' : return 2;
212 		default: return -EINVAL;
213 	}
214 }
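/*
 * If multi-process mappings fail because of ASLR, it can be disabled, e.g.:
 *
 *	sysctl -w kernel.randomize_va_space=0
 *
 * or by writing '0' to /proc/sys/kernel/randomize_va_space.
 */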
215 
216 static sigjmp_buf huge_jmpenv;
217 
218 static void huge_sigbus_handler(int signo __rte_unused)
219 {
220 	siglongjmp(huge_jmpenv, 1);
221 }
222 
223 /* Put sigsetjmp into a wrapper function to avoid compiler warnings: any
224  * non-volatile, non-static local variable in the stack frame calling
225  * sigsetjmp might be clobbered by a call to longjmp.
226  */
227 static int huge_wrap_sigsetjmp(void)
228 {
229 	return sigsetjmp(huge_jmpenv, 1);
230 }
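/*
 * Usage pattern (as in map_all_hugepages() below): install the SIGBUS
 * handler, then guard the first write that faults each page in:
 *
 *	if (huge_wrap_sigsetjmp()) {
 *		// SIGBUS was raised while touching the page: clean up and stop
 *	}
 *	*(int *)virtaddr = 0;
 */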
231 
232 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
233 /* Callback for numa library. */
234 void numa_error(char *where)
235 {
236 	RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno));
237 }
238 #endif
239 
240 /*
241  * Mmap all hugepages of the hugepage table: for each page, it first opens
242  * a file in hugetlbfs and then mmap()s hugepage_sz bytes of it. The virtual
243  * address is stored in hugepg_tbl[i].orig_va; the pages are later remapped
244  * into contiguous virtual (and physical) blocks by remap_segment(), which
245  * fills hugepg_tbl[i].final_va.
246  */
247 static unsigned
248 map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
249 		  uint64_t *essential_memory __rte_unused)
250 {
251 	int fd;
252 	unsigned i;
253 	void *virtaddr;
254 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
255 	int node_id = -1;
256 	int essential_prev = 0;
257 	int oldpolicy;
258 	struct bitmask *oldmask = NULL;
259 	bool have_numa = true;
260 	unsigned long maxnode = 0;
261 	const struct internal_config *internal_conf =
262 		eal_get_internal_configuration();
263 
264 	/* Check if kernel supports NUMA. */
265 	if (numa_available() != 0) {
266 		RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
267 		have_numa = false;
268 	}
269 
270 	if (have_numa) {
271 		RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
272 		oldmask = numa_allocate_nodemask();
273 		if (get_mempolicy(&oldpolicy, oldmask->maskp,
274 				  oldmask->size + 1, 0, 0) < 0) {
275 			RTE_LOG(ERR, EAL,
276 				"Failed to get current mempolicy: %s. "
277 				"Assuming MPOL_DEFAULT.\n", strerror(errno));
278 			oldpolicy = MPOL_DEFAULT;
279 		}
280 		for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
281 			if (internal_conf->socket_mem[i])
282 				maxnode = i + 1;
283 	}
284 #endif
285 
286 	for (i = 0; i < hpi->num_pages[0]; i++) {
287 		struct hugepage_file *hf = &hugepg_tbl[i];
288 		uint64_t hugepage_sz = hpi->hugepage_sz;
289 
290 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
291 		if (maxnode) {
292 			unsigned int j;
293 
294 			for (j = 0; j < maxnode; j++)
295 				if (essential_memory[j])
296 					break;
297 
298 			if (j == maxnode) {
299 				node_id = (node_id + 1) % maxnode;
300 				while (!internal_conf->socket_mem[node_id]) {
301 					node_id++;
302 					node_id %= maxnode;
303 				}
304 				essential_prev = 0;
305 			} else {
306 				node_id = j;
307 				essential_prev = essential_memory[j];
308 
309 				if (essential_memory[j] < hugepage_sz)
310 					essential_memory[j] = 0;
311 				else
312 					essential_memory[j] -= hugepage_sz;
313 			}
314 
315 			RTE_LOG(DEBUG, EAL,
316 				"Setting policy MPOL_PREFERRED for socket %d\n",
317 				node_id);
318 			numa_set_preferred(node_id);
319 		}
320 #endif
321 
322 		hf->file_id = i;
323 		hf->size = hugepage_sz;
324 		eal_get_hugefile_path(hf->filepath, sizeof(hf->filepath),
325 				hpi->hugedir, hf->file_id);
326 		hf->filepath[sizeof(hf->filepath) - 1] = '\0';
327 
328 		/* try to create hugepage file */
329 		fd = open(hf->filepath, O_CREAT | O_RDWR, 0600);
330 		if (fd < 0) {
331 			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
332 					strerror(errno));
333 			goto out;
334 		}
335 
336 		/* map the segment and populate page tables;
337 		 * the kernel fills this segment with zeros. We don't care where
338 		 * this gets mapped, since the pages will be sorted and remapped
339 		 * into their final locations later.
340 		 */
341 		virtaddr = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE,
342 				MAP_SHARED | MAP_POPULATE, fd, 0);
343 		if (virtaddr == MAP_FAILED) {
344 			RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
345 					strerror(errno));
346 			close(fd);
347 			goto out;
348 		}
349 
350 		hf->orig_va = virtaddr;
351 
352 		/* In Linux, hugetlb limitations, such as cgroup limits, are
353 		 * enforced at fault time instead of at mmap() time, even
354 		 * with the MAP_POPULATE option. The kernel will then send
355 		 * a SIGBUS signal. To avoid being killed, save the stack
356 		 * environment here; if SIGBUS happens, we can jump
357 		 * back to it.
358 		 */
359 		if (huge_wrap_sigsetjmp()) {
360 			RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
361 				"hugepages of size %u MB\n",
362 				(unsigned int)(hugepage_sz / 0x100000));
363 			munmap(virtaddr, hugepage_sz);
364 			close(fd);
365 			unlink(hugepg_tbl[i].filepath);
366 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
367 			if (maxnode)
368 				essential_memory[node_id] =
369 					essential_prev;
370 #endif
371 			goto out;
372 		}
373 		*(int *)virtaddr = 0;
374 
375 		/* set shared lock on the file. */
376 		if (flock(fd, LOCK_SH) < 0) {
377 			RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
378 				__func__, strerror(errno));
379 			close(fd);
380 			goto out;
381 		}
382 
383 		close(fd);
384 	}
385 
386 out:
387 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
388 	if (maxnode) {
389 		RTE_LOG(DEBUG, EAL,
390 			"Restoring previous memory policy: %d\n", oldpolicy);
391 		if (oldpolicy == MPOL_DEFAULT) {
392 			numa_set_localalloc();
393 		} else if (set_mempolicy(oldpolicy, oldmask->maskp,
394 					 oldmask->size + 1) < 0) {
395 			RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
396 				strerror(errno));
397 			numa_set_localalloc();
398 		}
399 	}
400 	if (oldmask != NULL)
401 		numa_free_cpumask(oldmask);
402 #endif
403 	return i;
404 }
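/*
 * Note: map_all_hugepages() returns the number of pages successfully mapped,
 * which may be less than requested (e.g. if a SIGBUS was caught); the caller
 * in eal_legacy_hugepage_init() treats such a shortfall as fewer available
 * pages rather than as a fatal error.
 */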
405 
406 /*
407  * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
408  * page.
409  */
410 static int
411 find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
412 {
413 	int socket_id;
414 	char *end, *nodestr;
415 	unsigned i, hp_count = 0;
416 	uint64_t virt_addr;
417 	char buf[BUFSIZ];
418 	char hugedir_str[PATH_MAX];
419 	FILE *f;
420 
421 	f = fopen("/proc/self/numa_maps", "r");
422 	if (f == NULL) {
423 		RTE_LOG(NOTICE, EAL, "NUMA support not available"
424 			"; consider that all memory is in socket_id 0\n");
425 		return 0;
426 	}
427 
428 	snprintf(hugedir_str, sizeof(hugedir_str),
429 			"%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
430 
431 	/* parse numa map */
432 	while (fgets(buf, sizeof(buf), f) != NULL) {
433 
434 		/* ignore non huge page */
435 		if (strstr(buf, " huge ") == NULL &&
436 				strstr(buf, hugedir_str) == NULL)
437 			continue;
438 
439 		/* get zone addr */
440 		virt_addr = strtoull(buf, &end, 16);
441 		if (virt_addr == 0 || end == buf) {
442 			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
443 			goto error;
444 		}
445 
446 		/* get node id (socket id) */
447 		nodestr = strstr(buf, " N");
448 		if (nodestr == NULL) {
449 			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
450 			goto error;
451 		}
452 		nodestr += 2;
453 		end = strstr(nodestr, "=");
454 		if (end == NULL) {
455 			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
456 			goto error;
457 		}
458 		end[0] = '\0';
459 		end = NULL;
460 
461 		socket_id = strtoul(nodestr, &end, 0);
462 		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
463 			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
464 			goto error;
465 		}
466 
467 		/* if we find this page in our mappings, set socket_id */
468 		for (i = 0; i < hpi->num_pages[0]; i++) {
469 			void *va = (void *)(unsigned long)virt_addr;
470 			if (hugepg_tbl[i].orig_va == va) {
471 				hugepg_tbl[i].socket_id = socket_id;
472 				hp_count++;
473 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
474 				RTE_LOG(DEBUG, EAL,
475 					"Hugepage %s is on socket %d\n",
476 					hugepg_tbl[i].filepath, socket_id);
477 #endif
478 			}
479 		}
480 	}
481 
482 	if (hp_count < hpi->num_pages[0])
483 		goto error;
484 
485 	fclose(f);
486 	return 0;
487 
488 error:
489 	fclose(f);
490 	return -1;
491 }
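/*
 * For reference, a /proc/self/numa_maps entry for a mapped hugepage looks
 * roughly like this (exact fields vary with kernel version):
 *
 *	7f5c40000000 prefer:0 file=/mnt/huge/rtemap_0 huge dirty=1 N0=1
 *		kernelpagesize_kB=2048
 *
 * The parser above takes the leading virtual address and the "N0=1" node
 * token to associate the page with socket 0.
 */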
492 
493 static int
494 cmp_physaddr(const void *a, const void *b)
495 {
496 #ifndef RTE_ARCH_PPC_64
497 	const struct hugepage_file *p1 = a;
498 	const struct hugepage_file *p2 = b;
499 #else
500 	/* PowerPC needs memory sorted in reverse order from x86 */
501 	const struct hugepage_file *p1 = b;
502 	const struct hugepage_file *p2 = a;
503 #endif
504 	if (p1->physaddr < p2->physaddr)
505 		return -1;
506 	else if (p1->physaddr > p2->physaddr)
507 		return 1;
508 	else
509 		return 0;
510 }
511 
512 /*
513  * Uses mmap to create a shared memory area for storage of data
514  * Used in this file to store the hugepage file map on disk
515  */
516 static void *
517 create_shared_memory(const char *filename, const size_t mem_size)
518 {
519 	void *retval;
520 	int fd;
521 	const struct internal_config *internal_conf =
522 		eal_get_internal_configuration();
523 
524 	/* if no shared files mode is used, create anonymous memory instead */
525 	if (internal_conf->no_shconf) {
526 		retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
527 				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
528 		if (retval == MAP_FAILED)
529 			return NULL;
530 		return retval;
531 	}
532 
533 	fd = open(filename, O_CREAT | O_RDWR, 0600);
534 	if (fd < 0)
535 		return NULL;
536 	if (ftruncate(fd, mem_size) < 0) {
537 		close(fd);
538 		return NULL;
539 	}
540 	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
541 	close(fd);
542 	if (retval == MAP_FAILED)
543 		return NULL;
544 	return retval;
545 }
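/*
 * Note that the fd is closed immediately after mmap(): the mapping itself
 * keeps the underlying file referenced until it is munmap()ed.
 */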
546 
547 /*
548  * this copies *active* hugepages from one hugepage table to another.
549  * destination is typically the shared memory.
550  */
551 static int
552 copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
553 		const struct hugepage_file * src, int src_size)
554 {
555 	int src_pos, dst_pos = 0;
556 
557 	for (src_pos = 0; src_pos < src_size; src_pos++) {
558 		if (src[src_pos].orig_va != NULL) {
559 			/* error on overflow attempt */
560 			if (dst_pos == dest_size)
561 				return -1;
562 			memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
563 			dst_pos++;
564 		}
565 	}
566 	return 0;
567 }
568 
569 static int
570 unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
571 		unsigned num_hp_info)
572 {
573 	unsigned socket, size;
574 	int page, nrpages = 0;
575 	const struct internal_config *internal_conf =
576 		eal_get_internal_configuration();
577 
578 	/* get total number of hugepages */
579 	for (size = 0; size < num_hp_info; size++)
580 		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
581 			nrpages +=
582 			internal_conf->hugepage_info[size].num_pages[socket];
583 
584 	for (page = 0; page < nrpages; page++) {
585 		struct hugepage_file *hp = &hugepg_tbl[page];
586 
587 		if (hp->orig_va != NULL && unlink(hp->filepath)) {
588 			RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
589 				__func__, hp->filepath, strerror(errno));
590 		}
591 	}
592 	return 0;
593 }
594 
595 /*
596  * unmaps hugepages that are not going to be used. since we originally allocate
597  * ALL hugepages (not just those we need), additional unmapping needs to be done.
598  */
599 static int
600 unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
601 		struct hugepage_info *hpi,
602 		unsigned num_hp_info)
603 {
604 	unsigned socket, size;
605 	int page, nrpages = 0;
606 	const struct internal_config *internal_conf =
607 		eal_get_internal_configuration();
608 
609 	/* get total number of hugepages */
610 	for (size = 0; size < num_hp_info; size++)
611 		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
612 			nrpages += internal_conf->hugepage_info[size].num_pages[socket];
613 
614 	for (size = 0; size < num_hp_info; size++) {
615 		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
616 			unsigned pages_found = 0;
617 
618 			/* traverse until we have unmapped all the unused pages */
619 			for (page = 0; page < nrpages; page++) {
620 				struct hugepage_file *hp = &hugepg_tbl[page];
621 
622 				/* find a page that matches the criteria */
623 				if ((hp->size == hpi[size].hugepage_sz) &&
624 						(hp->socket_id == (int) socket)) {
625 
626 					/* if we skipped enough pages, unmap the rest */
627 					if (pages_found == hpi[size].num_pages[socket]) {
628 						uint64_t unmap_len;
629 
630 						unmap_len = hp->size;
631 
632 						/* get start addr and len of the remaining segment */
633 						munmap(hp->orig_va,
634 							(size_t)unmap_len);
635 
636 						hp->orig_va = NULL;
637 						if (unlink(hp->filepath) == -1) {
638 							RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
639 									__func__, hp->filepath, strerror(errno));
640 							return -1;
641 						}
642 					} else {
643 						/* lock the page and skip */
644 						/* this page is needed, keep it mapped and skip it */
645 					}
646 
647 				} /* match page */
648 			} /* foreach page */
649 		} /* foreach socket */
650 	} /* foreach pagesize */
651 
652 	return 0;
653 }
654 
655 static int
656 remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
657 {
658 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
659 	struct rte_memseg_list *msl;
660 	struct rte_fbarray *arr;
661 	int cur_page, seg_len;
662 	unsigned int msl_idx;
663 	int ms_idx;
664 	uint64_t page_sz;
665 	size_t memseg_len;
666 	int socket_id;
667 #ifndef RTE_ARCH_64
668 	const struct internal_config *internal_conf =
669 		eal_get_internal_configuration();
670 #endif
671 	page_sz = hugepages[seg_start].size;
672 	socket_id = hugepages[seg_start].socket_id;
673 	seg_len = seg_end - seg_start;
674 
675 	RTE_LOG(DEBUG, EAL, "Attempting to map %" PRIu64 "M on socket %i\n",
676 			(seg_len * page_sz) >> 20ULL, socket_id);
677 
678 	/* find free space in memseg lists */
679 	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
680 		bool empty;
681 		msl = &mcfg->memsegs[msl_idx];
682 		arr = &msl->memseg_arr;
683 
684 		if (msl->page_sz != page_sz)
685 			continue;
686 		if (msl->socket_id != socket_id)
687 			continue;
688 
689 		/* leave space for a hole if array is not empty */
690 		empty = arr->count == 0;
691 		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
692 				seg_len + (empty ? 0 : 1));
693 
694 		/* memseg list is full? */
695 		if (ms_idx < 0)
696 			continue;
697 
698 		/* leave some space between memsegs, they are not IOVA
699 		 * contiguous, so they shouldn't be VA contiguous either.
700 		 */
701 		if (!empty)
702 			ms_idx++;
703 		break;
704 	}
705 	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
706 		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
707 				RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
708 				RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
709 		return -1;
710 	}
711 
712 #ifdef RTE_ARCH_PPC_64
713 	/* for PPC64 we go through the list backwards */
714 	for (cur_page = seg_end - 1; cur_page >= seg_start;
715 			cur_page--, ms_idx++) {
716 #else
717 	for (cur_page = seg_start; cur_page < seg_end; cur_page++, ms_idx++) {
718 #endif
719 		struct hugepage_file *hfile = &hugepages[cur_page];
720 		struct rte_memseg *ms = rte_fbarray_get(arr, ms_idx);
721 		void *addr;
722 		int fd;
723 
724 		fd = open(hfile->filepath, O_RDWR);
725 		if (fd < 0) {
726 			RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
727 					hfile->filepath, strerror(errno));
728 			return -1;
729 		}
730 		/* set shared lock on the file. */
731 		if (flock(fd, LOCK_SH) < 0) {
732 			RTE_LOG(DEBUG, EAL, "Could not lock '%s': %s\n",
733 					hfile->filepath, strerror(errno));
734 			close(fd);
735 			return -1;
736 		}
737 		memseg_len = (size_t)page_sz;
738 		addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
739 
740 		/* we know this address is already mmapped by memseg list, so
741 		 * using MAP_FIXED here is safe
742 		 */
743 		addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE,
744 				MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, 0);
745 		if (addr == MAP_FAILED) {
746 			RTE_LOG(ERR, EAL, "Couldn't remap '%s': %s\n",
747 					hfile->filepath, strerror(errno));
748 			close(fd);
749 			return -1;
750 		}
751 
752 		/* we have a new address, so unmap previous one */
753 #ifndef RTE_ARCH_64
754 		/* in 32-bit legacy mode, we have already unmapped the page */
755 		if (!internal_conf->legacy_mem)
756 			munmap(hfile->orig_va, page_sz);
757 #else
758 		munmap(hfile->orig_va, page_sz);
759 #endif
760 
761 		hfile->orig_va = NULL;
762 		hfile->final_va = addr;
763 
764 		/* rewrite physical addresses in IOVA as VA mode */
765 		if (rte_eal_iova_mode() == RTE_IOVA_VA)
766 			hfile->physaddr = (uintptr_t)addr;
767 
768 		/* set up memseg data */
769 		ms->addr = addr;
770 		ms->hugepage_sz = page_sz;
771 		ms->len = memseg_len;
772 		ms->iova = hfile->physaddr;
773 		ms->socket_id = hfile->socket_id;
774 		ms->nchannel = rte_memory_get_nchannel();
775 		ms->nrank = rte_memory_get_nrank();
776 
777 		rte_fbarray_set_used(arr, ms_idx);
778 
779 		/* store segment fd internally */
780 		if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
781 			RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
782 				rte_strerror(rte_errno));
783 	}
784 	RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
785 			(seg_len * page_sz) >> 20, socket_id);
786 	return 0;
787 }
788 
789 static uint64_t
790 get_mem_amount(uint64_t page_sz, uint64_t max_mem)
791 {
792 	uint64_t area_sz, max_pages;
793 
794 	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
795 	max_pages = RTE_MAX_MEMSEG_PER_LIST;
796 	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
797 
798 	area_sz = RTE_MIN(page_sz * max_pages, max_mem);
799 
800 	/* make sure the list isn't smaller than the page size */
801 	area_sz = RTE_MAX(area_sz, page_sz);
802 
803 	return RTE_ALIGN(area_sz, page_sz);
804 }
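/*
 * Worked example, assuming the default configuration values of 8192
 * segments per list and 32768 MB per list: with 2 MB pages and a 64 GB
 * max_mem, the page count limits the list to 8192 * 2 MB = 16 GB, max_mem
 * is clamped to 32 GB, and the returned area size is min(16 GB, 32 GB),
 * i.e. 16 GB.
 */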
805 
806 static int
807 memseg_list_free(struct rte_memseg_list *msl)
808 {
809 	if (rte_fbarray_destroy(&msl->memseg_arr)) {
810 		RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
811 		return -1;
812 	}
813 	memset(msl, 0, sizeof(*msl));
814 	return 0;
815 }
816 
817 /*
818  * Our VA space is not preallocated yet, so preallocate it here. We need to know
819  * how many segments there are in order to map all pages into one address space,
820  * and leave appropriate holes between segments so that rte_malloc does not
821  * concatenate them into one big segment.
822  *
823  * we also need to unmap original pages to free up address space.
824  */
825 static int __rte_unused
826 prealloc_segments(struct hugepage_file *hugepages, int n_pages)
827 {
828 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
829 	int cur_page, seg_start_page, end_seg, new_memseg;
830 	unsigned int hpi_idx, socket, i;
831 	int n_contig_segs, n_segs;
832 	int msl_idx;
833 	const struct internal_config *internal_conf =
834 		eal_get_internal_configuration();
835 
836 	/* before we preallocate segments, we need to free up our VA space.
837 	 * we're not removing files, and we already have information about
838 	 * PA-contiguousness, so it is safe to unmap everything.
839 	 */
840 	for (cur_page = 0; cur_page < n_pages; cur_page++) {
841 		struct hugepage_file *hpi = &hugepages[cur_page];
842 		munmap(hpi->orig_va, hpi->size);
843 		hpi->orig_va = NULL;
844 	}
845 
846 	/* we cannot know how many page sizes and sockets we have discovered, so
847 	 * loop over all of them
848 	 */
849 	for (hpi_idx = 0; hpi_idx < internal_conf->num_hugepage_sizes;
850 			hpi_idx++) {
851 		uint64_t page_sz =
852 			internal_conf->hugepage_info[hpi_idx].hugepage_sz;
853 
854 		for (i = 0; i < rte_socket_count(); i++) {
855 			struct rte_memseg_list *msl;
856 
857 			socket = rte_socket_id_by_idx(i);
858 			n_contig_segs = 0;
859 			n_segs = 0;
860 			seg_start_page = -1;
861 
862 			for (cur_page = 0; cur_page < n_pages; cur_page++) {
863 				struct hugepage_file *prev, *cur;
864 				int prev_seg_start_page = -1;
865 
866 				cur = &hugepages[cur_page];
867 				prev = cur_page == 0 ? NULL :
868 						&hugepages[cur_page - 1];
869 
870 				new_memseg = 0;
871 				end_seg = 0;
872 
873 				if (cur->size == 0)
874 					end_seg = 1;
875 				else if (cur->socket_id != (int) socket)
876 					end_seg = 1;
877 				else if (cur->size != page_sz)
878 					end_seg = 1;
879 				else if (cur_page == 0)
880 					new_memseg = 1;
881 #ifdef RTE_ARCH_PPC_64
882 				/* On the PPC64 architecture, mmap always maps
883 				 * from higher addresses to lower addresses. Here,
884 				 * physical addresses are in descending order.
885 				 */
886 				else if ((prev->physaddr - cur->physaddr) !=
887 						cur->size)
888 					new_memseg = 1;
889 #else
890 				else if ((cur->physaddr - prev->physaddr) !=
891 						cur->size)
892 					new_memseg = 1;
893 #endif
894 				if (new_memseg) {
895 					/* if we're already inside a segment,
896 					 * new segment means end of current one
897 					 */
898 					if (seg_start_page != -1) {
899 						end_seg = 1;
900 						prev_seg_start_page =
901 								seg_start_page;
902 					}
903 					seg_start_page = cur_page;
904 				}
905 
906 				if (end_seg) {
907 					if (prev_seg_start_page != -1) {
908 						/* we've found a new segment */
909 						n_contig_segs++;
910 						n_segs += cur_page -
911 							prev_seg_start_page;
912 					} else if (seg_start_page != -1) {
913 						/* we didn't find new segment,
914 						 * but did end current one
915 						 */
916 						n_contig_segs++;
917 						n_segs += cur_page -
918 								seg_start_page;
919 						seg_start_page = -1;
920 						continue;
921 					} else {
922 						/* we're skipping this page */
923 						continue;
924 					}
925 				}
926 				/* segment continues */
927 			}
928 			/* check if we missed last segment */
929 			if (seg_start_page != -1) {
930 				n_contig_segs++;
931 				n_segs += cur_page - seg_start_page;
932 			}
933 
934 			/* if no segments were found, do not preallocate */
935 			if (n_segs == 0)
936 				continue;
937 
938 			/* we now have total number of pages that we will
939 			 * allocate for this segment list. add separator pages
940 			 * to the total count, and preallocate VA space.
941 			 */
942 			n_segs += n_contig_segs - 1;
943 
944 			/* now, preallocate VA space for these segments */
945 
946 			/* first, find suitable memseg list for this */
947 			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
948 					msl_idx++) {
949 				msl = &mcfg->memsegs[msl_idx];
950 
951 				if (msl->base_va != NULL)
952 					continue;
953 				break;
954 			}
955 			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
956 				RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n",
957 					RTE_STR(RTE_MAX_MEMSEG_LISTS));
958 				return -1;
959 			}
960 
961 			/* now, allocate fbarray itself */
962 			if (eal_memseg_list_init(msl, page_sz, n_segs,
963 					socket, msl_idx, true) < 0)
964 				return -1;
965 
966 			/* finally, allocate VA space */
967 			if (eal_memseg_list_alloc(msl, 0) < 0) {
968 				RTE_LOG(ERR, EAL, "Cannot preallocate 0x%"PRIx64"kB hugepages\n",
969 					page_sz >> 10);
970 				return -1;
971 			}
972 		}
973 	}
974 	return 0;
975 }
976 
977 /*
978  * We cannot reallocate memseg lists on the fly because PPC64 stores pages
979  * backwards, therefore we have to process the entire memseg first before
980  * remapping it into memseg list VA space.
981  */
982 static int
983 remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
984 {
985 	int cur_page, seg_start_page, new_memseg, ret;
986 
987 	seg_start_page = 0;
988 	for (cur_page = 0; cur_page < n_pages; cur_page++) {
989 		struct hugepage_file *prev, *cur;
990 
991 		new_memseg = 0;
992 
993 		cur = &hugepages[cur_page];
994 		prev = cur_page == 0 ? NULL : &hugepages[cur_page - 1];
995 
996 		/* if size is zero, no more pages left */
997 		if (cur->size == 0)
998 			break;
999 
1000 		if (cur_page == 0)
1001 			new_memseg = 1;
1002 		else if (cur->socket_id != prev->socket_id)
1003 			new_memseg = 1;
1004 		else if (cur->size != prev->size)
1005 			new_memseg = 1;
1006 #ifdef RTE_ARCH_PPC_64
1007 		/* On the PPC64 architecture, mmap always maps from higher
1008 		 * addresses to lower addresses. Here, physical addresses are in
1009 		 * descending order.
1010 		 */
1011 		else if ((prev->physaddr - cur->physaddr) != cur->size)
1012 			new_memseg = 1;
1013 #else
1014 		else if ((cur->physaddr - prev->physaddr) != cur->size)
1015 			new_memseg = 1;
1016 #endif
1017 
1018 		if (new_memseg) {
1019 			/* if this isn't the first time, remap segment */
1020 			if (cur_page != 0) {
1021 				ret = remap_segment(hugepages, seg_start_page,
1022 						cur_page);
1023 				if (ret != 0)
1024 					return -1;
1025 			}
1026 			/* remember where we started */
1027 			seg_start_page = cur_page;
1028 		}
1029 		/* continuation of previous memseg */
1030 	}
1031 	/* we were stopped, but we didn't remap the last segment, do it now */
1032 	if (cur_page != 0) {
1033 		ret = remap_segment(hugepages, seg_start_page,
1034 				cur_page);
1035 		if (ret != 0)
1036 			return -1;
1037 	}
1038 	return 0;
1039 }
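/*
 * Example of the contiguity check above, with 2 MB pages: a page at
 * physaddr 0x240000000 directly follows one at 0x23fe00000, since
 * 0x240000000 - 0x23fe00000 == 0x200000, so both stay in the same segment;
 * a larger gap, a socket change or a size change starts a new segment,
 * which is then handed to remap_segment().
 */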
1040 
1041 static inline size_t
1042 eal_get_hugepage_mem_size(void)
1043 {
1044 	uint64_t size = 0;
1045 	unsigned i, j;
1046 	struct internal_config *internal_conf =
1047 		eal_get_internal_configuration();
1048 
1049 	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
1050 		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
1051 		if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
1052 			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
1053 				size += hpi->hugepage_sz * hpi->num_pages[j];
1054 			}
1055 		}
1056 	}
1057 
1058 	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
1059 }
1060 
1061 static struct sigaction huge_action_old;
1062 static int huge_need_recover;
1063 
1064 static void
1065 huge_register_sigbus(void)
1066 {
1067 	sigset_t mask;
1068 	struct sigaction action;
1069 
1070 	sigemptyset(&mask);
1071 	sigaddset(&mask, SIGBUS);
1072 	action.sa_flags = 0;
1073 	action.sa_mask = mask;
1074 	action.sa_handler = huge_sigbus_handler;
1075 
1076 	huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
1077 }
1078 
1079 static void
1080 huge_recover_sigbus(void)
1081 {
1082 	if (huge_need_recover) {
1083 		sigaction(SIGBUS, &huge_action_old, NULL);
1084 		huge_need_recover = 0;
1085 	}
1086 }
1087 
1088 /*
1089  * Prepare physical memory mapping: fill the configuration structure with
1090  * this information and return 0 on success.
1091  *  1. map N huge pages in separate files in hugetlbfs
1092  *  2. find associated physical addr
1093  *  3. find associated NUMA socket ID
1094  *  4. sort all huge pages by physical address
1095  *  5. remap these N huge pages in the correct order
1096  *  6. unmap the first mapping
1097  *  7. fill memsegs in configuration with contiguous zones
1098  */
1099 static int
1100 eal_legacy_hugepage_init(void)
1101 {
1102 	struct rte_mem_config *mcfg;
1103 	struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
1104 	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
1105 	struct internal_config *internal_conf =
1106 		eal_get_internal_configuration();
1107 
1108 	uint64_t memory[RTE_MAX_NUMA_NODES];
1109 
1110 	unsigned hp_offset;
1111 	int i, j;
1112 	int nr_hugefiles, nr_hugepages = 0;
1113 	void *addr;
1114 
1115 	memset(used_hp, 0, sizeof(used_hp));
1116 
1117 	/* get pointer to global configuration */
1118 	mcfg = rte_eal_get_configuration()->mem_config;
1119 
1120 	/* hugetlbfs can be disabled */
1121 	if (internal_conf->no_hugetlbfs) {
1122 		void *prealloc_addr;
1123 		size_t mem_sz;
1124 		struct rte_memseg_list *msl;
1125 		int n_segs, fd, flags;
1126 #ifdef MEMFD_SUPPORTED
1127 		int memfd;
1128 #endif
1129 		uint64_t page_sz;
1130 
1131 		/* nohuge mode is legacy mode */
1132 		internal_conf->legacy_mem = 1;
1133 
1134 		/* nohuge mode is single-file segments mode */
1135 		internal_conf->single_file_segments = 1;
1136 
1137 		/* create a memseg list */
1138 		msl = &mcfg->memsegs[0];
1139 
1140 		mem_sz = internal_conf->memory;
1141 		page_sz = RTE_PGSIZE_4K;
1142 		n_segs = mem_sz / page_sz;
1143 
1144 		if (eal_memseg_list_init_named(
1145 				msl, "nohugemem", page_sz, n_segs, 0, true)) {
1146 			return -1;
1147 		}
1148 
1149 		/* set up parameters for anonymous mmap */
1150 		fd = -1;
1151 		flags = MAP_PRIVATE | MAP_ANONYMOUS;
1152 
1153 #ifdef MEMFD_SUPPORTED
1154 		/* create a memfd and store it in the segment fd table */
1155 		memfd = memfd_create("nohuge", 0);
1156 		if (memfd < 0) {
1157 			RTE_LOG(DEBUG, EAL, "Cannot create memfd: %s\n",
1158 					strerror(errno));
1159 			RTE_LOG(DEBUG, EAL, "Falling back to anonymous map\n");
1160 		} else {
1161 			/* we got an fd - now resize it */
1162 			if (ftruncate(memfd, internal_conf->memory) < 0) {
1163 				RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n",
1164 						strerror(errno));
1165 				RTE_LOG(ERR, EAL, "Falling back to anonymous map\n");
1166 				close(memfd);
1167 			} else {
1168 				/* creating memfd-backed file was successful.
1169 				 * we want changes to memfd to be visible to
1170 				 * other processes (such as vhost backend), so
1171 				 * map it as shared memory.
1172 				 */
1173 				RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
1174 				fd = memfd;
1175 				flags = MAP_SHARED;
1176 			}
1177 		}
1178 #endif
1179 		/* preallocate address space for the memory, so that it can be
1180 		 * fit into the DMA mask.
1181 		 */
1182 		if (eal_memseg_list_alloc(msl, 0)) {
1183 			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
1184 			return -1;
1185 		}
1186 
1187 		prealloc_addr = msl->base_va;
1188 		addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE,
1189 				flags | MAP_FIXED, fd, 0);
1190 		if (addr == MAP_FAILED || addr != prealloc_addr) {
1191 			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
1192 					strerror(errno));
1193 			munmap(prealloc_addr, mem_sz);
1194 			return -1;
1195 		}
1196 
1197 		/* we're in single-file segments mode, so only the segment list
1198 		 * fd needs to be set up.
1199 		 */
1200 		if (fd != -1) {
1201 			if (eal_memalloc_set_seg_list_fd(0, fd) < 0) {
1202 				RTE_LOG(ERR, EAL, "Cannot set up segment list fd\n");
1203 				/* not a serious error, proceed */
1204 			}
1205 		}
1206 
1207 		eal_memseg_list_populate(msl, addr, n_segs);
1208 
1209 		if (mcfg->dma_maskbits &&
1210 		    rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
1211 			RTE_LOG(ERR, EAL,
1212 				"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
1213 				__func__);
1214 			if (rte_eal_iova_mode() == RTE_IOVA_VA &&
1215 			    rte_eal_using_phys_addrs())
1216 				RTE_LOG(ERR, EAL,
1217 					"%s(): Please try initializing EAL with --iova-mode=pa parameter.\n",
1218 					__func__);
1219 			goto fail;
1220 		}
1221 		return 0;
1222 	}
1223 
1224 	/* calculate the total number of hugepages available. At this point we
1225 	 * haven't yet started sorting them, so they are all on socket 0 */
1226 	for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
1227 		/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
1228 		used_hp[i].hugepage_sz = internal_conf->hugepage_info[i].hugepage_sz;
1229 
1230 		nr_hugepages += internal_conf->hugepage_info[i].num_pages[0];
1231 	}
1232 
1233 	/*
1234 	 * allocate a memory area for hugepage table.
1235 	 * this isn't shared memory yet. Because we still need to do some
1236 	 * processing on these pages, the shared memory will be created
1237 	 * at a later stage.
1238 	 */
1239 	tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
1240 	if (tmp_hp == NULL)
1241 		goto fail;
1242 
1243 	memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
1244 
1245 	hp_offset = 0; /* where we start the current page size entries */
1246 
1247 	huge_register_sigbus();
1248 
1249 	/* make a copy of socket_mem, needed for balanced allocation. */
1250 	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
1251 		memory[i] = internal_conf->socket_mem[i];
1252 
1253 	/* map all hugepages and sort them */
1254 	for (i = 0; i < (int)internal_conf->num_hugepage_sizes; i++) {
1255 		unsigned pages_old, pages_new;
1256 		struct hugepage_info *hpi;
1257 
1258 		/*
1259 		 * we don't yet mark hugepages as used at this stage, so
1260 		 * we just map all hugepages available to the system
1261 		 * all hugepages are still located on socket 0
1262 		 */
1263 		hpi = &internal_conf->hugepage_info[i];
1264 
1265 		if (hpi->num_pages[0] == 0)
1266 			continue;
1267 
1268 		/* map all hugepages available */
1269 		pages_old = hpi->num_pages[0];
1270 		pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory);
1271 		if (pages_new < pages_old) {
1272 			RTE_LOG(DEBUG, EAL,
1273 				"%d not %d hugepages of size %u MB allocated\n",
1274 				pages_new, pages_old,
1275 				(unsigned)(hpi->hugepage_sz / 0x100000));
1276 
1277 			int pages = pages_old - pages_new;
1278 
1279 			nr_hugepages -= pages;
1280 			hpi->num_pages[0] = pages_new;
1281 			if (pages_new == 0)
1282 				continue;
1283 		}
1284 
1285 		if (rte_eal_using_phys_addrs() &&
1286 				rte_eal_iova_mode() != RTE_IOVA_VA) {
1287 			/* find physical addresses for each hugepage */
1288 			if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
1289 				RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
1290 					"for %u MB pages\n",
1291 					(unsigned int)(hpi->hugepage_sz / 0x100000));
1292 				goto fail;
1293 			}
1294 		} else {
1295 			/* set physical addresses for each hugepage */
1296 			if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
1297 				RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
1298 					"for %u MB pages\n",
1299 					(unsigned int)(hpi->hugepage_sz / 0x100000));
1300 				goto fail;
1301 			}
1302 		}
1303 
1304 		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
1305 			RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
1306 					(unsigned)(hpi->hugepage_sz / 0x100000));
1307 			goto fail;
1308 		}
1309 
1310 		qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
1311 		      sizeof(struct hugepage_file), cmp_physaddr);
1312 
1313 		/* we have processed a number of hugepages of this size, so increment the offset */
1314 		hp_offset += hpi->num_pages[0];
1315 	}
1316 
1317 	huge_recover_sigbus();
1318 
1319 	if (internal_conf->memory == 0 && internal_conf->force_sockets == 0)
1320 		internal_conf->memory = eal_get_hugepage_mem_size();
1321 
1322 	nr_hugefiles = nr_hugepages;
1323 
1324 
1325 	/* clean out the numbers of pages */
1326 	for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++)
1327 		for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
1328 			internal_conf->hugepage_info[i].num_pages[j] = 0;
1329 
1330 	/* get hugepages for each socket */
1331 	for (i = 0; i < nr_hugefiles; i++) {
1332 		int socket = tmp_hp[i].socket_id;
1333 
1334 		/* find a hugepage info with right size and increment num_pages */
1335 		const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
1336 				(int)internal_conf->num_hugepage_sizes);
1337 		for (j = 0; j < nb_hpsizes; j++) {
1338 			if (tmp_hp[i].size ==
1339 					internal_conf->hugepage_info[j].hugepage_sz) {
1340 				internal_conf->hugepage_info[j].num_pages[socket]++;
1341 			}
1342 		}
1343 	}
1344 
1345 	/* make a copy of socket_mem, needed for number of pages calculation */
1346 	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
1347 		memory[i] = internal_conf->socket_mem[i];
1348 
1349 	/* calculate final number of pages */
1350 	nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory,
1351 			internal_conf->hugepage_info, used_hp,
1352 			internal_conf->num_hugepage_sizes);
1353 
1354 	/* error if not enough memory available */
1355 	if (nr_hugepages < 0)
1356 		goto fail;
1357 
1358 	/* reporting in! */
1359 	for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
1360 		for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
1361 			if (used_hp[i].num_pages[j] > 0) {
1362 				RTE_LOG(DEBUG, EAL,
1363 					"Requesting %u pages of size %uMB"
1364 					" from socket %i\n",
1365 					used_hp[i].num_pages[j],
1366 					(unsigned)
1367 					(used_hp[i].hugepage_sz / 0x100000),
1368 					j);
1369 			}
1370 		}
1371 	}
1372 
1373 	/* create shared memory */
1374 	hugepage = create_shared_memory(eal_hugepage_data_path(),
1375 			nr_hugefiles * sizeof(struct hugepage_file));
1376 
1377 	if (hugepage == NULL) {
1378 		RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
1379 		goto fail;
1380 	}
1381 	memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
1382 
1383 	/*
1384 	 * unmap pages that we won't need (looks at used_hp).
1385 	 * also, sets final_va to NULL on pages that were unmapped.
1386 	 */
1387 	if (unmap_unneeded_hugepages(tmp_hp, used_hp,
1388 			internal_conf->num_hugepage_sizes) < 0) {
1389 		RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
1390 		goto fail;
1391 	}
1392 
1393 	/*
1394 	 * copy stuff from malloc'd hugepage* to the actual shared memory.
1395 	 * this procedure only copies those hugepages that have orig_va
1396 	 * not NULL. has overflow protection.
1397 	 */
1398 	if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
1399 			tmp_hp, nr_hugefiles) < 0) {
1400 		RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
1401 		goto fail;
1402 	}
1403 
1404 #ifndef RTE_ARCH_64
1405 	/* for legacy 32-bit mode, we did not preallocate VA space, so do it */
1406 	if (internal_conf->legacy_mem &&
1407 			prealloc_segments(hugepage, nr_hugefiles)) {
1408 		RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n");
1409 		goto fail;
1410 	}
1411 #endif
1412 
1413 	/* remap all pages we do need into memseg list VA space, so that those
1414 	 * pages become first-class citizens in DPDK memory subsystem
1415 	 */
1416 	if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
1417 		RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
1418 		goto fail;
1419 	}
1420 
1421 	/* free the hugepage backing files */
1422 	if (internal_conf->hugepage_file.unlink_before_mapping &&
1423 		unlink_hugepage_files(tmp_hp, internal_conf->num_hugepage_sizes) < 0) {
1424 		RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
1425 		goto fail;
1426 	}
1427 
1428 	/* free the temporary hugepage table */
1429 	free(tmp_hp);
1430 	tmp_hp = NULL;
1431 
1432 	munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
1433 	hugepage = NULL;
1434 
1435 	/* we're not going to allocate more pages, so release VA space for
1436 	 * unused memseg lists
1437 	 */
1438 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
1439 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
1440 		size_t mem_sz;
1441 
1442 		/* skip inactive lists */
1443 		if (msl->base_va == NULL)
1444 			continue;
1445 		/* skip lists where there is at least one page allocated */
1446 		if (msl->memseg_arr.count > 0)
1447 			continue;
1448 		/* this is an unused list, deallocate it */
1449 		mem_sz = msl->len;
1450 		munmap(msl->base_va, mem_sz);
1451 		msl->base_va = NULL;
1452 		msl->heap = 0;
1453 
1454 		/* destroy backing fbarray */
1455 		rte_fbarray_destroy(&msl->memseg_arr);
1456 	}
1457 
1458 	if (mcfg->dma_maskbits &&
1459 	    rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
1460 		RTE_LOG(ERR, EAL,
1461 			"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
1462 			__func__);
1463 		goto fail;
1464 	}
1465 
1466 	return 0;
1467 
1468 fail:
1469 	huge_recover_sigbus();
1470 	free(tmp_hp);
1471 	if (hugepage != NULL)
1472 		munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
1473 
1474 	return -1;
1475 }
1476 
1477 /*
1478  * uses fstat to report the size of a file on disk
1479  */
1480 static off_t
1481 getFileSize(int fd)
1482 {
1483 	struct stat st;
1484 	if (fstat(fd, &st) < 0)
1485 		return 0;
1486 	return st.st_size;
1487 }
1488 
1489 /*
1490  * This creates the memory mappings in the secondary process to match those of
1491  * the primary process. It goes through each memory segment in the DPDK runtime
1492  * configuration and finds the hugepages which form that segment, mapping them
1493  * in order to form a contiguous block in the virtual memory space
1494  */
1495 static int
1496 eal_legacy_hugepage_attach(void)
1497 {
1498 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1499 	struct hugepage_file *hp = NULL;
1500 	unsigned int num_hp = 0;
1501 	unsigned int i = 0;
1502 	unsigned int cur_seg;
1503 	off_t size = 0;
1504 	int fd, fd_hugepage = -1;
1505 
1506 	if (aslr_enabled() > 0) {
1507 		RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
1508 				"(ASLR) is enabled in the kernel.\n");
1509 		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
1510 				"into secondary processes\n");
1511 	}
1512 
1513 	fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
1514 	if (fd_hugepage < 0) {
1515 		RTE_LOG(ERR, EAL, "Could not open %s\n",
1516 				eal_hugepage_data_path());
1517 		goto error;
1518 	}
1519 
1520 	size = getFileSize(fd_hugepage);
1521 	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
1522 	if (hp == MAP_FAILED) {
1523 		RTE_LOG(ERR, EAL, "Could not mmap %s\n",
1524 				eal_hugepage_data_path());
1525 		goto error;
1526 	}
1527 
1528 	num_hp = size / sizeof(struct hugepage_file);
1529 	RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
1530 
1531 	/* map all segments into memory to make sure we get the addrs. the
1532 	 * segments themselves are already in memseg list (which is shared and
1533 	 * has its VA space already preallocated), so we just need to map
1534 	 * everything into correct addresses.
1535 	 */
1536 	for (i = 0; i < num_hp; i++) {
1537 		struct hugepage_file *hf = &hp[i];
1538 		size_t map_sz = hf->size;
1539 		void *map_addr = hf->final_va;
1540 		int msl_idx, ms_idx;
1541 		struct rte_memseg_list *msl;
1542 		struct rte_memseg *ms;
1543 
1544 		/* if size is zero, no more pages left */
1545 		if (map_sz == 0)
1546 			break;
1547 
1548 		fd = open(hf->filepath, O_RDWR);
1549 		if (fd < 0) {
1550 			RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
1551 				hf->filepath, strerror(errno));
1552 			goto error;
1553 		}
1554 
1555 		map_addr = mmap(map_addr, map_sz, PROT_READ | PROT_WRITE,
1556 				MAP_SHARED | MAP_FIXED, fd, 0);
1557 		if (map_addr == MAP_FAILED) {
1558 			RTE_LOG(ERR, EAL, "Could not map %s: %s\n",
1559 				hf->filepath, strerror(errno));
1560 			goto fd_error;
1561 		}
1562 
1563 		/* set shared lock on the file. */
1564 		if (flock(fd, LOCK_SH) < 0) {
1565 			RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
1566 				__func__, strerror(errno));
1567 			goto mmap_error;
1568 		}
1569 
1570 		/* find segment data */
1571 		msl = rte_mem_virt2memseg_list(map_addr);
1572 		if (msl == NULL) {
1573 			RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n",
1574 				__func__);
1575 			goto mmap_error;
1576 		}
1577 		ms = rte_mem_virt2memseg(map_addr, msl);
1578 		if (ms == NULL) {
1579 			RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n",
1580 				__func__);
1581 			goto mmap_error;
1582 		}
1583 
1584 		msl_idx = msl - mcfg->memsegs;
1585 		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
1586 		if (ms_idx < 0) {
1587 			RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n",
1588 				__func__);
1589 			goto mmap_error;
1590 		}
1591 
1592 		/* store segment fd internally */
1593 		if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
1594 			RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
1595 				rte_strerror(rte_errno));
1596 	}
1597 	/* unmap the hugepage config file, since we are done using it */
1598 	munmap(hp, size);
1599 	close(fd_hugepage);
1600 	return 0;
1601 
1602 mmap_error:
1603 	munmap(hp[i].final_va, hp[i].size);
1604 fd_error:
1605 	close(fd);
1606 error:
1607 	/* unwind mmap's done so far */
1608 	for (cur_seg = 0; cur_seg < i; cur_seg++)
1609 		munmap(hp[cur_seg].final_va, hp[cur_seg].size);
1610 
1611 	if (hp != NULL && hp != MAP_FAILED)
1612 		munmap(hp, size);
1613 	if (fd_hugepage >= 0)
1614 		close(fd_hugepage);
1615 	return -1;
1616 }
1617 
1618 static int
1619 eal_hugepage_attach(void)
1620 {
1621 	if (eal_memalloc_sync_with_primary()) {
1622 		RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
1623 		if (aslr_enabled() > 0)
1624 			RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel and retry running both primary and secondary processes\n");
1625 		return -1;
1626 	}
1627 	return 0;
1628 }
1629 
1630 int
1631 rte_eal_hugepage_init(void)
1632 {
1633 	const struct internal_config *internal_conf =
1634 		eal_get_internal_configuration();
1635 
1636 	return internal_conf->legacy_mem ?
1637 			eal_legacy_hugepage_init() :
1638 			eal_dynmem_hugepage_init();
1639 }
1640 
1641 int
1642 rte_eal_hugepage_attach(void)
1643 {
1644 	const struct internal_config *internal_conf =
1645 		eal_get_internal_configuration();
1646 
1647 	return internal_conf->legacy_mem ?
1648 			eal_legacy_hugepage_attach() :
1649 			eal_hugepage_attach();
1650 }
1651 
1652 int
1653 rte_eal_using_phys_addrs(void)
1654 {
1655 	if (phys_addrs_available == -1) {
1656 		uint64_t tmp = 0;
1657 
1658 		if (rte_eal_has_hugepages() != 0 &&
1659 		    rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
1660 			phys_addrs_available = 1;
1661 		else
1662 			phys_addrs_available = 0;
1663 	}
1664 	return phys_addrs_available;
1665 }
1666 
1667 static int __rte_unused
1668 memseg_primary_init_32(void)
1669 {
1670 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1671 	int active_sockets, hpi_idx, msl_idx = 0;
1672 	unsigned int socket_id, i;
1673 	struct rte_memseg_list *msl;
1674 	uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
1675 	uint64_t max_mem;
1676 	struct internal_config *internal_conf =
1677 		eal_get_internal_configuration();
1678 
1679 	/* no-huge does not need this at all */
1680 	if (internal_conf->no_hugetlbfs)
1681 		return 0;
1682 
1683 	/* this is a giant hack, but desperate times call for desperate
1684 	 * measures. in legacy 32-bit mode, we cannot preallocate VA space,
1685 	 * because having upwards of 2 gigabytes of VA space already mapped will
1686 	 * interfere with our ability to map and sort hugepages.
1687 	 *
1688 	 * therefore, in legacy 32-bit mode, we will be initializing memseg
1689 	 * lists much later - in eal_memory.c, right after we unmap all the
1690 	 * unneeded pages. this will not affect secondary processes, as those
1691 	 * should be able to mmap the space without (too many) problems.
1692 	 */
1693 	if (internal_conf->legacy_mem)
1694 		return 0;
1695 
1696 	/* 32-bit mode is a very special case. we cannot know in advance where
1697 	 * the user will want to allocate their memory, so we have to do some
1698 	 * heuristics.
1699 	 */
1700 	active_sockets = 0;
1701 	total_requested_mem = 0;
1702 	if (internal_conf->force_sockets)
1703 		for (i = 0; i < rte_socket_count(); i++) {
1704 			uint64_t mem;
1705 
1706 			socket_id = rte_socket_id_by_idx(i);
1707 			mem = internal_conf->socket_mem[socket_id];
1708 
1709 			if (mem == 0)
1710 				continue;
1711 
1712 			active_sockets++;
1713 			total_requested_mem += mem;
1714 		}
1715 	else
1716 		total_requested_mem = internal_conf->memory;
1717 
1718 	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
1719 	if (total_requested_mem > max_mem) {
1720 		RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
1721 				(unsigned int)(max_mem >> 20));
1722 		return -1;
1723 	}
1724 	total_extra_mem = max_mem - total_requested_mem;
1725 	extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
1726 			total_extra_mem / active_sockets;
1727 
1728 	/* the allocation logic is a little bit convoluted, but here's how it
1729 	 * works, in a nutshell:
1730 	 *  - if user hasn't specified on which sockets to allocate memory via
1731 	 *    --socket-mem, we allocate all of our memory on main core socket.
1732 	 *  - if user has specified sockets to allocate memory on, there may be
1733 	 *    some "unused" memory left (e.g. if user has specified --socket-mem
1734 	 *    such that not all memory adds up to 2 gigabytes), so add it to all
1735 	 *    sockets that are in use equally.
1736 	 *
1737 	 * page sizes are sorted by size in descending order, so we can safely
1738 	 * assume that we dispense with bigger page sizes first.
1739 	 */
1740 
1741 	/* create memseg lists */
1742 	for (i = 0; i < rte_socket_count(); i++) {
1743 		int hp_sizes = (int) internal_conf->num_hugepage_sizes;
1744 		uint64_t max_socket_mem, cur_socket_mem;
1745 		unsigned int main_lcore_socket;
1746 		struct rte_config *cfg = rte_eal_get_configuration();
1747 		bool skip;
1748 
1749 		socket_id = rte_socket_id_by_idx(i);
1750 
1751 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1752 		/* we can still sort pages by socket in legacy mode */
1753 		if (!internal_conf->legacy_mem && socket_id > 0)
1754 			break;
1755 #endif
1756 
1757 		/* if we didn't specifically request memory on this socket */
1758 		skip = active_sockets != 0 &&
1759 				internal_conf->socket_mem[socket_id] == 0;
1760 		/* ...or if we didn't specifically request memory on *any*
1761 		 * socket, and this is not main lcore
1762 		 */
1763 		main_lcore_socket = rte_lcore_to_socket_id(cfg->main_lcore);
1764 		skip |= active_sockets == 0 && socket_id != main_lcore_socket;
1765 
1766 		if (skip) {
1767 			RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
1768 					socket_id);
1769 			continue;
1770 		}
1771 
1772 		/* max amount of memory on this socket */
1773 		max_socket_mem = (active_sockets != 0 ?
1774 					internal_conf->socket_mem[socket_id] :
1775 					internal_conf->memory) +
1776 					extra_mem_per_socket;
1777 		cur_socket_mem = 0;
1778 
1779 		for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
1780 			uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
1781 			uint64_t hugepage_sz;
1782 			struct hugepage_info *hpi;
1783 			int type_msl_idx, max_segs, total_segs = 0;
1784 
1785 			hpi = &internal_conf->hugepage_info[hpi_idx];
1786 			hugepage_sz = hpi->hugepage_sz;
1787 
1788 			/* check if pages are actually available */
1789 			if (hpi->num_pages[socket_id] == 0)
1790 				continue;
1791 
1792 			max_segs = RTE_MAX_MEMSEG_PER_TYPE;
1793 			max_pagesz_mem = max_socket_mem - cur_socket_mem;
1794 
1795 			/* make it multiple of page size */
1796 			max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
1797 					hugepage_sz);
1798 
1799 			RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
1800 					"%" PRIu64 "M on socket %i\n",
1801 					max_pagesz_mem >> 20, socket_id);
1802 
1803 			type_msl_idx = 0;
1804 			while (cur_pagesz_mem < max_pagesz_mem &&
1805 					total_segs < max_segs) {
1806 				uint64_t cur_mem;
1807 				unsigned int n_segs;
1808 
1809 				if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
1810 					RTE_LOG(ERR, EAL,
1811 						"No more space in memseg lists, please increase %s\n",
1812 						RTE_STR(RTE_MAX_MEMSEG_LISTS));
1813 					return -1;
1814 				}
1815 
1816 				msl = &mcfg->memsegs[msl_idx];
1817 
1818 				cur_mem = get_mem_amount(hugepage_sz,
1819 						max_pagesz_mem);
1820 				n_segs = cur_mem / hugepage_sz;
1821 
1822 				if (eal_memseg_list_init(msl, hugepage_sz,
1823 						n_segs, socket_id, type_msl_idx,
1824 						true)) {
1825 					/* failing to allocate a memseg list is
1826 					 * a serious error.
1827 					 */
1828 					RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
1829 					return -1;
1830 				}
1831 
1832 				if (eal_memseg_list_alloc(msl, 0)) {
1833 					/* if we couldn't allocate VA space, we
1834 					 * can try with smaller page sizes.
1835 					 */
1836 					RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
1837 					/* deallocate memseg list */
1838 					if (memseg_list_free(msl))
1839 						return -1;
1840 					break;
1841 				}
1842 
1843 				total_segs += msl->memseg_arr.len;
1844 				cur_pagesz_mem = total_segs * hugepage_sz;
1845 				type_msl_idx++;
1846 				msl_idx++;
1847 			}
1848 			cur_socket_mem += cur_pagesz_mem;
1849 		}
1850 		if (cur_socket_mem == 0) {
1851 			RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
1852 				socket_id);
1853 			return -1;
1854 		}
1855 	}
1856 
1857 	return 0;
1858 }
1859 
1860 static int __rte_unused
1861 memseg_primary_init(void)
1862 {
1863 	return eal_dynmem_memseg_lists_init();
1864 }
1865 
1866 static int
1867 memseg_secondary_init(void)
1868 {
1869 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1870 	int msl_idx = 0;
1871 	struct rte_memseg_list *msl;
1872 
1873 	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
1874 
1875 		msl = &mcfg->memsegs[msl_idx];
1876 
1877 		/* skip empty memseg lists */
1878 		if (msl->memseg_arr.len == 0)
1879 			continue;
1880 
1881 		if (rte_fbarray_attach(&msl->memseg_arr)) {
1882 			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
1883 			return -1;
1884 		}
1885 
1886 		/* preallocate VA space */
1887 		if (eal_memseg_list_alloc(msl, 0)) {
1888 			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
1889 			return -1;
1890 		}
1891 	}
1892 
1893 	return 0;
1894 }
1895 
1896 int
1897 rte_eal_memseg_init(void)
1898 {
1899 	/* increase rlimit to maximum */
1900 	struct rlimit lim;
1901 
1902 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1903 	const struct internal_config *internal_conf =
1904 		eal_get_internal_configuration();
1905 #endif
1906 	if (getrlimit(RLIMIT_NOFILE, &lim) == 0) {
1907 		/* set limit to maximum */
1908 		lim.rlim_cur = lim.rlim_max;
1909 
1910 		if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
1911 			RTE_LOG(DEBUG, EAL, "Setting maximum number of open files failed: %s\n",
1912 					strerror(errno));
1913 		} else {
1914 			RTE_LOG(DEBUG, EAL, "Setting maximum number of open files to %"
1915 					PRIu64 "\n",
1916 					(uint64_t)lim.rlim_cur);
1917 		}
1918 	} else {
1919 		RTE_LOG(ERR, EAL, "Cannot get current resource limits\n");
1920 	}
1921 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1922 	if (!internal_conf->legacy_mem && rte_socket_count() > 1) {
1923 		RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n");
1924 		RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n");
1925 		RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n");
1926 	}
1927 #endif
1928 
1929 	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
1930 #ifndef RTE_ARCH_64
1931 			memseg_primary_init_32() :
1932 #else
1933 			memseg_primary_init() :
1934 #endif
1935 			memseg_secondary_init();
1936 }
1937