/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

uint64_t eal_get_baseaddr(void)
{
	/*
	 * FreeBSD may allocate something in the address space we intend to map
	 * before we get a chance to do so, so use a base address that is far
	 * away from where malloc() et al. usually map things.
	 */
	return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}
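/*
 * On FreeBSD the IOVA is simply whatever rte_mem_virt2phy() reports, which
 * (see above) is always RTE_BAD_IOVA here.
 */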
rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

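/*
 * Primary process: map every buffer exposed by the contigmem driver into the
 * preallocated memseg lists, leaving a hole between buffers that are not
 * IOVA-contiguous. With --no-huge, a single anonymous mapping is used instead.
 */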
int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_conf->no_hugetlbfs) {
		struct rte_memseg_list *msl;
		uint64_t mem_sz, page_sz;
		int n_segs;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		mem_sz = internal_conf->memory;
		page_sz = RTE_PGSIZE_4K;
		n_segs = mem_sz / page_sz;

		if (eal_memseg_list_init_named(
				msl, "nohugemem", page_sz, n_segs, 0, true)) {
			return -1;
		}

		addr = mmap(NULL, mem_sz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			EAL_LOG(ERR, "%s: mmap() failed: %s", __func__,
					strerror(errno));
			return -1;
		}

		msl->base_va = addr;
		msl->len = mem_sz;

		eal_memseg_list_populate(msl, addr, n_segs);

		return 0;
	}

	/* map all contigmem hugepage buffers and place them in memseg lists */
	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_conf->hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				EAL_LOG(ERR, "Failed to get physical addr for buffer %u "
						"from %s", j, hpi->hugedir);
				return -1;
			}

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;

			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if the previous segment was not
				 * adjacent to the current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need one free slot, plus a hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				EAL_LOG(ERR, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST, "
					"RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.");
				return -1;
			}
			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				EAL_LOG(ERR, "Failed to mmap buffer %u from %s",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			EAL_LOG(INFO, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_conf->memory)
			break;
	}
	if (total_mem < internal_conf->memory) {
		EAL_LOG(ERR, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M",
				internal_conf->memory >> 20, total_mem >> 20);
		return -1;
	}
	return 0;
}

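/*
 * State carried through the memseg walk in rte_eal_hugepage_attach():
 * the contigmem file descriptor and the index of the next buffer to map.
 */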
struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};
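/*
 * rte_memseg_walk() callback: map one contigmem buffer at the virtual address
 * recorded in the memseg, skipping externally allocated segments.
 */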
static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;

	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

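/*
 * Secondary process: re-open each contigmem device and map its buffers at the
 * same virtual addresses the primary process recorded in the memseg lists.
 */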
int
rte_eal_hugepage_attach(void)
{
	struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	hpi = &internal_conf->hugepage_info[0];

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			EAL_LOG(ERR, "Could not open %s",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			EAL_LOG(ERR, "Failed to mmap buffer %u from %s",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

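/*
 * Physical addresses cannot be looked up via rte_mem_virt2phy() on FreeBSD
 * (see above), so report that they are not being used.
 */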
int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

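/*
 * Work out how much memory a single memseg list may cover for the given page
 * size, bounded by the per-list limits and by the remaining max_mem budget.
 */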
static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

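/*
 * Reserve VA space for one memseg list; on ppc64, ask for a hugepage-backed
 * reservation.
 */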
static int
memseg_list_alloc(struct rte_memseg_list *msl)
{
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= EAL_RESERVE_HUGEPAGES;
#endif
	return eal_memseg_list_alloc(msl, flags);
}

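/*
 * Primary process: create the memseg lists for each hugepage size and reserve
 * VA space for them, sized to what the contigmem driver can actually provide.
 */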
static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* no-huge does not need this at all */
	if (internal_conf->no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where a core dump will contain the entire
	 * memory contents, including anonymous zero-page memory. Therefore,
	 * while we limit the total amount of memory to RTE_MAX_MEM_MB, we also
	 * further limit it to whatever memory is actually available to us
	 * through the contigmem driver (plus spacing blocks).
	 *
	 * So, at each stage, we check how much memory we are preallocating and
	 * adjust all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_conf->hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
			(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces between segments
		 * that are non-contiguous.
		 */
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);

		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				EAL_LOG(ERR,
					"No more space in memseg lists, please increase RTE_MAX_MEMSEG_LISTS");
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
					0, type_msl_idx, false))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (memseg_list_alloc(msl)) {
				EAL_LOG(ERR, "Cannot allocate VA space for memseg list");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}
	return 0;
}

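/*
 * Secondary process: attach to the memseg list fbarrays created by the primary
 * and reserve matching VA space for them.
 */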
static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx = 0;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty and external memseg lists */
		if (msl->memseg_arr.len == 0 || msl->external)
			continue;

		if (rte_fbarray_attach(&msl->memseg_arr)) {
			EAL_LOG(ERR, "Cannot attach to primary process memseg lists");
			return -1;
		}

		/* preallocate VA space */
		if (memseg_list_alloc(msl)) {
			EAL_LOG(ERR, "Cannot preallocate VA space for hugepage memory");
			return -1;
		}
	}

	return 0;
}

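/*
 * Dispatch to the primary or secondary initialization path depending on the
 * process type.
 */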
int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}
473