xref: /dpdk/lib/eal/common/eal_common_memory.c (revision 17bb60044bae68c0f062755527ad8febe9f448d1)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2010-2014 Intel Corporation
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
52054f31aSAmit Prakash Shukla #include <ctype.h>
699a2dd95SBruce Richardson #include <errno.h>
799a2dd95SBruce Richardson #include <stdio.h>
899a2dd95SBruce Richardson #include <stdint.h>
999a2dd95SBruce Richardson #include <stdlib.h>
1099a2dd95SBruce Richardson #include <string.h>
1199a2dd95SBruce Richardson #include <inttypes.h>
1299a2dd95SBruce Richardson 
1399a2dd95SBruce Richardson #include <rte_fbarray.h>
1499a2dd95SBruce Richardson #include <rte_memory.h>
1599a2dd95SBruce Richardson #include <rte_eal.h>
1699a2dd95SBruce Richardson #include <rte_eal_memconfig.h>
1799a2dd95SBruce Richardson #include <rte_eal_paging.h>
1899a2dd95SBruce Richardson #include <rte_errno.h>
1999a2dd95SBruce Richardson #include <rte_log.h>
20e6732d0dSHarman Kalra #ifndef RTE_EXEC_ENV_WINDOWS
21e6732d0dSHarman Kalra #include <rte_telemetry.h>
22e6732d0dSHarman Kalra #endif
2399a2dd95SBruce Richardson 
2499a2dd95SBruce Richardson #include "eal_memalloc.h"
2599a2dd95SBruce Richardson #include "eal_private.h"
2699a2dd95SBruce Richardson #include "eal_internal_cfg.h"
2799a2dd95SBruce Richardson #include "eal_memcfg.h"
2899a2dd95SBruce Richardson #include "eal_options.h"
292054f31aSAmit Prakash Shukla #include "malloc_elem.h"
3099a2dd95SBruce Richardson #include "malloc_heap.h"
3199a2dd95SBruce Richardson 
3299a2dd95SBruce Richardson /*
3399a2dd95SBruce Richardson  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
3499a2dd95SBruce Richardson  * pointer to the mmap'd area and keep *size unmodified. Else, retry
3599a2dd95SBruce Richardson  * with a smaller zone: decrease *size by hugepage_sz until it reaches
3699a2dd95SBruce Richardson  * 0. In this case, return NULL. Note: this function returns an address
3799a2dd95SBruce Richardson  * which is a multiple of hugepage size.
3899a2dd95SBruce Richardson  */
3999a2dd95SBruce Richardson 
4099a2dd95SBruce Richardson #define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
4199a2dd95SBruce Richardson 
4299a2dd95SBruce Richardson static void *next_baseaddr;
4399a2dd95SBruce Richardson static uint64_t system_page_sz;
4499a2dd95SBruce Richardson 
4599a2dd95SBruce Richardson #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
4699a2dd95SBruce Richardson void *
4799a2dd95SBruce Richardson eal_get_virtual_area(void *requested_addr, size_t *size,
4899a2dd95SBruce Richardson 	size_t page_sz, int flags, int reserve_flags)
4999a2dd95SBruce Richardson {
5099a2dd95SBruce Richardson 	bool addr_is_hint, allow_shrink, unmap, no_align;
5199a2dd95SBruce Richardson 	uint64_t map_sz;
5299a2dd95SBruce Richardson 	void *mapped_addr, *aligned_addr;
5399a2dd95SBruce Richardson 	uint8_t try = 0;
5499a2dd95SBruce Richardson 	struct internal_config *internal_conf =
5599a2dd95SBruce Richardson 		eal_get_internal_configuration();
5699a2dd95SBruce Richardson 
5799a2dd95SBruce Richardson 	if (system_page_sz == 0)
5899a2dd95SBruce Richardson 		system_page_sz = rte_mem_page_size();
5999a2dd95SBruce Richardson 
60ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "Ask a virtual area of 0x%zx bytes", *size);
6199a2dd95SBruce Richardson 
6299a2dd95SBruce Richardson 	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
6399a2dd95SBruce Richardson 	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
6499a2dd95SBruce Richardson 	unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
6599a2dd95SBruce Richardson 
6699a2dd95SBruce Richardson 	if (next_baseaddr == NULL && internal_conf->base_virtaddr != 0 &&
6799a2dd95SBruce Richardson 			rte_eal_process_type() == RTE_PROC_PRIMARY)
6899a2dd95SBruce Richardson 		next_baseaddr = (void *) internal_conf->base_virtaddr;
6999a2dd95SBruce Richardson 
7099a2dd95SBruce Richardson #ifdef RTE_ARCH_64
7199a2dd95SBruce Richardson 	if (next_baseaddr == NULL && internal_conf->base_virtaddr == 0 &&
7299a2dd95SBruce Richardson 			rte_eal_process_type() == RTE_PROC_PRIMARY)
7399a2dd95SBruce Richardson 		next_baseaddr = (void *) eal_get_baseaddr();
7499a2dd95SBruce Richardson #endif
7599a2dd95SBruce Richardson 	if (requested_addr == NULL && next_baseaddr != NULL) {
7699a2dd95SBruce Richardson 		requested_addr = next_baseaddr;
7799a2dd95SBruce Richardson 		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
7899a2dd95SBruce Richardson 		addr_is_hint = true;
7999a2dd95SBruce Richardson 	}
8099a2dd95SBruce Richardson 
8199a2dd95SBruce Richardson 	/* we don't need alignment of resulting pointer in the following cases:
8299a2dd95SBruce Richardson 	 *
8399a2dd95SBruce Richardson 	 * 1. page size is equal to system size
8499a2dd95SBruce Richardson 	 * 2. we have a requested address, and it is page-aligned, and we will
8599a2dd95SBruce Richardson 	 *    be discarding the address if we get a different one.
8699a2dd95SBruce Richardson 	 *
8799a2dd95SBruce Richardson 	 * for all other cases, alignment is potentially necessary.
8899a2dd95SBruce Richardson 	 */
8999a2dd95SBruce Richardson 	no_align = (requested_addr != NULL &&
9099a2dd95SBruce Richardson 		requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
9199a2dd95SBruce Richardson 		!addr_is_hint) ||
9299a2dd95SBruce Richardson 		page_sz == system_page_sz;
9399a2dd95SBruce Richardson 
9499a2dd95SBruce Richardson 	do {
9599a2dd95SBruce Richardson 		map_sz = no_align ? *size : *size + page_sz;
9699a2dd95SBruce Richardson 		if (map_sz > SIZE_MAX) {
97ae67895bSDavid Marchand 			EAL_LOG(ERR, "Map size too big");
9899a2dd95SBruce Richardson 			rte_errno = E2BIG;
9999a2dd95SBruce Richardson 			return NULL;
10099a2dd95SBruce Richardson 		}
10199a2dd95SBruce Richardson 
10299a2dd95SBruce Richardson 		mapped_addr = eal_mem_reserve(
10399a2dd95SBruce Richardson 			requested_addr, (size_t)map_sz, reserve_flags);
10499a2dd95SBruce Richardson 		if ((mapped_addr == NULL) && allow_shrink)
10599a2dd95SBruce Richardson 			*size -= page_sz;
10699a2dd95SBruce Richardson 
10799a2dd95SBruce Richardson 		if ((mapped_addr != NULL) && addr_is_hint &&
10899a2dd95SBruce Richardson 				(mapped_addr != requested_addr)) {
10999a2dd95SBruce Richardson 			try++;
11099a2dd95SBruce Richardson 			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
11199a2dd95SBruce Richardson 			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
11299a2dd95SBruce Richardson 				/* hint was not used. Try with another offset */
11399a2dd95SBruce Richardson 				eal_mem_free(mapped_addr, map_sz);
11499a2dd95SBruce Richardson 				mapped_addr = NULL;
11599a2dd95SBruce Richardson 				requested_addr = next_baseaddr;
11699a2dd95SBruce Richardson 			}
11799a2dd95SBruce Richardson 		}
11899a2dd95SBruce Richardson 	} while ((allow_shrink || addr_is_hint) &&
11999a2dd95SBruce Richardson 		(mapped_addr == NULL) && (*size > 0));
12099a2dd95SBruce Richardson 
12199a2dd95SBruce Richardson 	/* align resulting address - if map failed, we will ignore the value
12299a2dd95SBruce Richardson 	 * anyway, so no need to add additional checks.
12399a2dd95SBruce Richardson 	 */
12499a2dd95SBruce Richardson 	aligned_addr = no_align ? mapped_addr :
12599a2dd95SBruce Richardson 			RTE_PTR_ALIGN(mapped_addr, page_sz);
12699a2dd95SBruce Richardson 
12799a2dd95SBruce Richardson 	if (*size == 0) {
128ae67895bSDavid Marchand 		EAL_LOG(ERR, "Cannot get a virtual area of any size: %s",
12999a2dd95SBruce Richardson 			rte_strerror(rte_errno));
13099a2dd95SBruce Richardson 		return NULL;
13199a2dd95SBruce Richardson 	} else if (mapped_addr == NULL) {
132ae67895bSDavid Marchand 		EAL_LOG(ERR, "Cannot get a virtual area: %s",
13399a2dd95SBruce Richardson 			rte_strerror(rte_errno));
13499a2dd95SBruce Richardson 		return NULL;
13599a2dd95SBruce Richardson 	} else if (requested_addr != NULL && !addr_is_hint &&
13699a2dd95SBruce Richardson 			aligned_addr != requested_addr) {
137ae67895bSDavid Marchand 		EAL_LOG(ERR, "Cannot get a virtual area at requested address: %p (got %p)",
13899a2dd95SBruce Richardson 			requested_addr, aligned_addr);
13999a2dd95SBruce Richardson 		eal_mem_free(mapped_addr, map_sz);
14099a2dd95SBruce Richardson 		rte_errno = EADDRNOTAVAIL;
14199a2dd95SBruce Richardson 		return NULL;
14299a2dd95SBruce Richardson 	} else if (requested_addr != NULL && addr_is_hint &&
14399a2dd95SBruce Richardson 			aligned_addr != requested_addr) {
1444042dc20SAnatoly Burakov 		/*
1454042dc20SAnatoly Burakov 		 * demote this warning to debug if we did not explicitly request
1464042dc20SAnatoly Burakov 		 * a base virtual address.
1474042dc20SAnatoly Burakov 		 */
1484042dc20SAnatoly Burakov 		if (internal_conf->base_virtaddr != 0) {
149ae67895bSDavid Marchand 			EAL_LOG(WARNING, "WARNING! Base virtual address hint (%p != %p) not respected!",
15099a2dd95SBruce Richardson 				requested_addr, aligned_addr);
151ae67895bSDavid Marchand 			EAL_LOG(WARNING, "   This may cause issues with mapping memory into secondary processes");
1524042dc20SAnatoly Burakov 		} else {
153ae67895bSDavid Marchand 			EAL_LOG(DEBUG, "WARNING! Base virtual address hint (%p != %p) not respected!",
1544042dc20SAnatoly Burakov 				requested_addr, aligned_addr);
155ae67895bSDavid Marchand 			EAL_LOG(DEBUG, "   This may cause issues with mapping memory into secondary processes");
1564042dc20SAnatoly Burakov 		}
15799a2dd95SBruce Richardson 	} else if (next_baseaddr != NULL) {
15899a2dd95SBruce Richardson 		next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
15999a2dd95SBruce Richardson 	}
16099a2dd95SBruce Richardson 
161ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "Virtual area found at %p (size = 0x%zx)",
16299a2dd95SBruce Richardson 		aligned_addr, *size);
16399a2dd95SBruce Richardson 
16499a2dd95SBruce Richardson 	if (unmap) {
16599a2dd95SBruce Richardson 		eal_mem_free(mapped_addr, map_sz);
16699a2dd95SBruce Richardson 	} else if (!no_align) {
16799a2dd95SBruce Richardson 		void *map_end, *aligned_end;
16899a2dd95SBruce Richardson 		size_t before_len, after_len;
16999a2dd95SBruce Richardson 
17099a2dd95SBruce Richardson 		/* when we reserve space with alignment, we add alignment to
17199a2dd95SBruce Richardson 		 * mapping size. On 32-bit, if 1GB alignment was requested, this
17299a2dd95SBruce Richardson 		 * would waste 1GB of address space, which is a luxury we cannot
17399a2dd95SBruce Richardson 		 * afford. so, if alignment was performed, check if any unneeded
17499a2dd95SBruce Richardson 		 * address space can be unmapped back.
17599a2dd95SBruce Richardson 		 */
17699a2dd95SBruce Richardson 
17799a2dd95SBruce Richardson 		map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
17899a2dd95SBruce Richardson 		aligned_end = RTE_PTR_ADD(aligned_addr, *size);
17999a2dd95SBruce Richardson 
18099a2dd95SBruce Richardson 		/* unmap space before aligned mmap address */
18199a2dd95SBruce Richardson 		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
18299a2dd95SBruce Richardson 		if (before_len > 0)
18399a2dd95SBruce Richardson 			eal_mem_free(mapped_addr, before_len);
18499a2dd95SBruce Richardson 
18599a2dd95SBruce Richardson 		/* unmap space after aligned end mmap address */
18699a2dd95SBruce Richardson 		after_len = RTE_PTR_DIFF(map_end, aligned_end);
18799a2dd95SBruce Richardson 		if (after_len > 0)
18899a2dd95SBruce Richardson 			eal_mem_free(aligned_end, after_len);
18999a2dd95SBruce Richardson 	}
19099a2dd95SBruce Richardson 
19199a2dd95SBruce Richardson 	if (!unmap) {
19299a2dd95SBruce Richardson 		/* Exclude these pages from a core dump. */
19399a2dd95SBruce Richardson 		eal_mem_set_dump(aligned_addr, *size, false);
19499a2dd95SBruce Richardson 	}
19599a2dd95SBruce Richardson 
19699a2dd95SBruce Richardson 	return aligned_addr;
19799a2dd95SBruce Richardson }
19899a2dd95SBruce Richardson 
19999a2dd95SBruce Richardson int
20099a2dd95SBruce Richardson eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
20199a2dd95SBruce Richardson 		uint64_t page_sz, int n_segs, int socket_id, bool heap)
20299a2dd95SBruce Richardson {
20399a2dd95SBruce Richardson 	if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
20499a2dd95SBruce Richardson 			sizeof(struct rte_memseg))) {
205ae67895bSDavid Marchand 		EAL_LOG(ERR, "Cannot allocate memseg list: %s",
20699a2dd95SBruce Richardson 			rte_strerror(rte_errno));
20799a2dd95SBruce Richardson 		return -1;
20899a2dd95SBruce Richardson 	}
20999a2dd95SBruce Richardson 
21099a2dd95SBruce Richardson 	msl->page_sz = page_sz;
21199a2dd95SBruce Richardson 	msl->socket_id = socket_id;
21299a2dd95SBruce Richardson 	msl->base_va = NULL;
21399a2dd95SBruce Richardson 	msl->heap = heap;
21499a2dd95SBruce Richardson 
215ae67895bSDavid Marchand 	EAL_LOG(DEBUG,
216ae67895bSDavid Marchand 		"Memseg list allocated at socket %i, page size 0x%"PRIx64"kB",
21799a2dd95SBruce Richardson 		socket_id, page_sz >> 10);
21899a2dd95SBruce Richardson 
21999a2dd95SBruce Richardson 	return 0;
22099a2dd95SBruce Richardson }
22199a2dd95SBruce Richardson 
22299a2dd95SBruce Richardson int
22399a2dd95SBruce Richardson eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
22499a2dd95SBruce Richardson 		int n_segs, int socket_id, int type_msl_idx, bool heap)
22599a2dd95SBruce Richardson {
22699a2dd95SBruce Richardson 	char name[RTE_FBARRAY_NAME_LEN];
22799a2dd95SBruce Richardson 
22899a2dd95SBruce Richardson 	snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
22999a2dd95SBruce Richardson 		 type_msl_idx);
23099a2dd95SBruce Richardson 
23199a2dd95SBruce Richardson 	return eal_memseg_list_init_named(
23299a2dd95SBruce Richardson 		msl, name, page_sz, n_segs, socket_id, heap);
23399a2dd95SBruce Richardson }
23499a2dd95SBruce Richardson 
23599a2dd95SBruce Richardson int
23699a2dd95SBruce Richardson eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags)
23799a2dd95SBruce Richardson {
23899a2dd95SBruce Richardson 	size_t page_sz, mem_sz;
23999a2dd95SBruce Richardson 	void *addr;
24099a2dd95SBruce Richardson 
24199a2dd95SBruce Richardson 	page_sz = msl->page_sz;
24299a2dd95SBruce Richardson 	mem_sz = page_sz * msl->memseg_arr.len;
24399a2dd95SBruce Richardson 
24499a2dd95SBruce Richardson 	addr = eal_get_virtual_area(
24599a2dd95SBruce Richardson 		msl->base_va, &mem_sz, page_sz, 0, reserve_flags);
24699a2dd95SBruce Richardson 	if (addr == NULL) {
24799a2dd95SBruce Richardson #ifndef RTE_EXEC_ENV_WINDOWS
24899a2dd95SBruce Richardson 		/* The hint would be misleading on Windows, because address
24999a2dd95SBruce Richardson 		 * is by default system-selected (base VA = 0).
25099a2dd95SBruce Richardson 		 * However, this function is called from many places,
25199a2dd95SBruce Richardson 		 * including common code, so don't duplicate the message.
25299a2dd95SBruce Richardson 		 */
25399a2dd95SBruce Richardson 		if (rte_errno == EADDRNOTAVAIL)
254ae67895bSDavid Marchand 			EAL_LOG(ERR, "Cannot reserve %llu bytes at [%p] - "
255ae67895bSDavid Marchand 				"please use '--" OPT_BASE_VIRTADDR "' option",
25699a2dd95SBruce Richardson 				(unsigned long long)mem_sz, msl->base_va);
25799a2dd95SBruce Richardson #endif
25899a2dd95SBruce Richardson 		return -1;
25999a2dd95SBruce Richardson 	}
26099a2dd95SBruce Richardson 	msl->base_va = addr;
26199a2dd95SBruce Richardson 	msl->len = mem_sz;
26299a2dd95SBruce Richardson 
263ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "VA reserved for memseg list at %p, size %zx",
26499a2dd95SBruce Richardson 			addr, mem_sz);
26599a2dd95SBruce Richardson 
26699a2dd95SBruce Richardson 	return 0;
26799a2dd95SBruce Richardson }
26899a2dd95SBruce Richardson 
26999a2dd95SBruce Richardson void
27099a2dd95SBruce Richardson eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs)
27199a2dd95SBruce Richardson {
27299a2dd95SBruce Richardson 	size_t page_sz = msl->page_sz;
27399a2dd95SBruce Richardson 	int i;
27499a2dd95SBruce Richardson 
27599a2dd95SBruce Richardson 	for (i = 0; i < n_segs; i++) {
27699a2dd95SBruce Richardson 		struct rte_fbarray *arr = &msl->memseg_arr;
27799a2dd95SBruce Richardson 		struct rte_memseg *ms = rte_fbarray_get(arr, i);
27899a2dd95SBruce Richardson 
27999a2dd95SBruce Richardson 		if (rte_eal_iova_mode() == RTE_IOVA_VA)
28099a2dd95SBruce Richardson 			ms->iova = (uintptr_t)addr;
28199a2dd95SBruce Richardson 		else
28299a2dd95SBruce Richardson 			ms->iova = RTE_BAD_IOVA;
28399a2dd95SBruce Richardson 		ms->addr = addr;
28499a2dd95SBruce Richardson 		ms->hugepage_sz = page_sz;
28599a2dd95SBruce Richardson 		ms->socket_id = 0;
28699a2dd95SBruce Richardson 		ms->len = page_sz;
28799a2dd95SBruce Richardson 
28899a2dd95SBruce Richardson 		rte_fbarray_set_used(arr, i);
28999a2dd95SBruce Richardson 
29099a2dd95SBruce Richardson 		addr = RTE_PTR_ADD(addr, page_sz);
29199a2dd95SBruce Richardson 	}
29299a2dd95SBruce Richardson }
29399a2dd95SBruce Richardson 
29499a2dd95SBruce Richardson static struct rte_memseg *
29599a2dd95SBruce Richardson virt2memseg(const void *addr, const struct rte_memseg_list *msl)
29699a2dd95SBruce Richardson {
29799a2dd95SBruce Richardson 	const struct rte_fbarray *arr;
29899a2dd95SBruce Richardson 	void *start, *end;
29999a2dd95SBruce Richardson 	int ms_idx;
30099a2dd95SBruce Richardson 
30199a2dd95SBruce Richardson 	if (msl == NULL)
30299a2dd95SBruce Richardson 		return NULL;
30399a2dd95SBruce Richardson 
30499a2dd95SBruce Richardson 	/* a memseg list was specified, check if it's the right one */
30599a2dd95SBruce Richardson 	start = msl->base_va;
30699a2dd95SBruce Richardson 	end = RTE_PTR_ADD(start, msl->len);
30799a2dd95SBruce Richardson 
30899a2dd95SBruce Richardson 	if (addr < start || addr >= end)
30999a2dd95SBruce Richardson 		return NULL;
31099a2dd95SBruce Richardson 
31199a2dd95SBruce Richardson 	/* now, calculate index */
31299a2dd95SBruce Richardson 	arr = &msl->memseg_arr;
31399a2dd95SBruce Richardson 	ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
31499a2dd95SBruce Richardson 	return rte_fbarray_get(arr, ms_idx);
31599a2dd95SBruce Richardson }
31699a2dd95SBruce Richardson 
31799a2dd95SBruce Richardson static struct rte_memseg_list *
31899a2dd95SBruce Richardson virt2memseg_list(const void *addr)
31999a2dd95SBruce Richardson {
32099a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
32199a2dd95SBruce Richardson 	struct rte_memseg_list *msl;
32299a2dd95SBruce Richardson 	int msl_idx;
32399a2dd95SBruce Richardson 
32499a2dd95SBruce Richardson 	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
32599a2dd95SBruce Richardson 		void *start, *end;
32699a2dd95SBruce Richardson 		msl = &mcfg->memsegs[msl_idx];
32799a2dd95SBruce Richardson 
32899a2dd95SBruce Richardson 		start = msl->base_va;
32999a2dd95SBruce Richardson 		end = RTE_PTR_ADD(start, msl->len);
33099a2dd95SBruce Richardson 		if (addr >= start && addr < end)
33199a2dd95SBruce Richardson 			break;
33299a2dd95SBruce Richardson 	}
33399a2dd95SBruce Richardson 	/* if we didn't find our memseg list */
33499a2dd95SBruce Richardson 	if (msl_idx == RTE_MAX_MEMSEG_LISTS)
33599a2dd95SBruce Richardson 		return NULL;
33699a2dd95SBruce Richardson 	return msl;
33799a2dd95SBruce Richardson }
33899a2dd95SBruce Richardson 
/* Public entry point: find the memseg list containing addr (NULL if none). */
struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *addr)
{
	struct rte_memseg_list *found = virt2memseg_list(addr);

	return found;
}
34499a2dd95SBruce Richardson 
34599a2dd95SBruce Richardson struct virtiova {
34699a2dd95SBruce Richardson 	rte_iova_t iova;
34799a2dd95SBruce Richardson 	void *virt;
34899a2dd95SBruce Richardson };
34999a2dd95SBruce Richardson static int
35099a2dd95SBruce Richardson find_virt(const struct rte_memseg_list *msl __rte_unused,
35199a2dd95SBruce Richardson 		const struct rte_memseg *ms, void *arg)
35299a2dd95SBruce Richardson {
35399a2dd95SBruce Richardson 	struct virtiova *vi = arg;
35499a2dd95SBruce Richardson 	if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
35599a2dd95SBruce Richardson 		size_t offset = vi->iova - ms->iova;
35699a2dd95SBruce Richardson 		vi->virt = RTE_PTR_ADD(ms->addr, offset);
35799a2dd95SBruce Richardson 		/* stop the walk */
35899a2dd95SBruce Richardson 		return 1;
35999a2dd95SBruce Richardson 	}
36099a2dd95SBruce Richardson 	return 0;
36199a2dd95SBruce Richardson }
36299a2dd95SBruce Richardson static int
36399a2dd95SBruce Richardson find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
36499a2dd95SBruce Richardson 		const struct rte_memseg *ms, size_t len, void *arg)
36599a2dd95SBruce Richardson {
36699a2dd95SBruce Richardson 	struct virtiova *vi = arg;
36799a2dd95SBruce Richardson 	if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
36899a2dd95SBruce Richardson 		size_t offset = vi->iova - ms->iova;
36999a2dd95SBruce Richardson 		vi->virt = RTE_PTR_ADD(ms->addr, offset);
37099a2dd95SBruce Richardson 		/* stop the walk */
37199a2dd95SBruce Richardson 		return 1;
37299a2dd95SBruce Richardson 	}
37399a2dd95SBruce Richardson 	return 0;
37499a2dd95SBruce Richardson }
37599a2dd95SBruce Richardson 
37699a2dd95SBruce Richardson void *
37799a2dd95SBruce Richardson rte_mem_iova2virt(rte_iova_t iova)
37899a2dd95SBruce Richardson {
37999a2dd95SBruce Richardson 	struct virtiova vi;
38099a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
38199a2dd95SBruce Richardson 		eal_get_internal_configuration();
38299a2dd95SBruce Richardson 
38399a2dd95SBruce Richardson 	memset(&vi, 0, sizeof(vi));
38499a2dd95SBruce Richardson 
38599a2dd95SBruce Richardson 	vi.iova = iova;
38699a2dd95SBruce Richardson 	/* for legacy mem, we can get away with scanning VA-contiguous segments,
38799a2dd95SBruce Richardson 	 * as we know they are PA-contiguous as well
38899a2dd95SBruce Richardson 	 */
38999a2dd95SBruce Richardson 	if (internal_conf->legacy_mem)
39099a2dd95SBruce Richardson 		rte_memseg_contig_walk(find_virt_legacy, &vi);
39199a2dd95SBruce Richardson 	else
39299a2dd95SBruce Richardson 		rte_memseg_walk(find_virt, &vi);
39399a2dd95SBruce Richardson 
39499a2dd95SBruce Richardson 	return vi.virt;
39599a2dd95SBruce Richardson }
39699a2dd95SBruce Richardson 
/*
 * Find the memseg containing addr. When the caller does not supply a list,
 * the owning list is looked up first.
 */
struct rte_memseg *
rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	if (msl == NULL)
		msl = rte_mem_virt2memseg_list(addr);

	return virt2memseg(addr, msl);
}
40399a2dd95SBruce Richardson 
40499a2dd95SBruce Richardson static int
40599a2dd95SBruce Richardson physmem_size(const struct rte_memseg_list *msl, void *arg)
40699a2dd95SBruce Richardson {
40799a2dd95SBruce Richardson 	uint64_t *total_len = arg;
40899a2dd95SBruce Richardson 
40999a2dd95SBruce Richardson 	if (msl->external)
41099a2dd95SBruce Richardson 		return 0;
41199a2dd95SBruce Richardson 
41299a2dd95SBruce Richardson 	*total_len += msl->memseg_arr.count * msl->page_sz;
41399a2dd95SBruce Richardson 
41499a2dd95SBruce Richardson 	return 0;
41599a2dd95SBruce Richardson }
41699a2dd95SBruce Richardson 
41799a2dd95SBruce Richardson /* get the total size of memory */
41899a2dd95SBruce Richardson uint64_t
41999a2dd95SBruce Richardson rte_eal_get_physmem_size(void)
42099a2dd95SBruce Richardson {
42199a2dd95SBruce Richardson 	uint64_t total_len = 0;
42299a2dd95SBruce Richardson 
42399a2dd95SBruce Richardson 	rte_memseg_list_walk(physmem_size, &total_len);
42499a2dd95SBruce Richardson 
42599a2dd95SBruce Richardson 	return total_len;
42699a2dd95SBruce Richardson }
42799a2dd95SBruce Richardson 
42899a2dd95SBruce Richardson static int
42999a2dd95SBruce Richardson dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
43099a2dd95SBruce Richardson 		void *arg)
43199a2dd95SBruce Richardson {
43299a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
43399a2dd95SBruce Richardson 	int msl_idx, ms_idx, fd;
43499a2dd95SBruce Richardson 	FILE *f = arg;
43599a2dd95SBruce Richardson 
43699a2dd95SBruce Richardson 	msl_idx = msl - mcfg->memsegs;
43799a2dd95SBruce Richardson 	if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
43899a2dd95SBruce Richardson 		return -1;
43999a2dd95SBruce Richardson 
44099a2dd95SBruce Richardson 	ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
44199a2dd95SBruce Richardson 	if (ms_idx < 0)
44299a2dd95SBruce Richardson 		return -1;
44399a2dd95SBruce Richardson 
44499a2dd95SBruce Richardson 	fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
44599a2dd95SBruce Richardson 	fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
44699a2dd95SBruce Richardson 			"virt:%p, socket_id:%"PRId32", "
44799a2dd95SBruce Richardson 			"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
44899a2dd95SBruce Richardson 			"nrank:%"PRIx32" fd:%i\n",
44999a2dd95SBruce Richardson 			msl_idx, ms_idx,
45099a2dd95SBruce Richardson 			ms->iova,
45199a2dd95SBruce Richardson 			ms->len,
45299a2dd95SBruce Richardson 			ms->addr,
45399a2dd95SBruce Richardson 			ms->socket_id,
45499a2dd95SBruce Richardson 			ms->hugepage_sz,
45599a2dd95SBruce Richardson 			ms->nchannel,
45699a2dd95SBruce Richardson 			ms->nrank,
45799a2dd95SBruce Richardson 			fd);
45899a2dd95SBruce Richardson 
45999a2dd95SBruce Richardson 	return 0;
46099a2dd95SBruce Richardson }
46199a2dd95SBruce Richardson 
46299a2dd95SBruce Richardson /*
46399a2dd95SBruce Richardson  * Defining here because declared in rte_memory.h, but the actual implementation
46499a2dd95SBruce Richardson  * is in eal_common_memalloc.c, like all other memalloc internals.
46599a2dd95SBruce Richardson  */
46699a2dd95SBruce Richardson int
46799a2dd95SBruce Richardson rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
46899a2dd95SBruce Richardson 		void *arg)
46999a2dd95SBruce Richardson {
47099a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
47199a2dd95SBruce Richardson 		eal_get_internal_configuration();
47299a2dd95SBruce Richardson 
47399a2dd95SBruce Richardson 	/* FreeBSD boots with legacy mem enabled by default */
47499a2dd95SBruce Richardson 	if (internal_conf->legacy_mem) {
475ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Registering mem event callbacks not supported");
47699a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
47799a2dd95SBruce Richardson 		return -1;
47899a2dd95SBruce Richardson 	}
47999a2dd95SBruce Richardson 	return eal_memalloc_mem_event_callback_register(name, clb, arg);
48099a2dd95SBruce Richardson }
48199a2dd95SBruce Richardson 
48299a2dd95SBruce Richardson int
48399a2dd95SBruce Richardson rte_mem_event_callback_unregister(const char *name, void *arg)
48499a2dd95SBruce Richardson {
48599a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
48699a2dd95SBruce Richardson 		eal_get_internal_configuration();
48799a2dd95SBruce Richardson 
48899a2dd95SBruce Richardson 	/* FreeBSD boots with legacy mem enabled by default */
48999a2dd95SBruce Richardson 	if (internal_conf->legacy_mem) {
490ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Registering mem event callbacks not supported");
49199a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
49299a2dd95SBruce Richardson 		return -1;
49399a2dd95SBruce Richardson 	}
49499a2dd95SBruce Richardson 	return eal_memalloc_mem_event_callback_unregister(name, arg);
49599a2dd95SBruce Richardson }
49699a2dd95SBruce Richardson 
49799a2dd95SBruce Richardson int
49899a2dd95SBruce Richardson rte_mem_alloc_validator_register(const char *name,
49999a2dd95SBruce Richardson 		rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
50099a2dd95SBruce Richardson {
50199a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
50299a2dd95SBruce Richardson 		eal_get_internal_configuration();
50399a2dd95SBruce Richardson 
50499a2dd95SBruce Richardson 	/* FreeBSD boots with legacy mem enabled by default */
50599a2dd95SBruce Richardson 	if (internal_conf->legacy_mem) {
506ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Registering mem alloc validators not supported");
50799a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
50899a2dd95SBruce Richardson 		return -1;
50999a2dd95SBruce Richardson 	}
51099a2dd95SBruce Richardson 	return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
51199a2dd95SBruce Richardson 			limit);
51299a2dd95SBruce Richardson }
51399a2dd95SBruce Richardson 
51499a2dd95SBruce Richardson int
51599a2dd95SBruce Richardson rte_mem_alloc_validator_unregister(const char *name, int socket_id)
51699a2dd95SBruce Richardson {
51799a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
51899a2dd95SBruce Richardson 		eal_get_internal_configuration();
51999a2dd95SBruce Richardson 
52099a2dd95SBruce Richardson 	/* FreeBSD boots with legacy mem enabled by default */
52199a2dd95SBruce Richardson 	if (internal_conf->legacy_mem) {
522ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Registering mem alloc validators not supported");
52399a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
52499a2dd95SBruce Richardson 		return -1;
52599a2dd95SBruce Richardson 	}
52699a2dd95SBruce Richardson 	return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
52799a2dd95SBruce Richardson }
52899a2dd95SBruce Richardson 
52999a2dd95SBruce Richardson /* Dump the physical memory layout on console */
53099a2dd95SBruce Richardson void
53199a2dd95SBruce Richardson rte_dump_physmem_layout(FILE *f)
53299a2dd95SBruce Richardson {
53399a2dd95SBruce Richardson 	rte_memseg_walk(dump_memseg, f);
534*17bb6004SGagandeep Singh 	fprintf(f, "Total Memory Segments size = %"PRIu64"M\n",
535*17bb6004SGagandeep Singh 		rte_eal_get_physmem_size() / (1024 * 1024));
53699a2dd95SBruce Richardson }
53799a2dd95SBruce Richardson 
53899a2dd95SBruce Richardson static int
53999a2dd95SBruce Richardson check_iova(const struct rte_memseg_list *msl __rte_unused,
54099a2dd95SBruce Richardson 		const struct rte_memseg *ms, void *arg)
54199a2dd95SBruce Richardson {
54299a2dd95SBruce Richardson 	uint64_t *mask = arg;
54399a2dd95SBruce Richardson 	rte_iova_t iova;
54499a2dd95SBruce Richardson 
54599a2dd95SBruce Richardson 	/* higher address within segment */
54699a2dd95SBruce Richardson 	iova = (ms->iova + ms->len) - 1;
54799a2dd95SBruce Richardson 	if (!(iova & *mask))
54899a2dd95SBruce Richardson 		return 0;
54999a2dd95SBruce Richardson 
550ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "memseg iova %"PRIx64", len %zx, out of range",
55199a2dd95SBruce Richardson 			    ms->iova, ms->len);
55299a2dd95SBruce Richardson 
553ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "\tusing dma mask %"PRIx64, *mask);
55499a2dd95SBruce Richardson 	return 1;
55599a2dd95SBruce Richardson }
55699a2dd95SBruce Richardson 
55799a2dd95SBruce Richardson #define MAX_DMA_MASK_BITS 63
55899a2dd95SBruce Richardson 
55999a2dd95SBruce Richardson /* check memseg iovas are within the required range based on dma mask */
56099a2dd95SBruce Richardson static int
56199a2dd95SBruce Richardson check_dma_mask(uint8_t maskbits, bool thread_unsafe)
56299a2dd95SBruce Richardson {
56399a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
56499a2dd95SBruce Richardson 	uint64_t mask;
56599a2dd95SBruce Richardson 	int ret;
56699a2dd95SBruce Richardson 
56799a2dd95SBruce Richardson 	/* Sanity check. We only check width can be managed with 64 bits
56899a2dd95SBruce Richardson 	 * variables. Indeed any higher value is likely wrong. */
56999a2dd95SBruce Richardson 	if (maskbits > MAX_DMA_MASK_BITS) {
570ae67895bSDavid Marchand 		EAL_LOG(ERR, "wrong dma mask size %u (Max: %u)",
57199a2dd95SBruce Richardson 				   maskbits, MAX_DMA_MASK_BITS);
57299a2dd95SBruce Richardson 		return -1;
57399a2dd95SBruce Richardson 	}
57499a2dd95SBruce Richardson 
57599a2dd95SBruce Richardson 	/* create dma mask */
57699a2dd95SBruce Richardson 	mask = ~((1ULL << maskbits) - 1);
57799a2dd95SBruce Richardson 
57899a2dd95SBruce Richardson 	if (thread_unsafe)
57999a2dd95SBruce Richardson 		ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
58099a2dd95SBruce Richardson 	else
58199a2dd95SBruce Richardson 		ret = rte_memseg_walk(check_iova, &mask);
58299a2dd95SBruce Richardson 
58399a2dd95SBruce Richardson 	if (ret)
58499a2dd95SBruce Richardson 		/*
58599a2dd95SBruce Richardson 		 * Dma mask precludes hugepage usage.
58699a2dd95SBruce Richardson 		 * This device can not be used and we do not need to keep
58799a2dd95SBruce Richardson 		 * the dma mask.
58899a2dd95SBruce Richardson 		 */
58999a2dd95SBruce Richardson 		return 1;
59099a2dd95SBruce Richardson 
59199a2dd95SBruce Richardson 	/*
59299a2dd95SBruce Richardson 	 * we need to keep the more restricted maskbit for checking
59399a2dd95SBruce Richardson 	 * potential dynamic memory allocation in the future.
59499a2dd95SBruce Richardson 	 */
59599a2dd95SBruce Richardson 	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
59699a2dd95SBruce Richardson 			     RTE_MIN(mcfg->dma_maskbits, maskbits);
59799a2dd95SBruce Richardson 
59899a2dd95SBruce Richardson 	return 0;
59999a2dd95SBruce Richardson }
60099a2dd95SBruce Richardson 
/*
 * Check all memsegs against a DMA mask of maskbits bits, walking the memsegs
 * through the locking variant (rte_memseg_walk).
 * Return values are those of check_dma_mask().
 */
int
rte_mem_check_dma_mask(uint8_t maskbits)
{
	const bool thread_unsafe = false;

	return check_dma_mask(maskbits, thread_unsafe);
}
60699a2dd95SBruce Richardson 
/*
 * Check all memsegs against a DMA mask of maskbits bits, walking the memsegs
 * through the non-locking variant (rte_memseg_walk_thread_unsafe).
 * Return values are those of check_dma_mask().
 */
int
rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
{
	const bool thread_unsafe = true;

	return check_dma_mask(maskbits, thread_unsafe);
}
61299a2dd95SBruce Richardson 
61399a2dd95SBruce Richardson /*
61499a2dd95SBruce Richardson  * Set dma mask to use when memory initialization is done.
61599a2dd95SBruce Richardson  *
61699a2dd95SBruce Richardson  * This function should ONLY be used by code executed before the memory
61799a2dd95SBruce Richardson  * initialization. PMDs should use rte_mem_check_dma_mask if addressing
61899a2dd95SBruce Richardson  * limitations by the device.
61999a2dd95SBruce Richardson  */
62099a2dd95SBruce Richardson void
62199a2dd95SBruce Richardson rte_mem_set_dma_mask(uint8_t maskbits)
62299a2dd95SBruce Richardson {
62399a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
62499a2dd95SBruce Richardson 
62599a2dd95SBruce Richardson 	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
62699a2dd95SBruce Richardson 			     RTE_MIN(mcfg->dma_maskbits, maskbits);
62799a2dd95SBruce Richardson }
62899a2dd95SBruce Richardson 
62999a2dd95SBruce Richardson /* return the number of memory channels */
63099a2dd95SBruce Richardson unsigned rte_memory_get_nchannel(void)
63199a2dd95SBruce Richardson {
63299a2dd95SBruce Richardson 	return rte_eal_get_configuration()->mem_config->nchannel;
63399a2dd95SBruce Richardson }
63499a2dd95SBruce Richardson 
63599a2dd95SBruce Richardson /* return the number of memory rank */
63699a2dd95SBruce Richardson unsigned rte_memory_get_nrank(void)
63799a2dd95SBruce Richardson {
63899a2dd95SBruce Richardson 	return rte_eal_get_configuration()->mem_config->nrank;
63999a2dd95SBruce Richardson }
64099a2dd95SBruce Richardson 
64199a2dd95SBruce Richardson static int
64299a2dd95SBruce Richardson rte_eal_memdevice_init(void)
64399a2dd95SBruce Richardson {
64499a2dd95SBruce Richardson 	struct rte_config *config;
64599a2dd95SBruce Richardson 	const struct internal_config *internal_conf;
64699a2dd95SBruce Richardson 
64799a2dd95SBruce Richardson 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
64899a2dd95SBruce Richardson 		return 0;
64999a2dd95SBruce Richardson 
65099a2dd95SBruce Richardson 	internal_conf = eal_get_internal_configuration();
65199a2dd95SBruce Richardson 	config = rte_eal_get_configuration();
65299a2dd95SBruce Richardson 	config->mem_config->nchannel = internal_conf->force_nchannel;
65399a2dd95SBruce Richardson 	config->mem_config->nrank = internal_conf->force_nrank;
65499a2dd95SBruce Richardson 
65599a2dd95SBruce Richardson 	return 0;
65699a2dd95SBruce Richardson }
65799a2dd95SBruce Richardson 
/*
 * Lock the page containing virt in physical memory so it cannot be swapped.
 * The address is rounded down to a page boundary and exactly one page is
 * passed to rte_mem_lock(), whose return value is handed back unchanged.
 */
int
rte_mem_lock_page(const void *virt)
{
	uintptr_t addr = (uintptr_t)virt;
	const size_t pg_len = rte_mem_page_size();
	uintptr_t page_start;

	page_start = RTE_PTR_ALIGN_FLOOR(addr, pg_len);

	return rte_mem_lock((void *)page_start, pg_len);
}
66799a2dd95SBruce Richardson 
66899a2dd95SBruce Richardson int
66999a2dd95SBruce Richardson rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
67099a2dd95SBruce Richardson {
67199a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
67299a2dd95SBruce Richardson 	int i, ms_idx, ret = 0;
67399a2dd95SBruce Richardson 
67499a2dd95SBruce Richardson 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
67599a2dd95SBruce Richardson 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
67699a2dd95SBruce Richardson 		const struct rte_memseg *ms;
67799a2dd95SBruce Richardson 		struct rte_fbarray *arr;
67899a2dd95SBruce Richardson 
67999a2dd95SBruce Richardson 		if (msl->memseg_arr.count == 0)
68099a2dd95SBruce Richardson 			continue;
68199a2dd95SBruce Richardson 
68299a2dd95SBruce Richardson 		arr = &msl->memseg_arr;
68399a2dd95SBruce Richardson 
68499a2dd95SBruce Richardson 		ms_idx = rte_fbarray_find_next_used(arr, 0);
68599a2dd95SBruce Richardson 		while (ms_idx >= 0) {
68699a2dd95SBruce Richardson 			int n_segs;
68799a2dd95SBruce Richardson 			size_t len;
68899a2dd95SBruce Richardson 
68999a2dd95SBruce Richardson 			ms = rte_fbarray_get(arr, ms_idx);
69099a2dd95SBruce Richardson 
69199a2dd95SBruce Richardson 			/* find how many more segments there are, starting with
69299a2dd95SBruce Richardson 			 * this one.
69399a2dd95SBruce Richardson 			 */
69499a2dd95SBruce Richardson 			n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
69599a2dd95SBruce Richardson 			len = n_segs * msl->page_sz;
69699a2dd95SBruce Richardson 
69799a2dd95SBruce Richardson 			ret = func(msl, ms, len, arg);
69899a2dd95SBruce Richardson 			if (ret)
69999a2dd95SBruce Richardson 				return ret;
70099a2dd95SBruce Richardson 			ms_idx = rte_fbarray_find_next_used(arr,
70199a2dd95SBruce Richardson 					ms_idx + n_segs);
70299a2dd95SBruce Richardson 		}
70399a2dd95SBruce Richardson 	}
70499a2dd95SBruce Richardson 	return 0;
70599a2dd95SBruce Richardson }
70699a2dd95SBruce Richardson 
70799a2dd95SBruce Richardson int
70899a2dd95SBruce Richardson rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
70999a2dd95SBruce Richardson {
71099a2dd95SBruce Richardson 	int ret = 0;
71199a2dd95SBruce Richardson 
71299a2dd95SBruce Richardson 	/* do not allow allocations/frees/init while we iterate */
71399a2dd95SBruce Richardson 	rte_mcfg_mem_read_lock();
71499a2dd95SBruce Richardson 	ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
71599a2dd95SBruce Richardson 	rte_mcfg_mem_read_unlock();
71699a2dd95SBruce Richardson 
71799a2dd95SBruce Richardson 	return ret;
71899a2dd95SBruce Richardson }
71999a2dd95SBruce Richardson 
72099a2dd95SBruce Richardson int
72199a2dd95SBruce Richardson rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
72299a2dd95SBruce Richardson {
72399a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
72499a2dd95SBruce Richardson 	int i, ms_idx, ret = 0;
72599a2dd95SBruce Richardson 
72699a2dd95SBruce Richardson 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
72799a2dd95SBruce Richardson 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
72899a2dd95SBruce Richardson 		const struct rte_memseg *ms;
72999a2dd95SBruce Richardson 		struct rte_fbarray *arr;
73099a2dd95SBruce Richardson 
73199a2dd95SBruce Richardson 		if (msl->memseg_arr.count == 0)
73299a2dd95SBruce Richardson 			continue;
73399a2dd95SBruce Richardson 
73499a2dd95SBruce Richardson 		arr = &msl->memseg_arr;
73599a2dd95SBruce Richardson 
73699a2dd95SBruce Richardson 		ms_idx = rte_fbarray_find_next_used(arr, 0);
73799a2dd95SBruce Richardson 		while (ms_idx >= 0) {
73899a2dd95SBruce Richardson 			ms = rte_fbarray_get(arr, ms_idx);
73999a2dd95SBruce Richardson 			ret = func(msl, ms, arg);
74099a2dd95SBruce Richardson 			if (ret)
74199a2dd95SBruce Richardson 				return ret;
74299a2dd95SBruce Richardson 			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
74399a2dd95SBruce Richardson 		}
74499a2dd95SBruce Richardson 	}
74599a2dd95SBruce Richardson 	return 0;
74699a2dd95SBruce Richardson }
74799a2dd95SBruce Richardson 
74899a2dd95SBruce Richardson int
74999a2dd95SBruce Richardson rte_memseg_walk(rte_memseg_walk_t func, void *arg)
75099a2dd95SBruce Richardson {
75199a2dd95SBruce Richardson 	int ret = 0;
75299a2dd95SBruce Richardson 
75399a2dd95SBruce Richardson 	/* do not allow allocations/frees/init while we iterate */
75499a2dd95SBruce Richardson 	rte_mcfg_mem_read_lock();
75599a2dd95SBruce Richardson 	ret = rte_memseg_walk_thread_unsafe(func, arg);
75699a2dd95SBruce Richardson 	rte_mcfg_mem_read_unlock();
75799a2dd95SBruce Richardson 
75899a2dd95SBruce Richardson 	return ret;
75999a2dd95SBruce Richardson }
76099a2dd95SBruce Richardson 
76199a2dd95SBruce Richardson int
76299a2dd95SBruce Richardson rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
76399a2dd95SBruce Richardson {
76499a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
76599a2dd95SBruce Richardson 	int i, ret = 0;
76699a2dd95SBruce Richardson 
76799a2dd95SBruce Richardson 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
76899a2dd95SBruce Richardson 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
76999a2dd95SBruce Richardson 
77099a2dd95SBruce Richardson 		if (msl->base_va == NULL)
77199a2dd95SBruce Richardson 			continue;
77299a2dd95SBruce Richardson 
77399a2dd95SBruce Richardson 		ret = func(msl, arg);
77499a2dd95SBruce Richardson 		if (ret)
77599a2dd95SBruce Richardson 			return ret;
77699a2dd95SBruce Richardson 	}
77799a2dd95SBruce Richardson 	return 0;
77899a2dd95SBruce Richardson }
77999a2dd95SBruce Richardson 
78099a2dd95SBruce Richardson int
78199a2dd95SBruce Richardson rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
78299a2dd95SBruce Richardson {
78399a2dd95SBruce Richardson 	int ret = 0;
78499a2dd95SBruce Richardson 
78599a2dd95SBruce Richardson 	/* do not allow allocations/frees/init while we iterate */
78699a2dd95SBruce Richardson 	rte_mcfg_mem_read_lock();
78799a2dd95SBruce Richardson 	ret = rte_memseg_list_walk_thread_unsafe(func, arg);
78899a2dd95SBruce Richardson 	rte_mcfg_mem_read_unlock();
78999a2dd95SBruce Richardson 
79099a2dd95SBruce Richardson 	return ret;
79199a2dd95SBruce Richardson }
79299a2dd95SBruce Richardson 
79399a2dd95SBruce Richardson int
79499a2dd95SBruce Richardson rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
79599a2dd95SBruce Richardson {
79699a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
79799a2dd95SBruce Richardson 	struct rte_memseg_list *msl;
79899a2dd95SBruce Richardson 	struct rte_fbarray *arr;
79999a2dd95SBruce Richardson 	int msl_idx, seg_idx, ret;
80099a2dd95SBruce Richardson 
80199a2dd95SBruce Richardson 	if (ms == NULL) {
80299a2dd95SBruce Richardson 		rte_errno = EINVAL;
80399a2dd95SBruce Richardson 		return -1;
80499a2dd95SBruce Richardson 	}
80599a2dd95SBruce Richardson 
80699a2dd95SBruce Richardson 	msl = rte_mem_virt2memseg_list(ms->addr);
80799a2dd95SBruce Richardson 	if (msl == NULL) {
80899a2dd95SBruce Richardson 		rte_errno = EINVAL;
80999a2dd95SBruce Richardson 		return -1;
81099a2dd95SBruce Richardson 	}
81199a2dd95SBruce Richardson 	arr = &msl->memseg_arr;
81299a2dd95SBruce Richardson 
81399a2dd95SBruce Richardson 	msl_idx = msl - mcfg->memsegs;
81499a2dd95SBruce Richardson 	seg_idx = rte_fbarray_find_idx(arr, ms);
81599a2dd95SBruce Richardson 
81699a2dd95SBruce Richardson 	if (!rte_fbarray_is_used(arr, seg_idx)) {
81799a2dd95SBruce Richardson 		rte_errno = ENOENT;
81899a2dd95SBruce Richardson 		return -1;
81999a2dd95SBruce Richardson 	}
82099a2dd95SBruce Richardson 
82199a2dd95SBruce Richardson 	/* segment fd API is not supported for external segments */
82299a2dd95SBruce Richardson 	if (msl->external) {
82399a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
82499a2dd95SBruce Richardson 		return -1;
82599a2dd95SBruce Richardson 	}
82699a2dd95SBruce Richardson 
82799a2dd95SBruce Richardson 	ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
82899a2dd95SBruce Richardson 	if (ret < 0) {
82999a2dd95SBruce Richardson 		rte_errno = -ret;
83099a2dd95SBruce Richardson 		ret = -1;
83199a2dd95SBruce Richardson 	}
83299a2dd95SBruce Richardson 	return ret;
83399a2dd95SBruce Richardson }
83499a2dd95SBruce Richardson 
/*
 * Locked variant of rte_memseg_get_fd_thread_unsafe(): the lookup runs with
 * the memory read lock held. Returns whatever the unlocked lookup returns.
 */
int
rte_memseg_get_fd(const struct rte_memseg *ms)
{
	int fd;

	rte_mcfg_mem_read_lock();
	fd = rte_memseg_get_fd_thread_unsafe(ms);
	rte_mcfg_mem_read_unlock();

	return fd;
}
84699a2dd95SBruce Richardson 
84799a2dd95SBruce Richardson int
84899a2dd95SBruce Richardson rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
84999a2dd95SBruce Richardson 		size_t *offset)
85099a2dd95SBruce Richardson {
85199a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
85299a2dd95SBruce Richardson 	struct rte_memseg_list *msl;
85399a2dd95SBruce Richardson 	struct rte_fbarray *arr;
85499a2dd95SBruce Richardson 	int msl_idx, seg_idx, ret;
85599a2dd95SBruce Richardson 
85699a2dd95SBruce Richardson 	if (ms == NULL || offset == NULL) {
85799a2dd95SBruce Richardson 		rte_errno = EINVAL;
85899a2dd95SBruce Richardson 		return -1;
85999a2dd95SBruce Richardson 	}
86099a2dd95SBruce Richardson 
86199a2dd95SBruce Richardson 	msl = rte_mem_virt2memseg_list(ms->addr);
86299a2dd95SBruce Richardson 	if (msl == NULL) {
86399a2dd95SBruce Richardson 		rte_errno = EINVAL;
86499a2dd95SBruce Richardson 		return -1;
86599a2dd95SBruce Richardson 	}
86699a2dd95SBruce Richardson 	arr = &msl->memseg_arr;
86799a2dd95SBruce Richardson 
86899a2dd95SBruce Richardson 	msl_idx = msl - mcfg->memsegs;
86999a2dd95SBruce Richardson 	seg_idx = rte_fbarray_find_idx(arr, ms);
87099a2dd95SBruce Richardson 
87199a2dd95SBruce Richardson 	if (!rte_fbarray_is_used(arr, seg_idx)) {
87299a2dd95SBruce Richardson 		rte_errno = ENOENT;
87399a2dd95SBruce Richardson 		return -1;
87499a2dd95SBruce Richardson 	}
87599a2dd95SBruce Richardson 
87699a2dd95SBruce Richardson 	/* segment fd API is not supported for external segments */
87799a2dd95SBruce Richardson 	if (msl->external) {
87899a2dd95SBruce Richardson 		rte_errno = ENOTSUP;
87999a2dd95SBruce Richardson 		return -1;
88099a2dd95SBruce Richardson 	}
88199a2dd95SBruce Richardson 
88299a2dd95SBruce Richardson 	ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
88399a2dd95SBruce Richardson 	if (ret < 0) {
88499a2dd95SBruce Richardson 		rte_errno = -ret;
88599a2dd95SBruce Richardson 		ret = -1;
88699a2dd95SBruce Richardson 	}
88799a2dd95SBruce Richardson 	return ret;
88899a2dd95SBruce Richardson }
88999a2dd95SBruce Richardson 
/*
 * Locked variant of rte_memseg_get_fd_offset_thread_unsafe(): the lookup runs
 * with the memory read lock held. Returns whatever the unlocked lookup
 * returns.
 */
int
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
{
	int rc;

	rte_mcfg_mem_read_lock();
	rc = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
	rte_mcfg_mem_read_unlock();

	return rc;
}
90199a2dd95SBruce Richardson 
90299a2dd95SBruce Richardson int
90399a2dd95SBruce Richardson rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
90499a2dd95SBruce Richardson 		unsigned int n_pages, size_t page_sz)
90599a2dd95SBruce Richardson {
90699a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
90799a2dd95SBruce Richardson 	unsigned int socket_id, n;
90899a2dd95SBruce Richardson 	int ret = 0;
90999a2dd95SBruce Richardson 
91099a2dd95SBruce Richardson 	if (va_addr == NULL || page_sz == 0 || len == 0 ||
91199a2dd95SBruce Richardson 			!rte_is_power_of_2(page_sz) ||
91299a2dd95SBruce Richardson 			RTE_ALIGN(len, page_sz) != len ||
91399a2dd95SBruce Richardson 			((len / page_sz) != n_pages && iova_addrs != NULL) ||
91499a2dd95SBruce Richardson 			!rte_is_aligned(va_addr, page_sz)) {
91599a2dd95SBruce Richardson 		rte_errno = EINVAL;
91699a2dd95SBruce Richardson 		return -1;
91799a2dd95SBruce Richardson 	}
91899a2dd95SBruce Richardson 	rte_mcfg_mem_write_lock();
91999a2dd95SBruce Richardson 
92099a2dd95SBruce Richardson 	/* make sure the segment doesn't already exist */
92199a2dd95SBruce Richardson 	if (malloc_heap_find_external_seg(va_addr, len) != NULL) {
92299a2dd95SBruce Richardson 		rte_errno = EEXIST;
92399a2dd95SBruce Richardson 		ret = -1;
92499a2dd95SBruce Richardson 		goto unlock;
92599a2dd95SBruce Richardson 	}
92699a2dd95SBruce Richardson 
92799a2dd95SBruce Richardson 	/* get next available socket ID */
92899a2dd95SBruce Richardson 	socket_id = mcfg->next_socket_id;
92999a2dd95SBruce Richardson 	if (socket_id > INT32_MAX) {
930ae67895bSDavid Marchand 		EAL_LOG(ERR, "Cannot assign new socket ID's");
93199a2dd95SBruce Richardson 		rte_errno = ENOSPC;
93299a2dd95SBruce Richardson 		ret = -1;
93399a2dd95SBruce Richardson 		goto unlock;
93499a2dd95SBruce Richardson 	}
93599a2dd95SBruce Richardson 
93699a2dd95SBruce Richardson 	/* we can create a new memseg */
93799a2dd95SBruce Richardson 	n = len / page_sz;
93899a2dd95SBruce Richardson 	if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
93999a2dd95SBruce Richardson 			page_sz, "extmem", socket_id) == NULL) {
94099a2dd95SBruce Richardson 		ret = -1;
94199a2dd95SBruce Richardson 		goto unlock;
94299a2dd95SBruce Richardson 	}
94399a2dd95SBruce Richardson 
94499a2dd95SBruce Richardson 	/* memseg list successfully created - increment next socket ID */
94599a2dd95SBruce Richardson 	mcfg->next_socket_id++;
94699a2dd95SBruce Richardson unlock:
94799a2dd95SBruce Richardson 	rte_mcfg_mem_write_unlock();
94899a2dd95SBruce Richardson 	return ret;
94999a2dd95SBruce Richardson }
95099a2dd95SBruce Richardson 
95199a2dd95SBruce Richardson int
95299a2dd95SBruce Richardson rte_extmem_unregister(void *va_addr, size_t len)
95399a2dd95SBruce Richardson {
95499a2dd95SBruce Richardson 	struct rte_memseg_list *msl;
95599a2dd95SBruce Richardson 	int ret = 0;
95699a2dd95SBruce Richardson 
95799a2dd95SBruce Richardson 	if (va_addr == NULL || len == 0) {
95899a2dd95SBruce Richardson 		rte_errno = EINVAL;
95999a2dd95SBruce Richardson 		return -1;
96099a2dd95SBruce Richardson 	}
96199a2dd95SBruce Richardson 	rte_mcfg_mem_write_lock();
96299a2dd95SBruce Richardson 
96399a2dd95SBruce Richardson 	/* find our segment */
96499a2dd95SBruce Richardson 	msl = malloc_heap_find_external_seg(va_addr, len);
96599a2dd95SBruce Richardson 	if (msl == NULL) {
96699a2dd95SBruce Richardson 		rte_errno = ENOENT;
96799a2dd95SBruce Richardson 		ret = -1;
96899a2dd95SBruce Richardson 		goto unlock;
96999a2dd95SBruce Richardson 	}
97099a2dd95SBruce Richardson 
97199a2dd95SBruce Richardson 	ret = malloc_heap_destroy_external_seg(msl);
97299a2dd95SBruce Richardson unlock:
97399a2dd95SBruce Richardson 	rte_mcfg_mem_write_unlock();
97499a2dd95SBruce Richardson 	return ret;
97599a2dd95SBruce Richardson }
97699a2dd95SBruce Richardson 
97799a2dd95SBruce Richardson static int
97899a2dd95SBruce Richardson sync_memory(void *va_addr, size_t len, bool attach)
97999a2dd95SBruce Richardson {
98099a2dd95SBruce Richardson 	struct rte_memseg_list *msl;
98199a2dd95SBruce Richardson 	int ret = 0;
98299a2dd95SBruce Richardson 
98399a2dd95SBruce Richardson 	if (va_addr == NULL || len == 0) {
98499a2dd95SBruce Richardson 		rte_errno = EINVAL;
98599a2dd95SBruce Richardson 		return -1;
98699a2dd95SBruce Richardson 	}
98799a2dd95SBruce Richardson 	rte_mcfg_mem_write_lock();
98899a2dd95SBruce Richardson 
98999a2dd95SBruce Richardson 	/* find our segment */
99099a2dd95SBruce Richardson 	msl = malloc_heap_find_external_seg(va_addr, len);
99199a2dd95SBruce Richardson 	if (msl == NULL) {
99299a2dd95SBruce Richardson 		rte_errno = ENOENT;
99399a2dd95SBruce Richardson 		ret = -1;
99499a2dd95SBruce Richardson 		goto unlock;
99599a2dd95SBruce Richardson 	}
99699a2dd95SBruce Richardson 	if (attach)
99799a2dd95SBruce Richardson 		ret = rte_fbarray_attach(&msl->memseg_arr);
99899a2dd95SBruce Richardson 	else
99999a2dd95SBruce Richardson 		ret = rte_fbarray_detach(&msl->memseg_arr);
100099a2dd95SBruce Richardson 
100199a2dd95SBruce Richardson unlock:
100299a2dd95SBruce Richardson 	rte_mcfg_mem_write_unlock();
100399a2dd95SBruce Richardson 	return ret;
100499a2dd95SBruce Richardson }
100599a2dd95SBruce Richardson 
/*
 * Attach to the external memory area registered at va_addr/len.
 * Return values are those of sync_memory().
 */
int
rte_extmem_attach(void *va_addr, size_t len)
{
	const bool attach = true;

	return sync_memory(va_addr, len, attach);
}
101199a2dd95SBruce Richardson 
/*
 * Detach from the external memory area registered at va_addr/len.
 * Return values are those of sync_memory().
 */
int
rte_extmem_detach(void *va_addr, size_t len)
{
	const bool attach = false;

	return sync_memory(va_addr, len, attach);
}
101799a2dd95SBruce Richardson 
101899a2dd95SBruce Richardson /* detach all EAL memory */
101999a2dd95SBruce Richardson int
102099a2dd95SBruce Richardson rte_eal_memory_detach(void)
102199a2dd95SBruce Richardson {
102299a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
102399a2dd95SBruce Richardson 		eal_get_internal_configuration();
102499a2dd95SBruce Richardson 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
102599a2dd95SBruce Richardson 	size_t page_sz = rte_mem_page_size();
102699a2dd95SBruce Richardson 	unsigned int i;
102799a2dd95SBruce Richardson 
102899a2dd95SBruce Richardson 	if (internal_conf->in_memory == 1)
102999a2dd95SBruce Richardson 		return 0;
103099a2dd95SBruce Richardson 
103199a2dd95SBruce Richardson 	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
103299a2dd95SBruce Richardson 
103399a2dd95SBruce Richardson 	/* detach internal memory subsystem data first */
103499a2dd95SBruce Richardson 	if (eal_memalloc_cleanup())
1035ae67895bSDavid Marchand 		EAL_LOG(ERR, "Could not release memory subsystem data");
103699a2dd95SBruce Richardson 
103799a2dd95SBruce Richardson 	for (i = 0; i < RTE_DIM(mcfg->memsegs); i++) {
103899a2dd95SBruce Richardson 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
103999a2dd95SBruce Richardson 
104099a2dd95SBruce Richardson 		/* skip uninitialized segments */
104199a2dd95SBruce Richardson 		if (msl->base_va == NULL)
104299a2dd95SBruce Richardson 			continue;
104399a2dd95SBruce Richardson 		/*
104499a2dd95SBruce Richardson 		 * external segments are supposed to be detached at this point,
104599a2dd95SBruce Richardson 		 * but if they aren't, we can't really do anything about it,
104699a2dd95SBruce Richardson 		 * because if we skip them here, they'll become invalid after
104799a2dd95SBruce Richardson 		 * we unmap the memconfig anyway. however, if this is externally
104899a2dd95SBruce Richardson 		 * referenced memory, we have no business unmapping it.
104999a2dd95SBruce Richardson 		 */
105099a2dd95SBruce Richardson 		if (!msl->external)
105199a2dd95SBruce Richardson 			if (rte_mem_unmap(msl->base_va, msl->len) != 0)
1052ae67895bSDavid Marchand 				EAL_LOG(ERR, "Could not unmap memory: %s",
105399a2dd95SBruce Richardson 						rte_strerror(rte_errno));
105499a2dd95SBruce Richardson 
105599a2dd95SBruce Richardson 		/*
105699a2dd95SBruce Richardson 		 * we are detaching the fbarray rather than destroying because
105799a2dd95SBruce Richardson 		 * other processes might still reference this fbarray, and we
105899a2dd95SBruce Richardson 		 * have no way of knowing if they still do.
105999a2dd95SBruce Richardson 		 */
106099a2dd95SBruce Richardson 		if (rte_fbarray_detach(&msl->memseg_arr))
1061ae67895bSDavid Marchand 			EAL_LOG(ERR, "Could not detach fbarray: %s",
106299a2dd95SBruce Richardson 					rte_strerror(rte_errno));
106399a2dd95SBruce Richardson 	}
106499a2dd95SBruce Richardson 	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
106599a2dd95SBruce Richardson 
106699a2dd95SBruce Richardson 	/*
106799a2dd95SBruce Richardson 	 * we've detached the memseg lists, so we can unmap the shared mem
106899a2dd95SBruce Richardson 	 * config - we can't zero it out because it might still be referenced
106999a2dd95SBruce Richardson 	 * by other processes.
107099a2dd95SBruce Richardson 	 */
107199a2dd95SBruce Richardson 	if (internal_conf->no_shconf == 0 && mcfg->mem_cfg_addr != 0) {
107299a2dd95SBruce Richardson 		if (rte_mem_unmap(mcfg, RTE_ALIGN(sizeof(*mcfg), page_sz)) != 0)
1073ae67895bSDavid Marchand 			EAL_LOG(ERR, "Could not unmap shared memory config: %s",
107499a2dd95SBruce Richardson 					rte_strerror(rte_errno));
107599a2dd95SBruce Richardson 	}
107699a2dd95SBruce Richardson 	rte_eal_get_configuration()->mem_config = NULL;
107799a2dd95SBruce Richardson 
107899a2dd95SBruce Richardson 	return 0;
107999a2dd95SBruce Richardson }
108099a2dd95SBruce Richardson 
108199a2dd95SBruce Richardson /* init memory subsystem */
108299a2dd95SBruce Richardson int
108399a2dd95SBruce Richardson rte_eal_memory_init(void)
108499a2dd95SBruce Richardson {
108599a2dd95SBruce Richardson 	const struct internal_config *internal_conf =
108699a2dd95SBruce Richardson 		eal_get_internal_configuration();
108799a2dd95SBruce Richardson 	int retval;
10882e2f0272SDavid Marchand 
1089ae67895bSDavid Marchand 	EAL_LOG(DEBUG, "Setting up physically contiguous memory...");
109099a2dd95SBruce Richardson 
109199a2dd95SBruce Richardson 	if (rte_eal_memseg_init() < 0)
109299a2dd95SBruce Richardson 		goto fail;
109399a2dd95SBruce Richardson 
109499a2dd95SBruce Richardson 	if (eal_memalloc_init() < 0)
109599a2dd95SBruce Richardson 		goto fail;
109699a2dd95SBruce Richardson 
109799a2dd95SBruce Richardson 	retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
109899a2dd95SBruce Richardson 			rte_eal_hugepage_init() :
109999a2dd95SBruce Richardson 			rte_eal_hugepage_attach();
110099a2dd95SBruce Richardson 	if (retval < 0)
110199a2dd95SBruce Richardson 		goto fail;
110299a2dd95SBruce Richardson 
110399a2dd95SBruce Richardson 	if (internal_conf->no_shconf == 0 && rte_eal_memdevice_init() < 0)
110499a2dd95SBruce Richardson 		goto fail;
110599a2dd95SBruce Richardson 
110699a2dd95SBruce Richardson 	return 0;
110799a2dd95SBruce Richardson fail:
110899a2dd95SBruce Richardson 	return -1;
110999a2dd95SBruce Richardson }
1110e6732d0dSHarman Kalra 
1111e6732d0dSHarman Kalra #ifndef RTE_EXEC_ENV_WINDOWS
1112e6732d0dSHarman Kalra #define EAL_MEMZONE_LIST_REQ		"/eal/memzone_list"
1113e6732d0dSHarman Kalra #define EAL_MEMZONE_INFO_REQ		"/eal/memzone_info"
1114e6732d0dSHarman Kalra #define EAL_HEAP_LIST_REQ		"/eal/heap_list"
1115e6732d0dSHarman Kalra #define EAL_HEAP_INFO_REQ		"/eal/heap_info"
11162054f31aSAmit Prakash Shukla #define EAL_MEMSEG_LISTS_REQ		"/eal/memseg_lists"
11172054f31aSAmit Prakash Shukla #define EAL_MEMSEG_LIST_INFO_REQ	"/eal/memseg_list_info"
11182054f31aSAmit Prakash Shukla #define EAL_MEMSEG_INFO_REQ		"/eal/memseg_info"
11192054f31aSAmit Prakash Shukla #define EAL_ELEMENT_LIST_REQ		"/eal/mem_element_list"
11202054f31aSAmit Prakash Shukla #define EAL_ELEMENT_INFO_REQ		"/eal/mem_element_info"
1121e6732d0dSHarman Kalra #define ADDR_STR			15
1122e6732d0dSHarman Kalra 
11232054f31aSAmit Prakash Shukla 
1124e6732d0dSHarman Kalra /* Telemetry callback handler to return heap stats for requested heap id. */
1125e6732d0dSHarman Kalra static int
1126e6732d0dSHarman Kalra handle_eal_heap_info_request(const char *cmd __rte_unused, const char *params,
1127e6732d0dSHarman Kalra 			     struct rte_tel_data *d)
1128e6732d0dSHarman Kalra {
1129e6732d0dSHarman Kalra 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1130e6732d0dSHarman Kalra 	struct rte_malloc_socket_stats sock_stats;
1131e6732d0dSHarman Kalra 	struct malloc_heap *heap;
1132e6732d0dSHarman Kalra 	unsigned int heap_id;
1133e6732d0dSHarman Kalra 
1134e6732d0dSHarman Kalra 	if (params == NULL || strlen(params) == 0)
1135e6732d0dSHarman Kalra 		return -1;
1136e6732d0dSHarman Kalra 
1137e6732d0dSHarman Kalra 	heap_id = (unsigned int)strtoul(params, NULL, 10);
1138e6732d0dSHarman Kalra 
1139e6732d0dSHarman Kalra 	/* Get the heap stats of user provided heap id */
1140e6732d0dSHarman Kalra 	heap = &mcfg->malloc_heaps[heap_id];
1141e6732d0dSHarman Kalra 	malloc_heap_get_stats(heap, &sock_stats);
1142e6732d0dSHarman Kalra 
1143e6732d0dSHarman Kalra 	rte_tel_data_start_dict(d);
1144a9dc4888SHuisong Li 	rte_tel_data_add_dict_uint(d, "Heap_id", heap_id);
1145e6732d0dSHarman Kalra 	rte_tel_data_add_dict_string(d, "Name", heap->name);
1146af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Heap_size",
1147e6732d0dSHarman Kalra 				   sock_stats.heap_totalsz_bytes);
1148af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Free_size",
1149af0785a2SBruce Richardson 				   sock_stats.heap_freesz_bytes);
1150af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Alloc_size",
1151e6732d0dSHarman Kalra 				   sock_stats.heap_allocsz_bytes);
1152af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Greatest_free_size",
1153e6732d0dSHarman Kalra 				   sock_stats.greatest_free_size);
1154af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Alloc_count", sock_stats.alloc_count);
1155af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Free_count", sock_stats.free_count);
1156e6732d0dSHarman Kalra 
1157e6732d0dSHarman Kalra 	return 0;
1158e6732d0dSHarman Kalra }
1159e6732d0dSHarman Kalra 
1160e6732d0dSHarman Kalra /* Telemetry callback handler to list the heap ids setup. */
1161e6732d0dSHarman Kalra static int
1162e6732d0dSHarman Kalra handle_eal_heap_list_request(const char *cmd __rte_unused,
1163e6732d0dSHarman Kalra 				const char *params __rte_unused,
1164e6732d0dSHarman Kalra 				struct rte_tel_data *d)
1165e6732d0dSHarman Kalra {
1166e6732d0dSHarman Kalra 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1167e6732d0dSHarman Kalra 	struct rte_malloc_socket_stats sock_stats;
1168e6732d0dSHarman Kalra 	unsigned int heap_id;
1169e6732d0dSHarman Kalra 
1170e6732d0dSHarman Kalra 	rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1171e6732d0dSHarman Kalra 	/* Iterate through all initialised heaps */
1172e6732d0dSHarman Kalra 	for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
1173e6732d0dSHarman Kalra 		struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
1174e6732d0dSHarman Kalra 
1175e6732d0dSHarman Kalra 		malloc_heap_get_stats(heap, &sock_stats);
1176e6732d0dSHarman Kalra 		if (sock_stats.heap_totalsz_bytes != 0)
1177e6732d0dSHarman Kalra 			rte_tel_data_add_array_int(d, heap_id);
1178e6732d0dSHarman Kalra 	}
1179e6732d0dSHarman Kalra 
1180e6732d0dSHarman Kalra 	return 0;
1181e6732d0dSHarman Kalra }
1182e6732d0dSHarman Kalra 
1183e6732d0dSHarman Kalra /* Telemetry callback handler to return memzone info for requested index. */
1184e6732d0dSHarman Kalra static int
1185e6732d0dSHarman Kalra handle_eal_memzone_info_request(const char *cmd __rte_unused,
1186e6732d0dSHarman Kalra 				const char *params, struct rte_tel_data *d)
1187e6732d0dSHarman Kalra {
1188e6732d0dSHarman Kalra 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1189e6732d0dSHarman Kalra 	struct rte_memseg_list *msl = NULL;
1190e6732d0dSHarman Kalra 	int ms_idx, ms_count = 0;
1191e6732d0dSHarman Kalra 	void *cur_addr, *mz_end;
1192e6732d0dSHarman Kalra 	struct rte_memzone *mz;
1193e6732d0dSHarman Kalra 	struct rte_memseg *ms;
1194e6732d0dSHarman Kalra 	char addr[ADDR_STR];
1195e6732d0dSHarman Kalra 	unsigned int mz_idx;
1196e6732d0dSHarman Kalra 	size_t page_sz;
1197e6732d0dSHarman Kalra 
1198e6732d0dSHarman Kalra 	if (params == NULL || strlen(params) == 0)
1199e6732d0dSHarman Kalra 		return -1;
1200e6732d0dSHarman Kalra 
1201e6732d0dSHarman Kalra 	mz_idx = strtoul(params, NULL, 10);
1202e6732d0dSHarman Kalra 
1203e6732d0dSHarman Kalra 	/* Get the memzone handle using index */
1204e6732d0dSHarman Kalra 	mz = rte_fbarray_get(&mcfg->memzones, mz_idx);
1205e6732d0dSHarman Kalra 
1206e6732d0dSHarman Kalra 	rte_tel_data_start_dict(d);
1207af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Zone", mz_idx);
1208e6732d0dSHarman Kalra 	rte_tel_data_add_dict_string(d, "Name", mz->name);
1209af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Length", mz->len);
1210e6732d0dSHarman Kalra 	snprintf(addr, ADDR_STR, "%p", mz->addr);
1211e6732d0dSHarman Kalra 	rte_tel_data_add_dict_string(d, "Address", addr);
1212e6732d0dSHarman Kalra 	rte_tel_data_add_dict_int(d, "Socket", mz->socket_id);
1213af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Flags", mz->flags);
1214e6732d0dSHarman Kalra 
1215e6732d0dSHarman Kalra 	/* go through each page occupied by this memzone */
1216e6732d0dSHarman Kalra 	msl = rte_mem_virt2memseg_list(mz->addr);
1217e6732d0dSHarman Kalra 	if (!msl) {
1218ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Skipping bad memzone");
1219e6732d0dSHarman Kalra 		return -1;
1220e6732d0dSHarman Kalra 	}
1221e6732d0dSHarman Kalra 	page_sz = (size_t)mz->hugepage_sz;
1222e6732d0dSHarman Kalra 	cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
1223e6732d0dSHarman Kalra 	mz_end = RTE_PTR_ADD(cur_addr, mz->len);
1224e6732d0dSHarman Kalra 
1225e6732d0dSHarman Kalra 	ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
1226e6732d0dSHarman Kalra 	ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
1227e6732d0dSHarman Kalra 
1228af0785a2SBruce Richardson 	rte_tel_data_add_dict_uint(d, "Hugepage_size", page_sz);
1229e6732d0dSHarman Kalra 	snprintf(addr, ADDR_STR, "%p", ms->addr);
1230e6732d0dSHarman Kalra 	rte_tel_data_add_dict_string(d, "Hugepage_base", addr);
1231e6732d0dSHarman Kalra 
1232e6732d0dSHarman Kalra 	do {
1233e6732d0dSHarman Kalra 		/* advance VA to next page */
1234e6732d0dSHarman Kalra 		cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
1235e6732d0dSHarman Kalra 
1236e6732d0dSHarman Kalra 		/* memzones occupy contiguous segments */
1237e6732d0dSHarman Kalra 		++ms;
1238e6732d0dSHarman Kalra 		ms_count++;
1239e6732d0dSHarman Kalra 	} while (cur_addr < mz_end);
1240e6732d0dSHarman Kalra 
1241e6732d0dSHarman Kalra 	rte_tel_data_add_dict_int(d, "Hugepage_used", ms_count);
1242e6732d0dSHarman Kalra 
1243e6732d0dSHarman Kalra 	return 0;
1244e6732d0dSHarman Kalra }
1245e6732d0dSHarman Kalra 
1246e6732d0dSHarman Kalra static void
1247e6732d0dSHarman Kalra memzone_list_cb(const struct rte_memzone *mz __rte_unused,
1248e6732d0dSHarman Kalra 		 void *arg __rte_unused)
1249e6732d0dSHarman Kalra {
1250e6732d0dSHarman Kalra 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1251e6732d0dSHarman Kalra 	struct rte_tel_data *d = arg;
1252e6732d0dSHarman Kalra 	int mz_idx;
1253e6732d0dSHarman Kalra 
1254e6732d0dSHarman Kalra 	mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
1255e6732d0dSHarman Kalra 	rte_tel_data_add_array_int(d, mz_idx);
1256e6732d0dSHarman Kalra }
1257e6732d0dSHarman Kalra 
1258e6732d0dSHarman Kalra 
1259e6732d0dSHarman Kalra /* Telemetry callback handler to list the memzones reserved. */
1260e6732d0dSHarman Kalra static int
1261e6732d0dSHarman Kalra handle_eal_memzone_list_request(const char *cmd __rte_unused,
1262e6732d0dSHarman Kalra 				const char *params __rte_unused,
1263e6732d0dSHarman Kalra 				struct rte_tel_data *d)
1264e6732d0dSHarman Kalra {
1265e6732d0dSHarman Kalra 	rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1266e6732d0dSHarman Kalra 	rte_memzone_walk(memzone_list_cb, d);
1267e6732d0dSHarman Kalra 
1268e6732d0dSHarman Kalra 	return 0;
1269e6732d0dSHarman Kalra }
1270e6732d0dSHarman Kalra 
/*
 * Parse n_vals comma-separated unsigned decimal values from params into vals.
 * n_vals is the number of params to be parsed.
 *
 * Runs of commas count as a single separator and each field must start with
 * a digit; whatever follows the leading digits of a field is ignored. Parsing
 * stops as soon as n_vals values have been stored, so surplus fields are not
 * examined.
 *
 * Returns 0 when n_vals values were stored, -1 on NULL or empty input, when
 * fewer than n_vals values could be parsed, or when a value does not fit in
 * a uint32_t.
 */
static int
parse_params(const char *params, uint32_t *vals, size_t n_vals)
{
	static const char dlim[] = ",";
	const char *cursor;
	size_t count = 0;

	if (vals == NULL || params == NULL || strlen(params) == 0)
		return -1;

	/*
	 * Walk the string in place rather than tokenising a copy: strtok()
	 * keeps hidden static state, so it is unsafe if two callers parse at
	 * the same time, and the copy itself could fail to allocate.
	 */
	cursor = params;
	while (count < n_vals) {
		unsigned long long val;

		/* skip separators in front of the next field */
		cursor += strspn(cursor, dlim);

		/* <ctype.h> functions require an unsigned char value */
		if (!isdigit((unsigned char)*cursor))
			break;

		errno = 0;
		val = strtoull(cursor, NULL, 10);
		/* refuse values that would be silently truncated */
		if (errno == ERANGE || val > UINT32_MAX)
			return -1;

		vals[count++] = (uint32_t)val;

		/* jump to the separator that ends this field (or to the NUL) */
		cursor += strcspn(cursor, dlim);
	}

	return count < n_vals ? -1 : 0;
}
13032054f31aSAmit Prakash Shukla 
13042054f31aSAmit Prakash Shukla static int
13052054f31aSAmit Prakash Shukla handle_eal_memseg_lists_request(const char *cmd __rte_unused,
13062054f31aSAmit Prakash Shukla 				const char *params __rte_unused,
13072054f31aSAmit Prakash Shukla 				struct rte_tel_data *d)
13082054f31aSAmit Prakash Shukla {
13092054f31aSAmit Prakash Shukla 	struct rte_mem_config *mcfg;
13102054f31aSAmit Prakash Shukla 	int i;
13112054f31aSAmit Prakash Shukla 
13122054f31aSAmit Prakash Shukla 	rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
13132054f31aSAmit Prakash Shukla 
13142054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_lock();
13152054f31aSAmit Prakash Shukla 	mcfg = rte_eal_get_configuration()->mem_config;
13162054f31aSAmit Prakash Shukla 
13172054f31aSAmit Prakash Shukla 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
13182054f31aSAmit Prakash Shukla 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
13192054f31aSAmit Prakash Shukla 		if (msl->memseg_arr.count == 0)
13202054f31aSAmit Prakash Shukla 			continue;
13212054f31aSAmit Prakash Shukla 
13222054f31aSAmit Prakash Shukla 		rte_tel_data_add_array_int(d, i);
13232054f31aSAmit Prakash Shukla 	}
13242054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_unlock();
13252054f31aSAmit Prakash Shukla 
13262054f31aSAmit Prakash Shukla 	return 0;
13272054f31aSAmit Prakash Shukla }
13282054f31aSAmit Prakash Shukla 
13292054f31aSAmit Prakash Shukla static int
13302054f31aSAmit Prakash Shukla handle_eal_memseg_list_info_request(const char *cmd __rte_unused,
13312054f31aSAmit Prakash Shukla 				    const char *params, struct rte_tel_data *d)
13322054f31aSAmit Prakash Shukla {
13332054f31aSAmit Prakash Shukla 	struct rte_mem_config *mcfg;
13342054f31aSAmit Prakash Shukla 	struct rte_memseg_list *msl;
13352054f31aSAmit Prakash Shukla 	struct rte_fbarray *arr;
13362054f31aSAmit Prakash Shukla 	uint32_t ms_list_idx;
13372054f31aSAmit Prakash Shukla 	int ms_idx;
13382054f31aSAmit Prakash Shukla 	/* size of an array == num params to be parsed. */
13392054f31aSAmit Prakash Shukla 	uint32_t vals[1] = {0};
13402054f31aSAmit Prakash Shukla 
13412054f31aSAmit Prakash Shukla 	if (parse_params(params, vals, RTE_DIM(vals)) < 0)
13422054f31aSAmit Prakash Shukla 		return -1;
13432054f31aSAmit Prakash Shukla 
13442054f31aSAmit Prakash Shukla 	ms_list_idx = vals[0];
13452054f31aSAmit Prakash Shukla 	if (ms_list_idx >= RTE_MAX_MEMSEG_LISTS)
13462054f31aSAmit Prakash Shukla 		return -1;
13472054f31aSAmit Prakash Shukla 
13482054f31aSAmit Prakash Shukla 	rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
13492054f31aSAmit Prakash Shukla 
13502054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_lock();
13512054f31aSAmit Prakash Shukla 	mcfg = rte_eal_get_configuration()->mem_config;
13522054f31aSAmit Prakash Shukla 	msl = &mcfg->memsegs[ms_list_idx];
13532054f31aSAmit Prakash Shukla 	if (msl->memseg_arr.count == 0)
13542054f31aSAmit Prakash Shukla 		goto done;
13552054f31aSAmit Prakash Shukla 
13562054f31aSAmit Prakash Shukla 	arr = &msl->memseg_arr;
13572054f31aSAmit Prakash Shukla 
13582054f31aSAmit Prakash Shukla 	ms_idx = rte_fbarray_find_next_used(arr, 0);
13592054f31aSAmit Prakash Shukla 	while (ms_idx >= 0) {
13602054f31aSAmit Prakash Shukla 		rte_tel_data_add_array_int(d, ms_idx);
13612054f31aSAmit Prakash Shukla 		ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
13622054f31aSAmit Prakash Shukla 	}
13632054f31aSAmit Prakash Shukla 
13642054f31aSAmit Prakash Shukla done:
13652054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_unlock();
13662054f31aSAmit Prakash Shukla 
13672054f31aSAmit Prakash Shukla 	return 0;
13682054f31aSAmit Prakash Shukla }
13692054f31aSAmit Prakash Shukla 
13702054f31aSAmit Prakash Shukla static int
13712054f31aSAmit Prakash Shukla handle_eal_memseg_info_request(const char *cmd __rte_unused,
13722054f31aSAmit Prakash Shukla 			       const char *params, struct rte_tel_data *d)
13732054f31aSAmit Prakash Shukla {
13742054f31aSAmit Prakash Shukla 	struct rte_mem_config *mcfg;
13752054f31aSAmit Prakash Shukla 	uint64_t ms_start_addr, ms_end_addr, ms_size, hugepage_size, ms_iova;
13762054f31aSAmit Prakash Shukla 	struct rte_memseg_list *msl;
13772054f31aSAmit Prakash Shukla 	const struct rte_memseg *ms;
13782054f31aSAmit Prakash Shukla 	struct rte_fbarray *arr;
13792054f31aSAmit Prakash Shukla 	char addr[ADDR_STR];
13802054f31aSAmit Prakash Shukla 	uint32_t ms_list_idx = 0;
13812054f31aSAmit Prakash Shukla 	uint32_t ms_idx = 0;
13822054f31aSAmit Prakash Shukla 	int32_t ms_socket_id;
13832054f31aSAmit Prakash Shukla 	uint32_t ms_flags;
13842054f31aSAmit Prakash Shukla 	/* size of an array == num params to be parsed. */
13852054f31aSAmit Prakash Shukla 	uint32_t vals[2] = {0};
13862054f31aSAmit Prakash Shukla 
13872054f31aSAmit Prakash Shukla 	if (parse_params(params, vals, RTE_DIM(vals)) < 0)
13882054f31aSAmit Prakash Shukla 		return -1;
13892054f31aSAmit Prakash Shukla 
13902054f31aSAmit Prakash Shukla 	ms_list_idx = vals[0];
13912054f31aSAmit Prakash Shukla 	if (ms_list_idx >= RTE_MAX_MEMSEG_LISTS)
13922054f31aSAmit Prakash Shukla 		return -1;
13932054f31aSAmit Prakash Shukla 
13942054f31aSAmit Prakash Shukla 	ms_idx = vals[1];
13952054f31aSAmit Prakash Shukla 
13962054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_lock();
13972054f31aSAmit Prakash Shukla 
13982054f31aSAmit Prakash Shukla 	mcfg = rte_eal_get_configuration()->mem_config;
13992054f31aSAmit Prakash Shukla 	msl = &mcfg->memsegs[ms_list_idx];
14002054f31aSAmit Prakash Shukla 	if (msl->memseg_arr.count == 0) {
14012054f31aSAmit Prakash Shukla 		rte_mcfg_mem_read_unlock();
14022054f31aSAmit Prakash Shukla 		return -1;
14032054f31aSAmit Prakash Shukla 	}
14042054f31aSAmit Prakash Shukla 
14052054f31aSAmit Prakash Shukla 	arr = &msl->memseg_arr;
14062054f31aSAmit Prakash Shukla 	ms = rte_fbarray_get(arr, ms_idx);
14072054f31aSAmit Prakash Shukla 	if (ms == NULL) {
14082054f31aSAmit Prakash Shukla 		rte_mcfg_mem_read_unlock();
1409ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Error fetching requested memseg.");
14102054f31aSAmit Prakash Shukla 		return -1;
14112054f31aSAmit Prakash Shukla 	}
14122054f31aSAmit Prakash Shukla 
14132054f31aSAmit Prakash Shukla 	ms_iova = ms->iova;
14142054f31aSAmit Prakash Shukla 	ms_start_addr = ms->addr_64;
14152054f31aSAmit Prakash Shukla 	ms_end_addr = (uint64_t)RTE_PTR_ADD(ms_start_addr, ms->len);
14162054f31aSAmit Prakash Shukla 	ms_size = ms->len;
14172054f31aSAmit Prakash Shukla 	hugepage_size = ms->hugepage_sz;
14182054f31aSAmit Prakash Shukla 	ms_socket_id = ms->socket_id;
14192054f31aSAmit Prakash Shukla 	ms_flags = ms->flags;
14202054f31aSAmit Prakash Shukla 
14212054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_unlock();
14222054f31aSAmit Prakash Shukla 
14232054f31aSAmit Prakash Shukla 	rte_tel_data_start_dict(d);
14242054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "Memseg_list_index", ms_list_idx);
14252054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "Memseg_index", ms_idx);
14262054f31aSAmit Prakash Shukla 	if (ms_iova == RTE_BAD_IOVA)
14272054f31aSAmit Prakash Shukla 		snprintf(addr, ADDR_STR, "Bad IOVA");
14282054f31aSAmit Prakash Shukla 	else
14292054f31aSAmit Prakash Shukla 		snprintf(addr, ADDR_STR, "0x%"PRIx64, ms_iova);
14302054f31aSAmit Prakash Shukla 
14312054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_string(d, "IOVA_addr", addr);
14322054f31aSAmit Prakash Shukla 	snprintf(addr, ADDR_STR, "0x%"PRIx64, ms_start_addr);
14332054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_string(d, "Start_addr", addr);
14342054f31aSAmit Prakash Shukla 	snprintf(addr, ADDR_STR, "0x%"PRIx64, ms_end_addr);
14352054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_string(d, "End_addr", addr);
14362054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_uint(d, "Size", ms_size);
14372054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_uint(d, "Hugepage_size", hugepage_size);
14382054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "Socket_id", ms_socket_id);
14392054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "flags", ms_flags);
14402054f31aSAmit Prakash Shukla 
14412054f31aSAmit Prakash Shukla 	return 0;
14422054f31aSAmit Prakash Shukla }
14432054f31aSAmit Prakash Shukla 
14442054f31aSAmit Prakash Shukla static int
14452054f31aSAmit Prakash Shukla handle_eal_element_list_request(const char *cmd __rte_unused,
14462054f31aSAmit Prakash Shukla 				const char *params, struct rte_tel_data *d)
14472054f31aSAmit Prakash Shukla {
14482054f31aSAmit Prakash Shukla 	struct rte_mem_config *mcfg;
14492054f31aSAmit Prakash Shukla 	struct rte_memseg_list *msl;
14502054f31aSAmit Prakash Shukla 	const struct rte_memseg *ms;
14512054f31aSAmit Prakash Shukla 	struct malloc_elem *elem;
14522054f31aSAmit Prakash Shukla 	struct malloc_heap *heap;
14532054f31aSAmit Prakash Shukla 	uint64_t ms_start_addr, ms_end_addr;
14542054f31aSAmit Prakash Shukla 	uint64_t elem_start_addr, elem_end_addr;
14552054f31aSAmit Prakash Shukla 	uint32_t ms_list_idx = 0;
14562054f31aSAmit Prakash Shukla 	uint32_t heap_id = 0;
14572054f31aSAmit Prakash Shukla 	uint32_t ms_idx = 0;
14582054f31aSAmit Prakash Shukla 	int elem_count = 0;
14592054f31aSAmit Prakash Shukla 	/* size of an array == num params to be parsed. */
14602054f31aSAmit Prakash Shukla 	uint32_t vals[3] = {0};
14612054f31aSAmit Prakash Shukla 
14622054f31aSAmit Prakash Shukla 	if (parse_params(params, vals, RTE_DIM(vals)) < 0)
14632054f31aSAmit Prakash Shukla 		return -1;
14642054f31aSAmit Prakash Shukla 
14652054f31aSAmit Prakash Shukla 	heap_id = vals[0];
14662054f31aSAmit Prakash Shukla 	if (heap_id >= RTE_MAX_HEAPS)
14672054f31aSAmit Prakash Shukla 		return -1;
14682054f31aSAmit Prakash Shukla 
14692054f31aSAmit Prakash Shukla 	ms_list_idx = vals[1];
14702054f31aSAmit Prakash Shukla 	if (ms_list_idx >= RTE_MAX_MEMSEG_LISTS)
14712054f31aSAmit Prakash Shukla 		return -1;
14722054f31aSAmit Prakash Shukla 
14732054f31aSAmit Prakash Shukla 	ms_idx = vals[2];
14742054f31aSAmit Prakash Shukla 
14752054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_lock();
14762054f31aSAmit Prakash Shukla 
14772054f31aSAmit Prakash Shukla 	mcfg = rte_eal_get_configuration()->mem_config;
14782054f31aSAmit Prakash Shukla 	msl = &mcfg->memsegs[ms_list_idx];
14792054f31aSAmit Prakash Shukla 	ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
14802054f31aSAmit Prakash Shukla 	if (ms == NULL) {
14812054f31aSAmit Prakash Shukla 		rte_mcfg_mem_read_unlock();
1482ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Error fetching requested memseg.");
14832054f31aSAmit Prakash Shukla 		return -1;
14842054f31aSAmit Prakash Shukla 	}
14852054f31aSAmit Prakash Shukla 
14862054f31aSAmit Prakash Shukla 	ms_start_addr = ms->addr_64;
14872054f31aSAmit Prakash Shukla 	ms_end_addr = (uint64_t)RTE_PTR_ADD(ms_start_addr, ms->len);
14882054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_unlock();
14892054f31aSAmit Prakash Shukla 
14902054f31aSAmit Prakash Shukla 	rte_tel_data_start_dict(d);
14912054f31aSAmit Prakash Shukla 
14922054f31aSAmit Prakash Shukla 	heap = &mcfg->malloc_heaps[heap_id];
14932054f31aSAmit Prakash Shukla 	rte_spinlock_lock(&heap->lock);
14942054f31aSAmit Prakash Shukla 
14952054f31aSAmit Prakash Shukla 	elem = heap->first;
14962054f31aSAmit Prakash Shukla 	while (elem) {
14972054f31aSAmit Prakash Shukla 		elem_start_addr = (uint64_t)elem;
14982054f31aSAmit Prakash Shukla 		elem_end_addr =
14992054f31aSAmit Prakash Shukla 			(uint64_t)RTE_PTR_ADD(elem_start_addr, elem->size);
15002054f31aSAmit Prakash Shukla 
15012054f31aSAmit Prakash Shukla 		if ((uint64_t)elem_start_addr >= ms_start_addr &&
15022054f31aSAmit Prakash Shukla 		    (uint64_t)elem_end_addr <= ms_end_addr)
15032054f31aSAmit Prakash Shukla 			elem_count++;
15042054f31aSAmit Prakash Shukla 		elem = elem->next;
15052054f31aSAmit Prakash Shukla 	}
15062054f31aSAmit Prakash Shukla 
15072054f31aSAmit Prakash Shukla 	rte_spinlock_unlock(&heap->lock);
15082054f31aSAmit Prakash Shukla 
15092054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "Element_count", elem_count);
15102054f31aSAmit Prakash Shukla 
15112054f31aSAmit Prakash Shukla 	return 0;
15122054f31aSAmit Prakash Shukla }
15132054f31aSAmit Prakash Shukla 
15142054f31aSAmit Prakash Shukla static int
15152054f31aSAmit Prakash Shukla handle_eal_element_info_request(const char *cmd __rte_unused,
15162054f31aSAmit Prakash Shukla 				const char *params, struct rte_tel_data *d)
15172054f31aSAmit Prakash Shukla {
15182054f31aSAmit Prakash Shukla 	struct rte_mem_config *mcfg;
15192054f31aSAmit Prakash Shukla 	struct rte_memseg_list *msl;
15202054f31aSAmit Prakash Shukla 	const struct rte_memseg *ms;
15212054f31aSAmit Prakash Shukla 	struct malloc_elem *elem;
15222054f31aSAmit Prakash Shukla 	struct malloc_heap *heap;
15232054f31aSAmit Prakash Shukla 	struct rte_tel_data *c;
15242054f31aSAmit Prakash Shukla 	uint64_t ms_start_addr, ms_end_addr;
15252054f31aSAmit Prakash Shukla 	uint64_t elem_start_addr, elem_end_addr;
15262054f31aSAmit Prakash Shukla 	uint32_t ms_list_idx = 0;
15272054f31aSAmit Prakash Shukla 	uint32_t heap_id = 0;
15282054f31aSAmit Prakash Shukla 	uint32_t ms_idx = 0;
15292054f31aSAmit Prakash Shukla 	uint32_t start_elem = 0, end_elem = 0;
15302054f31aSAmit Prakash Shukla 	uint32_t count = 0, elem_count = 0;
15312054f31aSAmit Prakash Shukla 	char str[ADDR_STR];
15322054f31aSAmit Prakash Shukla 	/* size of an array == num params to be parsed. */
15332054f31aSAmit Prakash Shukla 	uint32_t vals[5] = {0};
15342054f31aSAmit Prakash Shukla 
15352054f31aSAmit Prakash Shukla 	if (parse_params(params, vals, RTE_DIM(vals)) < 0)
15362054f31aSAmit Prakash Shukla 		return -1;
15372054f31aSAmit Prakash Shukla 
15382054f31aSAmit Prakash Shukla 	heap_id = vals[0];
15392054f31aSAmit Prakash Shukla 	if (heap_id >= RTE_MAX_HEAPS)
15402054f31aSAmit Prakash Shukla 		return -1;
15412054f31aSAmit Prakash Shukla 
15422054f31aSAmit Prakash Shukla 	ms_list_idx = vals[1];
15432054f31aSAmit Prakash Shukla 	if (ms_list_idx >= RTE_MAX_MEMSEG_LISTS)
15442054f31aSAmit Prakash Shukla 		return -1;
15452054f31aSAmit Prakash Shukla 
15462054f31aSAmit Prakash Shukla 	ms_idx = vals[2];
15472054f31aSAmit Prakash Shukla 	start_elem = vals[3];
15482054f31aSAmit Prakash Shukla 	end_elem = vals[4];
15492054f31aSAmit Prakash Shukla 
15502054f31aSAmit Prakash Shukla 	if (end_elem < start_elem)
15512054f31aSAmit Prakash Shukla 		return -1;
15522054f31aSAmit Prakash Shukla 
15532054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_lock();
15542054f31aSAmit Prakash Shukla 
15552054f31aSAmit Prakash Shukla 	mcfg = rte_eal_get_configuration()->mem_config;
15562054f31aSAmit Prakash Shukla 	msl = &mcfg->memsegs[ms_list_idx];
15572054f31aSAmit Prakash Shukla 	ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
15582054f31aSAmit Prakash Shukla 	if (ms == NULL) {
15592054f31aSAmit Prakash Shukla 		rte_mcfg_mem_read_unlock();
1560ae67895bSDavid Marchand 		EAL_LOG(DEBUG, "Error fetching requested memseg.");
15612054f31aSAmit Prakash Shukla 		return -1;
15622054f31aSAmit Prakash Shukla 	}
15632054f31aSAmit Prakash Shukla 
15642054f31aSAmit Prakash Shukla 	ms_start_addr = ms->addr_64;
15652054f31aSAmit Prakash Shukla 	ms_end_addr = (uint64_t)RTE_PTR_ADD(ms_start_addr, ms->len);
15662054f31aSAmit Prakash Shukla 
15672054f31aSAmit Prakash Shukla 	rte_mcfg_mem_read_unlock();
15682054f31aSAmit Prakash Shukla 
15692054f31aSAmit Prakash Shukla 	rte_tel_data_start_dict(d);
15702054f31aSAmit Prakash Shukla 
15712054f31aSAmit Prakash Shukla 	heap = &mcfg->malloc_heaps[heap_id];
15722054f31aSAmit Prakash Shukla 	rte_spinlock_lock(&heap->lock);
15732054f31aSAmit Prakash Shukla 
15742054f31aSAmit Prakash Shukla 	elem = heap->first;
15752054f31aSAmit Prakash Shukla 	while (elem) {
15762054f31aSAmit Prakash Shukla 		elem_start_addr = (uint64_t)elem;
15772054f31aSAmit Prakash Shukla 		elem_end_addr =
15782054f31aSAmit Prakash Shukla 			(uint64_t)RTE_PTR_ADD(elem_start_addr, elem->size);
15792054f31aSAmit Prakash Shukla 
15802054f31aSAmit Prakash Shukla 		if (elem_start_addr < ms_start_addr ||
15812054f31aSAmit Prakash Shukla 				elem_end_addr > ms_end_addr) {
15822054f31aSAmit Prakash Shukla 			elem = elem->next;
15832054f31aSAmit Prakash Shukla 			continue;
15842054f31aSAmit Prakash Shukla 		}
15852054f31aSAmit Prakash Shukla 
15862054f31aSAmit Prakash Shukla 		if (count < start_elem) {
15872054f31aSAmit Prakash Shukla 			elem = elem->next;
15882054f31aSAmit Prakash Shukla 			count++;
15892054f31aSAmit Prakash Shukla 			continue;
15902054f31aSAmit Prakash Shukla 		}
15912054f31aSAmit Prakash Shukla 
15922054f31aSAmit Prakash Shukla 		c = rte_tel_data_alloc();
15932054f31aSAmit Prakash Shukla 		if (c == NULL)
15942054f31aSAmit Prakash Shukla 			break;
15952054f31aSAmit Prakash Shukla 
15962054f31aSAmit Prakash Shukla 		rte_tel_data_start_dict(c);
15972054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_int(c, "msl_id", ms_list_idx);
15982054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_int(c, "ms_id", ms_idx);
15992054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "0x%"PRIx64, ms_start_addr);
16002054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_string(c, "memseg_start_addr", str);
16012054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "0x%"PRIx64, ms_end_addr);
16022054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_string(c, "memseg_end_addr", str);
16032054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "0x%"PRIx64, elem_start_addr);
16042054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_string(c, "element_start_addr", str);
16052054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "0x%"PRIx64, elem_end_addr);
16062054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_string(c, "element_end_addr", str);
16072054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_int(c, "element_size", elem->size);
16082054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "%s", elem->state == 0 ? "Free" :
16092054f31aSAmit Prakash Shukla 			 elem->state == 1 ? "Busy" : elem->state == 2 ?
16102054f31aSAmit Prakash Shukla 			 "Pad" : "Error");
16112054f31aSAmit Prakash Shukla 		rte_tel_data_add_dict_string(c, "element_state", str);
16122054f31aSAmit Prakash Shukla 
16132054f31aSAmit Prakash Shukla 		snprintf(str, ADDR_STR, "%s_%u", "element", count);
16142054f31aSAmit Prakash Shukla 		if (rte_tel_data_add_dict_container(d, str, c, 0) != 0) {
16152054f31aSAmit Prakash Shukla 			rte_tel_data_free(c);
16162054f31aSAmit Prakash Shukla 			break;
16172054f31aSAmit Prakash Shukla 		}
16182054f31aSAmit Prakash Shukla 
16192054f31aSAmit Prakash Shukla 		elem_count++;
16202054f31aSAmit Prakash Shukla 		count++;
16212054f31aSAmit Prakash Shukla 		if (count > end_elem)
16222054f31aSAmit Prakash Shukla 			break;
16232054f31aSAmit Prakash Shukla 
16242054f31aSAmit Prakash Shukla 		elem = elem->next;
16252054f31aSAmit Prakash Shukla 	}
16262054f31aSAmit Prakash Shukla 
16272054f31aSAmit Prakash Shukla 	rte_spinlock_unlock(&heap->lock);
16282054f31aSAmit Prakash Shukla 
16292054f31aSAmit Prakash Shukla 	rte_tel_data_add_dict_int(d, "Element_count", elem_count);
16302054f31aSAmit Prakash Shukla 
16312054f31aSAmit Prakash Shukla 	return 0;
16322054f31aSAmit Prakash Shukla }
16332054f31aSAmit Prakash Shukla 
1634e6732d0dSHarman Kalra RTE_INIT(memory_telemetry)
1635e6732d0dSHarman Kalra {
1636e6732d0dSHarman Kalra 	rte_telemetry_register_cmd(
1637e6732d0dSHarman Kalra 			EAL_MEMZONE_LIST_REQ, handle_eal_memzone_list_request,
1638e6732d0dSHarman Kalra 			"List of memzone index reserved. Takes no parameters");
1639e6732d0dSHarman Kalra 	rte_telemetry_register_cmd(
1640e6732d0dSHarman Kalra 			EAL_MEMZONE_INFO_REQ, handle_eal_memzone_info_request,
1641e6732d0dSHarman Kalra 			"Returns memzone info. Parameters: int mz_id");
1642e6732d0dSHarman Kalra 	rte_telemetry_register_cmd(
1643e6732d0dSHarman Kalra 			EAL_HEAP_LIST_REQ, handle_eal_heap_list_request,
1644e6732d0dSHarman Kalra 			"List of heap index setup. Takes no parameters");
1645e6732d0dSHarman Kalra 	rte_telemetry_register_cmd(
1646e6732d0dSHarman Kalra 			EAL_HEAP_INFO_REQ, handle_eal_heap_info_request,
1647e6732d0dSHarman Kalra 			"Returns malloc heap stats. Parameters: int heap_id");
16482054f31aSAmit Prakash Shukla 	rte_telemetry_register_cmd(
16492054f31aSAmit Prakash Shukla 			EAL_MEMSEG_LISTS_REQ,
16502054f31aSAmit Prakash Shukla 			handle_eal_memseg_lists_request,
16512054f31aSAmit Prakash Shukla 			"Returns array of memseg list IDs. Takes no parameters");
16522054f31aSAmit Prakash Shukla 	rte_telemetry_register_cmd(
16532054f31aSAmit Prakash Shukla 			EAL_MEMSEG_LIST_INFO_REQ,
16542054f31aSAmit Prakash Shukla 			handle_eal_memseg_list_info_request,
16552054f31aSAmit Prakash Shukla 			"Returns memseg list info. Parameters: int memseg_list_id");
16562054f31aSAmit Prakash Shukla 	rte_telemetry_register_cmd(
16572054f31aSAmit Prakash Shukla 			EAL_MEMSEG_INFO_REQ, handle_eal_memseg_info_request,
16582054f31aSAmit Prakash Shukla 			"Returns memseg info. Parameter: int memseg_list_id,int memseg_id");
16592054f31aSAmit Prakash Shukla 	rte_telemetry_register_cmd(EAL_ELEMENT_LIST_REQ,
16602054f31aSAmit Prakash Shukla 			handle_eal_element_list_request,
16612054f31aSAmit Prakash Shukla 			"Returns array of heap element IDs. Parameters: int heap_id, int memseg_list_id, int memseg_id");
16622054f31aSAmit Prakash Shukla 	rte_telemetry_register_cmd(EAL_ELEMENT_INFO_REQ,
16632054f31aSAmit Prakash Shukla 			handle_eal_element_info_request,
16642054f31aSAmit Prakash Shukla 			"Returns element info. Parameters: int heap_id, int memseg_list_id, int memseg_id, int start_elem_id, int end_elem_id");
1665e6732d0dSHarman Kalra }
1666e6732d0dSHarman Kalra #endif
1667