199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson * Copyright(c) 2010-2014 Intel Corporation.
399a2dd95SBruce Richardson * Copyright(c) 2013 6WIND S.A.
499a2dd95SBruce Richardson */
599a2dd95SBruce Richardson
699a2dd95SBruce Richardson #include <inttypes.h>
772b452c5SDmitry Kozlyuk #include <stdlib.h>
899a2dd95SBruce Richardson #include <string.h>
999a2dd95SBruce Richardson
1099a2dd95SBruce Richardson #include <rte_log.h>
1199a2dd95SBruce Richardson #include <rte_string_fns.h>
1299a2dd95SBruce Richardson
1399a2dd95SBruce Richardson #include "eal_internal_cfg.h"
1499a2dd95SBruce Richardson #include "eal_memalloc.h"
1599a2dd95SBruce Richardson #include "eal_memcfg.h"
1699a2dd95SBruce Richardson #include "eal_private.h"
1799a2dd95SBruce Richardson
1899a2dd95SBruce Richardson /** @file Functions common to EALs that support dynamic memory allocation. */
1999a2dd95SBruce Richardson
int
eal_dynmem_memseg_lists_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	/* a "memory type" is a combination of page size and NUMA node */
	struct memtype {
		uint64_t page_sz;
		int socket_id;
	} *memtypes = NULL;
	int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
	struct rte_memseg_list *msl;
	uint64_t max_mem, max_mem_per_type;
	unsigned int max_seglists_per_type;
	unsigned int n_memtypes, cur_type;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* no-huge does not need this at all */
	if (internal_conf->no_hugetlbfs)
		return 0;

	/*
	 * figuring out amount of memory we're going to have is a long and very
	 * involved process. the basic element we're operating with is a memory
	 * type, defined as a combination of NUMA node ID and page size (so that
	 * e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
	 *
	 * deciding amount of memory going towards each memory type is a
	 * balancing act between maximum segments per type, maximum memory per
	 * type, and number of detected NUMA nodes. the goal is to make sure
	 * each memory type gets at least one memseg list.
	 *
	 * the total amount of memory is limited by RTE_MAX_MEM_MB value.
	 *
	 * the total amount of memory per type is limited by either
	 * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
	 * of detected NUMA nodes. additionally, maximum number of segments per
	 * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
	 * smaller page sizes, it can take hundreds of thousands of segments to
	 * reach the above specified per-type memory limits.
	 *
	 * additionally, each type may have multiple memseg lists associated
	 * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
	 * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
	 *
	 * the number of memseg lists per type is decided based on the above
	 * limits, and also taking number of detected NUMA nodes, to make sure
	 * that we don't run out of memseg lists before we populate all NUMA
	 * nodes with memory.
	 *
	 * we do this in three stages. first, we collect the number of types.
	 * then, we figure out memory constraints and populate the list of
	 * would-be memseg lists. then, we go ahead and allocate the memseg
	 * lists.
	 */

	/* create space for mem types */
	n_memtypes = internal_conf->num_hugepage_sizes * rte_socket_count();
	memtypes = calloc(n_memtypes, sizeof(*memtypes));
	if (memtypes == NULL) {
		EAL_LOG(ERR, "Cannot allocate space for memory types");
		return -1;
	}

	/* populate mem types: one entry per (page size, socket) pair */
	cur_type = 0;
	for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
			hpi_idx++) {
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_conf->hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
			int socket_id = rte_socket_id_by_idx(i);

#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
			/* we can still sort pages by socket in legacy mode */
			if (!internal_conf->legacy_mem && socket_id > 0)
				break;
#endif
			memtypes[cur_type].page_sz = hugepage_sz;
			memtypes[cur_type].socket_id = socket_id;

			EAL_LOG(DEBUG, "Detected memory type: "
				"socket_id:%u hugepage_sz:%" PRIu64,
				socket_id, hugepage_sz);
		}
	}
	/* number of memtypes could have been lower due to no NUMA support */
	n_memtypes = cur_type;

	/* set up limits for types */
	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
			max_mem / n_memtypes);
	/*
	 * limit maximum number of segment lists per type to ensure there's
	 * space for memseg lists for all NUMA nodes with all page sizes
	 */
	max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;

	if (max_seglists_per_type == 0) {
		EAL_LOG(ERR, "Cannot accommodate all memory types, please increase RTE_MAX_MEMSEG_LISTS");
		goto out;
	}

	/* go through all mem types and create segment lists */
	msl_idx = 0;
	for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
		unsigned int cur_seglist, n_seglists, n_segs;
		unsigned int max_segs_per_type, max_segs_per_list;
		struct memtype *type = &memtypes[cur_type];
		uint64_t max_mem_per_list, pagesz;
		int socket_id;

		pagesz = type->page_sz;
		socket_id = type->socket_id;

		/*
		 * we need to create segment lists for this type. we must take
		 * into account the following things:
		 *
		 * 1. total amount of memory we can use for this memory type
		 * 2. total amount of memory per memseg list allowed
		 * 3. number of segments needed to fit the amount of memory
		 * 4. number of segments allowed per type
		 * 5. number of segments allowed per memseg list
		 * 6. number of memseg lists we are allowed to take up
		 */

		/* calculate how much segments we will need in total */
		max_segs_per_type = max_mem_per_type / pagesz;
		/* limit number of segments to maximum allowed per type */
		max_segs_per_type = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
		/* limit number of segments to maximum allowed per list */
		max_segs_per_list = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_LIST);

		/* calculate how much memory we can have per segment list */
		max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
				(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);

		/* calculate how many segments each segment list will have */
		n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);

		/* calculate how many segment lists we can have
		 *
		 * NOTE(review): if per-type memory limits were ever configured
		 * smaller than a single page, n_segs would be 0 and the
		 * division below would fault - presumed unreachable with the
		 * default RTE_MAX_MEM_* build-time values; confirm if these
		 * limits become runtime-configurable.
		 */
		n_seglists = RTE_MIN(max_segs_per_type / n_segs,
				max_mem_per_type / max_mem_per_list);

		/* limit number of segment lists according to our maximum */
		n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);

		EAL_LOG(DEBUG, "Creating %i segment lists: "
				"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64,
			n_seglists, n_segs, socket_id, pagesz);

		/* create all segment lists */
		for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				EAL_LOG(ERR,
					"No more space in memseg lists, please increase RTE_MAX_MEMSEG_LISTS");
				goto out;
			}
			msl = &mcfg->memsegs[msl_idx++];

			/* initialize list metadata (fbarray of segments) */
			if (eal_memseg_list_init(msl, pagesz, n_segs,
					socket_id, cur_seglist, true))
				goto out;

			/* reserve contiguous VA space for the whole list */
			if (eal_memseg_list_alloc(msl, 0)) {
				EAL_LOG(ERR, "Cannot allocate VA space for memseg list");
				goto out;
			}
		}
	}
	/* we're successful */
	ret = 0;
out:
	free(memtypes);
	return ret;
}
20399a2dd95SBruce Richardson
20499a2dd95SBruce Richardson static int __rte_unused
hugepage_count_walk(const struct rte_memseg_list * msl,void * arg)20599a2dd95SBruce Richardson hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
20699a2dd95SBruce Richardson {
20799a2dd95SBruce Richardson struct hugepage_info *hpi = arg;
20899a2dd95SBruce Richardson
20999a2dd95SBruce Richardson if (msl->page_sz != hpi->hugepage_sz)
21099a2dd95SBruce Richardson return 0;
21199a2dd95SBruce Richardson
21299a2dd95SBruce Richardson hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
21399a2dd95SBruce Richardson return 0;
21499a2dd95SBruce Richardson }
21599a2dd95SBruce Richardson
/*
 * Allocation validator registered for --socket-limit: unconditionally
 * refuse any allocation that would push a socket past its configured
 * limit. The arguments are irrelevant to the decision, hence ignored.
 */
static int
limits_callback(int socket_id, size_t cur_limit, size_t new_len)
{
	(void)socket_id;
	(void)cur_limit;
	(void)new_len;
	return -1;
}
22499a2dd95SBruce Richardson
/*
 * Preallocate hugepages requested via -m/--socket-mem at EAL init time.
 * Computes the per-socket, per-page-size allocation plan, allocates the
 * pages through the memalloc layer (marking them unfreeable), and
 * registers the socket-limit validator if limits were requested.
 * Returns 0 on success, -1 on failure.
 */
int
eal_dynmem_hugepage_init(void)
{
	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
	uint64_t memory[RTE_MAX_NUMA_NODES];
	int hp_sz_idx, socket_id;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	memset(used_hp, 0, sizeof(used_hp));

	for (hp_sz_idx = 0;
			hp_sz_idx < (int) internal_conf->num_hugepage_sizes;
			hp_sz_idx++) {
#ifndef RTE_ARCH_64
		struct hugepage_info dummy;
		unsigned int i;
#endif
		/* also initialize used_hp hugepage sizes in used_hp */
		struct hugepage_info *hpi;
		hpi = &internal_conf->hugepage_info[hp_sz_idx];
		used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;

#ifndef RTE_ARCH_64
		/* for 32-bit, limit number of pages on socket to whatever we've
		 * preallocated, as we cannot allocate more.
		 */
		memset(&dummy, 0, sizeof(dummy));
		dummy.hugepage_sz = hpi->hugepage_sz;
		/* memory_hotplug_lock is held during initialization, so it's
		 * safe to call thread-unsafe version.
		 */
		if (rte_memseg_list_walk_thread_unsafe(hugepage_count_walk, &dummy) < 0)
			return -1;

		for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
			hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
					dummy.num_pages[i]);
		}
#endif
	}

	/* make a copy of socket_mem, needed for balanced allocation. */
	/* (hp_sz_idx is reused here purely as a socket index) */
	for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
		memory[hp_sz_idx] = internal_conf->socket_mem[hp_sz_idx];

	/* calculate final number of pages */
	if (eal_dynmem_calc_num_pages_per_socket(memory,
			internal_conf->hugepage_info, used_hp,
			internal_conf->num_hugepage_sizes) < 0)
		return -1;

	for (hp_sz_idx = 0;
			hp_sz_idx < (int)internal_conf->num_hugepage_sizes;
			hp_sz_idx++) {
		for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
				socket_id++) {
			struct rte_memseg **pages;
			struct hugepage_info *hpi = &used_hp[hp_sz_idx];
			unsigned int num_pages = hpi->num_pages[socket_id];
			unsigned int num_pages_alloc;

			if (num_pages == 0)
				continue;

			EAL_LOG(DEBUG,
				"Allocating %u pages of size %" PRIu64 "M "
				"on socket %i",
				num_pages, hpi->hugepage_sz >> 20, socket_id);

			/* we may not be able to allocate all pages in one go,
			 * because we break up our memory map into multiple
			 * memseg lists. therefore, try allocating multiple
			 * times and see if we can get the desired number of
			 * pages from multiple allocations.
			 */

			num_pages_alloc = 0;
			do {
				int i, cur_pages, needed;

				needed = num_pages - num_pages_alloc;

				pages = malloc(sizeof(*pages) * needed);
				if (pages == NULL) {
					EAL_LOG(ERR, "Failed to malloc pages");
					return -1;
				}

				/* do not request exact number of pages */
				cur_pages = eal_memalloc_alloc_seg_bulk(pages,
						needed, hpi->hugepage_sz,
						socket_id, false);
				/* <= 0 means no progress is possible: abort */
				if (cur_pages <= 0) {
					free(pages);
					return -1;
				}

				/* mark preallocated pages as unfreeable */
				for (i = 0; i < cur_pages; i++) {
					struct rte_memseg *ms = pages[i];
					ms->flags |=
						RTE_MEMSEG_FLAG_DO_NOT_FREE;
				}
				free(pages);

				num_pages_alloc += cur_pages;
			} while (num_pages_alloc != num_pages);
		}
	}

	/* if socket limits were specified, set them */
	if (internal_conf->force_socket_limits) {
		unsigned int i;
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			uint64_t limit = internal_conf->socket_limit[i];
			if (limit == 0)
				continue;
			/* registration failure is logged but not fatal */
			if (rte_mem_alloc_validator_register("socket-limit",
					limits_callback, i, limit))
				EAL_LOG(ERR, "Failed to register socket limits validator callback");
		}
	}
	return 0;
}
35099a2dd95SBruce Richardson
35199a2dd95SBruce Richardson __rte_unused /* function is unused on 32-bit builds */
35299a2dd95SBruce Richardson static inline uint64_t
get_socket_mem_size(int socket)35399a2dd95SBruce Richardson get_socket_mem_size(int socket)
35499a2dd95SBruce Richardson {
35599a2dd95SBruce Richardson uint64_t size = 0;
35699a2dd95SBruce Richardson unsigned int i;
35799a2dd95SBruce Richardson struct internal_config *internal_conf =
35899a2dd95SBruce Richardson eal_get_internal_configuration();
35999a2dd95SBruce Richardson
36099a2dd95SBruce Richardson for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
36199a2dd95SBruce Richardson struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
36299a2dd95SBruce Richardson size += hpi->hugepage_sz * hpi->num_pages[socket];
36399a2dd95SBruce Richardson }
36499a2dd95SBruce Richardson
36599a2dd95SBruce Richardson return size;
36699a2dd95SBruce Richardson }
36799a2dd95SBruce Richardson
/*
 * Decide how many hugepages of each size to reserve on each NUMA node.
 *
 * @param memory
 *   Requested bytes per socket; entries are decremented in place as pages
 *   are assigned. If no per-socket amounts were forced, this array is
 *   first populated by spreading internal_conf->memory across sockets.
 * @param hp_info
 *   Detected hugepage sizes with available per-socket page counts.
 * @param hp_used
 *   Output: per-size, per-socket page counts actually assigned.
 * @param num_hp_info
 *   Number of valid entries in hp_info/hp_used.
 * @return
 *   Total number of pages assigned, or -1 if requests cannot be met.
 */
int
eal_dynmem_calc_num_pages_per_socket(
	uint64_t *memory, struct hugepage_info *hp_info,
	struct hugepage_info *hp_used, unsigned int num_hp_info)
{
	unsigned int socket, j, i = 0;
	unsigned int requested, available;
	int total_num_pages = 0;
	uint64_t remaining_mem, cur_mem;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();
	uint64_t total_mem = internal_conf->memory;

	if (num_hp_info == 0)
		return -1;

	/* if specific memory amounts per socket weren't requested */
	if (internal_conf->force_sockets == 0) {
		size_t total_size;
#ifdef RTE_ARCH_64
		int cpu_per_socket[RTE_MAX_NUMA_NODES];
		size_t default_size;
		unsigned int lcore_id;

		/* Compute number of cores per socket */
		memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
		RTE_LCORE_FOREACH(lcore_id) {
			cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
		}

		/*
		 * Automatically spread requested memory amongst detected
		 * sockets according to number of cores from CPU mask present
		 * on each socket.
		 */
		total_size = internal_conf->memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {

			/* Set memory amount per socket */
			default_size = internal_conf->memory *
				cpu_per_socket[socket] / rte_lcore_count();

			/* Limit to maximum available memory on socket */
			default_size = RTE_MIN(
				default_size, get_socket_mem_size(socket));

			/* Update sizes */
			memory[socket] = default_size;
			total_size -= default_size;
		}

		/*
		 * If some memory is remaining, try to allocate it by getting
		 * all available memory from sockets, one after the other.
		 */
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			/* take whatever is available */
			default_size = RTE_MIN(
				get_socket_mem_size(socket) - memory[socket],
				total_size);

			/* Update sizes */
			memory[socket] += default_size;
			total_size -= default_size;
		}
#else
		/* in 32-bit mode, allocate all of the memory only on main
		 * lcore socket
		 */
		total_size = internal_conf->memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			struct rte_config *cfg = rte_eal_get_configuration();
			unsigned int main_lcore_socket;

			main_lcore_socket =
				rte_lcore_to_socket_id(cfg->main_lcore);

			if (main_lcore_socket != socket)
				continue;

			/* Update sizes */
			memory[socket] = total_size;
			break;
		}
#endif
	}

	/* satisfy each socket's request, preferring larger page sizes first
	 * (hp_info is expected to be ordered by page size)
	 */
	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0;
			socket++) {
		/* skips if the memory on specific socket wasn't requested */
		for (i = 0; i < num_hp_info && memory[socket] != 0; i++) {
			rte_strscpy(hp_used[i].hugedir, hp_info[i].hugedir,
				sizeof(hp_used[i].hugedir));
			/* take as many pages of this size as fit and exist */
			hp_used[i].num_pages[socket] = RTE_MIN(
					memory[socket] / hp_info[i].hugepage_sz,
					hp_info[i].num_pages[socket]);

			cur_mem = hp_used[i].num_pages[socket] *
					hp_used[i].hugepage_sz;

			memory[socket] -= cur_mem;
			total_mem -= cur_mem;

			total_num_pages += hp_used[i].num_pages[socket];

			/* check if we have met all memory requests */
			if (memory[socket] == 0)
				break;

			/* Check if we have any more pages left at this size,
			 * if so, move on to next size.
			 */
			if (hp_used[i].num_pages[socket] ==
					hp_info[i].num_pages[socket])
				continue;
			/* At this point we know that there are more pages
			 * available that are bigger than the memory we want,
			 * so lets see if we can get enough from other page
			 * sizes.
			 */
			remaining_mem = 0;
			for (j = i+1; j < num_hp_info; j++)
				remaining_mem += hp_info[j].hugepage_sz *
				hp_info[j].num_pages[socket];

			/* Is there enough other memory?
			 * If not, allocate another page and quit.
			 */
			if (remaining_mem < memory[socket]) {
				/* round the remainder up to one extra page of
				 * this size rather than under-allocating
				 */
				cur_mem = RTE_MIN(
					memory[socket], hp_info[i].hugepage_sz);
				memory[socket] -= cur_mem;
				total_mem -= cur_mem;
				hp_used[i].num_pages[socket]++;
				total_num_pages++;
				break; /* we are done with this socket*/
			}
		}

		/* if we didn't satisfy all memory requirements per socket */
		if (memory[socket] > 0 &&
				internal_conf->socket_mem[socket] != 0) {
			/* to prevent icc errors */
			requested = (unsigned int)(
				internal_conf->socket_mem[socket] / 0x100000);
			available = requested -
				((unsigned int)(memory[socket] / 0x100000));
			EAL_LOG(ERR, "Not enough memory available on "
				"socket %u! Requested: %uMB, available: %uMB",
				socket, requested, available);
			return -1;
		}
	}

	/* if we didn't satisfy total memory requirements */
	if (total_mem > 0) {
		requested = (unsigned int)(internal_conf->memory / 0x100000);
		available = requested - (unsigned int)(total_mem / 0x100000);
		EAL_LOG(ERR, "Not enough memory available! "
			"Requested: %uMB, available: %uMB",
			requested, available);
		return -1;
	}
	return total_num_pages;
}
536