199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 299a2dd95SBruce Richardson * Copyright(c) 2010-2014 Intel Corporation 399a2dd95SBruce Richardson */ 499a2dd95SBruce Richardson #include <stdint.h> 599a2dd95SBruce Richardson #include <stddef.h> 699a2dd95SBruce Richardson #include <stdlib.h> 799a2dd95SBruce Richardson #include <stdio.h> 899a2dd95SBruce Richardson #include <stdarg.h> 999a2dd95SBruce Richardson #include <errno.h> 1099a2dd95SBruce Richardson #include <sys/queue.h> 1199a2dd95SBruce Richardson 1299a2dd95SBruce Richardson #include <rte_memory.h> 1399a2dd95SBruce Richardson #include <rte_errno.h> 1499a2dd95SBruce Richardson #include <rte_eal.h> 1599a2dd95SBruce Richardson #include <rte_eal_memconfig.h> 1699a2dd95SBruce Richardson #include <rte_launch.h> 1799a2dd95SBruce Richardson #include <rte_per_lcore.h> 1899a2dd95SBruce Richardson #include <rte_lcore.h> 1999a2dd95SBruce Richardson #include <rte_common.h> 2099a2dd95SBruce Richardson #include <rte_string_fns.h> 2199a2dd95SBruce Richardson #include <rte_spinlock.h> 2299a2dd95SBruce Richardson #include <rte_memcpy.h> 2399a2dd95SBruce Richardson #include <rte_memzone.h> 2499a2dd95SBruce Richardson #include <rte_atomic.h> 2599a2dd95SBruce Richardson #include <rte_fbarray.h> 2699a2dd95SBruce Richardson 2799a2dd95SBruce Richardson #include "eal_internal_cfg.h" 2899a2dd95SBruce Richardson #include "eal_memalloc.h" 2999a2dd95SBruce Richardson #include "eal_memcfg.h" 3099a2dd95SBruce Richardson #include "eal_private.h" 3199a2dd95SBruce Richardson #include "malloc_elem.h" 3299a2dd95SBruce Richardson #include "malloc_heap.h" 3399a2dd95SBruce Richardson #include "malloc_mp.h" 3499a2dd95SBruce Richardson 3599a2dd95SBruce Richardson /* start external socket ID's at a very high number */ 3699a2dd95SBruce Richardson #define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */ 3799a2dd95SBruce Richardson #define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES)) 3899a2dd95SBruce Richardson 3999a2dd95SBruce Richardson static unsigned 4099a2dd95SBruce Richardson check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) 4199a2dd95SBruce Richardson { 4299a2dd95SBruce Richardson unsigned check_flag = 0; 4399a2dd95SBruce Richardson 4499a2dd95SBruce Richardson if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) 4599a2dd95SBruce Richardson return 1; 4699a2dd95SBruce Richardson 4799a2dd95SBruce Richardson switch (hugepage_sz) { 4899a2dd95SBruce Richardson case RTE_PGSIZE_256K: 4999a2dd95SBruce Richardson check_flag = RTE_MEMZONE_256KB; 5099a2dd95SBruce Richardson break; 5199a2dd95SBruce Richardson case RTE_PGSIZE_2M: 5299a2dd95SBruce Richardson check_flag = RTE_MEMZONE_2MB; 5399a2dd95SBruce Richardson break; 5499a2dd95SBruce Richardson case RTE_PGSIZE_16M: 5599a2dd95SBruce Richardson check_flag = RTE_MEMZONE_16MB; 5699a2dd95SBruce Richardson break; 5799a2dd95SBruce Richardson case RTE_PGSIZE_256M: 5899a2dd95SBruce Richardson check_flag = RTE_MEMZONE_256MB; 5999a2dd95SBruce Richardson break; 6099a2dd95SBruce Richardson case RTE_PGSIZE_512M: 6199a2dd95SBruce Richardson check_flag = RTE_MEMZONE_512MB; 6299a2dd95SBruce Richardson break; 6399a2dd95SBruce Richardson case RTE_PGSIZE_1G: 6499a2dd95SBruce Richardson check_flag = RTE_MEMZONE_1GB; 6599a2dd95SBruce Richardson break; 6699a2dd95SBruce Richardson case RTE_PGSIZE_4G: 6799a2dd95SBruce Richardson check_flag = RTE_MEMZONE_4GB; 6899a2dd95SBruce Richardson break; 6999a2dd95SBruce Richardson case RTE_PGSIZE_16G: 7099a2dd95SBruce Richardson check_flag = RTE_MEMZONE_16GB; 7199a2dd95SBruce Richardson } 7299a2dd95SBruce Richardson 7399a2dd95SBruce Richardson return check_flag & flags; 7499a2dd95SBruce Richardson } 7599a2dd95SBruce Richardson 7699a2dd95SBruce Richardson int 7799a2dd95SBruce Richardson malloc_socket_to_heap_id(unsigned int socket_id) 7899a2dd95SBruce Richardson { 7999a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 8099a2dd95SBruce Richardson int i; 8199a2dd95SBruce Richardson 8299a2dd95SBruce Richardson for (i = 0; i < RTE_MAX_HEAPS; i++) { 8399a2dd95SBruce Richardson struct malloc_heap *heap = &mcfg->malloc_heaps[i]; 8499a2dd95SBruce Richardson 8599a2dd95SBruce Richardson if (heap->socket_id == socket_id) 8699a2dd95SBruce Richardson return i; 8799a2dd95SBruce Richardson } 8899a2dd95SBruce Richardson return -1; 8999a2dd95SBruce Richardson } 9099a2dd95SBruce Richardson 9199a2dd95SBruce Richardson /* 9299a2dd95SBruce Richardson * Expand the heap with a memory area. 9399a2dd95SBruce Richardson */ 9499a2dd95SBruce Richardson static struct malloc_elem * 9599a2dd95SBruce Richardson malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, 9699a2dd95SBruce Richardson void *start, size_t len) 9799a2dd95SBruce Richardson { 9899a2dd95SBruce Richardson struct malloc_elem *elem = start; 9999a2dd95SBruce Richardson 10099a2dd95SBruce Richardson malloc_elem_init(elem, heap, msl, len, elem, len); 10199a2dd95SBruce Richardson 10299a2dd95SBruce Richardson malloc_elem_insert(elem); 10399a2dd95SBruce Richardson 10499a2dd95SBruce Richardson elem = malloc_elem_join_adjacent_free(elem); 10599a2dd95SBruce Richardson 10699a2dd95SBruce Richardson malloc_elem_free_list_insert(elem); 10799a2dd95SBruce Richardson 10899a2dd95SBruce Richardson return elem; 10999a2dd95SBruce Richardson } 11099a2dd95SBruce Richardson 11199a2dd95SBruce Richardson static int 11299a2dd95SBruce Richardson malloc_add_seg(const struct rte_memseg_list *msl, 11399a2dd95SBruce Richardson const struct rte_memseg *ms, size_t len, void *arg __rte_unused) 11499a2dd95SBruce Richardson { 11599a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 11699a2dd95SBruce Richardson struct rte_memseg_list *found_msl; 11799a2dd95SBruce Richardson struct malloc_heap *heap; 11899a2dd95SBruce Richardson int msl_idx, heap_idx; 11999a2dd95SBruce Richardson 12099a2dd95SBruce Richardson if (msl->external) 12199a2dd95SBruce Richardson return 0; 12299a2dd95SBruce Richardson 12399a2dd95SBruce Richardson heap_idx = malloc_socket_to_heap_id(msl->socket_id); 12499a2dd95SBruce Richardson if (heap_idx < 0) { 12599a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n"); 12699a2dd95SBruce Richardson return -1; 12799a2dd95SBruce Richardson } 12899a2dd95SBruce Richardson heap = &mcfg->malloc_heaps[heap_idx]; 12999a2dd95SBruce Richardson 13099a2dd95SBruce Richardson /* msl is const, so find it */ 13199a2dd95SBruce Richardson msl_idx = msl - mcfg->memsegs; 13299a2dd95SBruce Richardson 13399a2dd95SBruce Richardson if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) 13499a2dd95SBruce Richardson return -1; 13599a2dd95SBruce Richardson 13699a2dd95SBruce Richardson found_msl = &mcfg->memsegs[msl_idx]; 13799a2dd95SBruce Richardson 13899a2dd95SBruce Richardson malloc_heap_add_memory(heap, found_msl, ms->addr, len); 13999a2dd95SBruce Richardson 14099a2dd95SBruce Richardson heap->total_size += len; 14199a2dd95SBruce Richardson 14299a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20, 14399a2dd95SBruce Richardson msl->socket_id); 14499a2dd95SBruce Richardson return 0; 14599a2dd95SBruce Richardson } 14699a2dd95SBruce Richardson 14799a2dd95SBruce Richardson /* 14899a2dd95SBruce Richardson * Iterates through the freelist for a heap to find a free element 14999a2dd95SBruce Richardson * which can store data of the required size and with the requested alignment. 15099a2dd95SBruce Richardson * If size is 0, find the biggest available elem. 15199a2dd95SBruce Richardson * Returns null on failure, or pointer to element on success. 15299a2dd95SBruce Richardson */ 15399a2dd95SBruce Richardson static struct malloc_elem * 15499a2dd95SBruce Richardson find_suitable_element(struct malloc_heap *heap, size_t size, 15599a2dd95SBruce Richardson unsigned int flags, size_t align, size_t bound, bool contig) 15699a2dd95SBruce Richardson { 15799a2dd95SBruce Richardson size_t idx; 15899a2dd95SBruce Richardson struct malloc_elem *elem, *alt_elem = NULL; 15999a2dd95SBruce Richardson 16099a2dd95SBruce Richardson for (idx = malloc_elem_free_list_index(size); 16199a2dd95SBruce Richardson idx < RTE_HEAP_NUM_FREELISTS; idx++) { 16299a2dd95SBruce Richardson for (elem = LIST_FIRST(&heap->free_head[idx]); 16399a2dd95SBruce Richardson !!elem; elem = LIST_NEXT(elem, free_list)) { 16499a2dd95SBruce Richardson if (malloc_elem_can_hold(elem, size, align, bound, 16599a2dd95SBruce Richardson contig)) { 16699a2dd95SBruce Richardson if (check_hugepage_sz(flags, 16799a2dd95SBruce Richardson elem->msl->page_sz)) 16899a2dd95SBruce Richardson return elem; 16999a2dd95SBruce Richardson if (alt_elem == NULL) 17099a2dd95SBruce Richardson alt_elem = elem; 17199a2dd95SBruce Richardson } 17299a2dd95SBruce Richardson } 17399a2dd95SBruce Richardson } 17499a2dd95SBruce Richardson 17599a2dd95SBruce Richardson if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) 17699a2dd95SBruce Richardson return alt_elem; 17799a2dd95SBruce Richardson 17899a2dd95SBruce Richardson return NULL; 17999a2dd95SBruce Richardson } 18099a2dd95SBruce Richardson 18199a2dd95SBruce Richardson /* 18299a2dd95SBruce Richardson * Iterates through the freelist for a heap to find a free element with the 18399a2dd95SBruce Richardson * biggest size and requested alignment. Will also set size to whatever element 18499a2dd95SBruce Richardson * size that was found. 18599a2dd95SBruce Richardson * Returns null on failure, or pointer to element on success. 18699a2dd95SBruce Richardson */ 18799a2dd95SBruce Richardson static struct malloc_elem * 18899a2dd95SBruce Richardson find_biggest_element(struct malloc_heap *heap, size_t *size, 18999a2dd95SBruce Richardson unsigned int flags, size_t align, bool contig) 19099a2dd95SBruce Richardson { 19199a2dd95SBruce Richardson struct malloc_elem *elem, *max_elem = NULL; 19299a2dd95SBruce Richardson size_t idx, max_size = 0; 19399a2dd95SBruce Richardson 19499a2dd95SBruce Richardson for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { 19599a2dd95SBruce Richardson for (elem = LIST_FIRST(&heap->free_head[idx]); 19699a2dd95SBruce Richardson !!elem; elem = LIST_NEXT(elem, free_list)) { 19799a2dd95SBruce Richardson size_t cur_size; 19899a2dd95SBruce Richardson if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 && 19999a2dd95SBruce Richardson !check_hugepage_sz(flags, 20099a2dd95SBruce Richardson elem->msl->page_sz)) 20199a2dd95SBruce Richardson continue; 20299a2dd95SBruce Richardson if (contig) { 20399a2dd95SBruce Richardson cur_size = 20499a2dd95SBruce Richardson malloc_elem_find_max_iova_contig(elem, 20599a2dd95SBruce Richardson align); 20699a2dd95SBruce Richardson } else { 20799a2dd95SBruce Richardson void *data_start = RTE_PTR_ADD(elem, 20899a2dd95SBruce Richardson MALLOC_ELEM_HEADER_LEN); 20999a2dd95SBruce Richardson void *data_end = RTE_PTR_ADD(elem, elem->size - 21099a2dd95SBruce Richardson MALLOC_ELEM_TRAILER_LEN); 21199a2dd95SBruce Richardson void *aligned = RTE_PTR_ALIGN_CEIL(data_start, 21299a2dd95SBruce Richardson align); 21399a2dd95SBruce Richardson /* check if aligned data start is beyond end */ 21499a2dd95SBruce Richardson if (aligned >= data_end) 21599a2dd95SBruce Richardson continue; 21699a2dd95SBruce Richardson cur_size = RTE_PTR_DIFF(data_end, aligned); 21799a2dd95SBruce Richardson } 21899a2dd95SBruce Richardson if (cur_size > max_size) { 21999a2dd95SBruce Richardson max_size = cur_size; 22099a2dd95SBruce Richardson max_elem = elem; 22199a2dd95SBruce Richardson } 22299a2dd95SBruce Richardson } 22399a2dd95SBruce Richardson } 22499a2dd95SBruce Richardson 22599a2dd95SBruce Richardson *size = max_size; 22699a2dd95SBruce Richardson return max_elem; 22799a2dd95SBruce Richardson } 22899a2dd95SBruce Richardson 22999a2dd95SBruce Richardson /* 23099a2dd95SBruce Richardson * Main function to allocate a block of memory from the heap. 23199a2dd95SBruce Richardson * It locks the free list, scans it, and adds a new memseg if the 23299a2dd95SBruce Richardson * scan fails. Once the new memseg is added, it re-scans and should return 23399a2dd95SBruce Richardson * the new element after releasing the lock. 23499a2dd95SBruce Richardson */ 23599a2dd95SBruce Richardson static void * 23699a2dd95SBruce Richardson heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, 23799a2dd95SBruce Richardson unsigned int flags, size_t align, size_t bound, bool contig) 23899a2dd95SBruce Richardson { 23999a2dd95SBruce Richardson struct malloc_elem *elem; 240*6cc51b12SZhihong Peng size_t user_size = size; 24199a2dd95SBruce Richardson 24299a2dd95SBruce Richardson size = RTE_CACHE_LINE_ROUNDUP(size); 24399a2dd95SBruce Richardson align = RTE_CACHE_LINE_ROUNDUP(align); 24499a2dd95SBruce Richardson 24599a2dd95SBruce Richardson /* roundup might cause an overflow */ 24699a2dd95SBruce Richardson if (size == 0) 24799a2dd95SBruce Richardson return NULL; 24899a2dd95SBruce Richardson elem = find_suitable_element(heap, size, flags, align, bound, contig); 24999a2dd95SBruce Richardson if (elem != NULL) { 25099a2dd95SBruce Richardson elem = malloc_elem_alloc(elem, size, align, bound, contig); 25199a2dd95SBruce Richardson 25299a2dd95SBruce Richardson /* increase heap's count of allocated elements */ 25399a2dd95SBruce Richardson heap->alloc_count++; 254*6cc51b12SZhihong Peng 255*6cc51b12SZhihong Peng asan_set_redzone(elem, user_size); 25699a2dd95SBruce Richardson } 25799a2dd95SBruce Richardson 25899a2dd95SBruce Richardson return elem == NULL ? NULL : (void *)(&elem[1]); 25999a2dd95SBruce Richardson } 26099a2dd95SBruce Richardson 26199a2dd95SBruce Richardson static void * 26299a2dd95SBruce Richardson heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, 26399a2dd95SBruce Richardson unsigned int flags, size_t align, bool contig) 26499a2dd95SBruce Richardson { 26599a2dd95SBruce Richardson struct malloc_elem *elem; 26699a2dd95SBruce Richardson size_t size; 26799a2dd95SBruce Richardson 26899a2dd95SBruce Richardson align = RTE_CACHE_LINE_ROUNDUP(align); 26999a2dd95SBruce Richardson 27099a2dd95SBruce Richardson elem = find_biggest_element(heap, &size, flags, align, contig); 27199a2dd95SBruce Richardson if (elem != NULL) { 27299a2dd95SBruce Richardson elem = malloc_elem_alloc(elem, size, align, 0, contig); 27399a2dd95SBruce Richardson 27499a2dd95SBruce Richardson /* increase heap's count of allocated elements */ 27599a2dd95SBruce Richardson heap->alloc_count++; 276*6cc51b12SZhihong Peng 277*6cc51b12SZhihong Peng asan_set_redzone(elem, size); 27899a2dd95SBruce Richardson } 27999a2dd95SBruce Richardson 28099a2dd95SBruce Richardson return elem == NULL ? NULL : (void *)(&elem[1]); 28199a2dd95SBruce Richardson } 28299a2dd95SBruce Richardson 28399a2dd95SBruce Richardson /* this function is exposed in malloc_mp.h */ 28499a2dd95SBruce Richardson void 28599a2dd95SBruce Richardson rollback_expand_heap(struct rte_memseg **ms, int n_segs, 28699a2dd95SBruce Richardson struct malloc_elem *elem, void *map_addr, size_t map_len) 28799a2dd95SBruce Richardson { 28899a2dd95SBruce Richardson if (elem != NULL) { 28999a2dd95SBruce Richardson malloc_elem_free_list_remove(elem); 29099a2dd95SBruce Richardson malloc_elem_hide_region(elem, map_addr, map_len); 29199a2dd95SBruce Richardson } 29299a2dd95SBruce Richardson 29399a2dd95SBruce Richardson eal_memalloc_free_seg_bulk(ms, n_segs); 29499a2dd95SBruce Richardson } 29599a2dd95SBruce Richardson 29699a2dd95SBruce Richardson /* this function is exposed in malloc_mp.h */ 29799a2dd95SBruce Richardson struct malloc_elem * 29899a2dd95SBruce Richardson alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, 29999a2dd95SBruce Richardson int socket, unsigned int flags, size_t align, size_t bound, 30099a2dd95SBruce Richardson bool contig, struct rte_memseg **ms, int n_segs) 30199a2dd95SBruce Richardson { 30299a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 30399a2dd95SBruce Richardson struct rte_memseg_list *msl; 30499a2dd95SBruce Richardson struct malloc_elem *elem = NULL; 30599a2dd95SBruce Richardson size_t alloc_sz; 30699a2dd95SBruce Richardson int allocd_pages; 30799a2dd95SBruce Richardson void *ret, *map_addr; 30899a2dd95SBruce Richardson 30999a2dd95SBruce Richardson alloc_sz = (size_t)pg_sz * n_segs; 31099a2dd95SBruce Richardson 31199a2dd95SBruce Richardson /* first, check if we're allowed to allocate this memory */ 31299a2dd95SBruce Richardson if (eal_memalloc_mem_alloc_validate(socket, 31399a2dd95SBruce Richardson heap->total_size + alloc_sz) < 0) { 31499a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n"); 31599a2dd95SBruce Richardson return NULL; 31699a2dd95SBruce Richardson } 31799a2dd95SBruce Richardson 31899a2dd95SBruce Richardson allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz, 31999a2dd95SBruce Richardson socket, true); 32099a2dd95SBruce Richardson 32199a2dd95SBruce Richardson /* make sure we've allocated our pages... */ 32299a2dd95SBruce Richardson if (allocd_pages < 0) 32399a2dd95SBruce Richardson return NULL; 32499a2dd95SBruce Richardson 32599a2dd95SBruce Richardson map_addr = ms[0]->addr; 32699a2dd95SBruce Richardson msl = rte_mem_virt2memseg_list(map_addr); 32799a2dd95SBruce Richardson 32899a2dd95SBruce Richardson /* check if we wanted contiguous memory but didn't get it */ 32999a2dd95SBruce Richardson if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) { 33099a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n", 33199a2dd95SBruce Richardson __func__); 33299a2dd95SBruce Richardson goto fail; 33399a2dd95SBruce Richardson } 33499a2dd95SBruce Richardson 33599a2dd95SBruce Richardson /* 33699a2dd95SBruce Richardson * Once we have all the memseg lists configured, if there is a dma mask 33799a2dd95SBruce Richardson * set, check iova addresses are not out of range. Otherwise the device 33899a2dd95SBruce Richardson * setting the dma mask could have problems with the mapped memory. 33999a2dd95SBruce Richardson * 34099a2dd95SBruce Richardson * There are two situations when this can happen: 34199a2dd95SBruce Richardson * 1) memory initialization 34299a2dd95SBruce Richardson * 2) dynamic memory allocation 34399a2dd95SBruce Richardson * 34499a2dd95SBruce Richardson * For 1), an error when checking dma mask implies app can not be 34599a2dd95SBruce Richardson * executed. For 2) implies the new memory can not be added. 34699a2dd95SBruce Richardson */ 34799a2dd95SBruce Richardson if (mcfg->dma_maskbits && 34899a2dd95SBruce Richardson rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { 34999a2dd95SBruce Richardson /* 35099a2dd95SBruce Richardson * Currently this can only happen if IOMMU is enabled 35199a2dd95SBruce Richardson * and the address width supported by the IOMMU hw is 35299a2dd95SBruce Richardson * not enough for using the memory mapped IOVAs. 35399a2dd95SBruce Richardson * 35499a2dd95SBruce Richardson * If IOVA is VA, advice to try with '--iova-mode pa' 35599a2dd95SBruce Richardson * which could solve some situations when IOVA VA is not 35699a2dd95SBruce Richardson * really needed. 35799a2dd95SBruce Richardson */ 35899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, 35999a2dd95SBruce Richardson "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n", 36099a2dd95SBruce Richardson __func__); 36199a2dd95SBruce Richardson 36299a2dd95SBruce Richardson /* 36399a2dd95SBruce Richardson * If IOVA is VA and it is possible to run with IOVA PA, 36499a2dd95SBruce Richardson * because user is root, give and advice for solving the 36599a2dd95SBruce Richardson * problem. 36699a2dd95SBruce Richardson */ 36799a2dd95SBruce Richardson if ((rte_eal_iova_mode() == RTE_IOVA_VA) && 36899a2dd95SBruce Richardson rte_eal_using_phys_addrs()) 36999a2dd95SBruce Richardson RTE_LOG(ERR, EAL, 37099a2dd95SBruce Richardson "%s(): Please try initializing EAL with --iova-mode=pa parameter\n", 37199a2dd95SBruce Richardson __func__); 37299a2dd95SBruce Richardson goto fail; 37399a2dd95SBruce Richardson } 37499a2dd95SBruce Richardson 37599a2dd95SBruce Richardson /* add newly minted memsegs to malloc heap */ 37699a2dd95SBruce Richardson elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); 37799a2dd95SBruce Richardson 37899a2dd95SBruce Richardson /* try once more, as now we have allocated new memory */ 37999a2dd95SBruce Richardson ret = find_suitable_element(heap, elt_size, flags, align, bound, 38099a2dd95SBruce Richardson contig); 38199a2dd95SBruce Richardson 38299a2dd95SBruce Richardson if (ret == NULL) 38399a2dd95SBruce Richardson goto fail; 38499a2dd95SBruce Richardson 38599a2dd95SBruce Richardson return elem; 38699a2dd95SBruce Richardson 38799a2dd95SBruce Richardson fail: 38899a2dd95SBruce Richardson rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); 38999a2dd95SBruce Richardson return NULL; 39099a2dd95SBruce Richardson } 39199a2dd95SBruce Richardson 39299a2dd95SBruce Richardson static int 39399a2dd95SBruce Richardson try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, 39499a2dd95SBruce Richardson size_t elt_size, int socket, unsigned int flags, size_t align, 39599a2dd95SBruce Richardson size_t bound, bool contig) 39699a2dd95SBruce Richardson { 39799a2dd95SBruce Richardson struct malloc_elem *elem; 39899a2dd95SBruce Richardson struct rte_memseg **ms; 39999a2dd95SBruce Richardson void *map_addr; 40099a2dd95SBruce Richardson size_t alloc_sz; 40199a2dd95SBruce Richardson int n_segs; 40299a2dd95SBruce Richardson bool callback_triggered = false; 40399a2dd95SBruce Richardson 40499a2dd95SBruce Richardson alloc_sz = RTE_ALIGN_CEIL(align + elt_size + 40599a2dd95SBruce Richardson MALLOC_ELEM_TRAILER_LEN, pg_sz); 40699a2dd95SBruce Richardson n_segs = alloc_sz / pg_sz; 40799a2dd95SBruce Richardson 40899a2dd95SBruce Richardson /* we can't know in advance how many pages we'll need, so we malloc */ 40999a2dd95SBruce Richardson ms = malloc(sizeof(*ms) * n_segs); 41099a2dd95SBruce Richardson if (ms == NULL) 41199a2dd95SBruce Richardson return -1; 41299a2dd95SBruce Richardson memset(ms, 0, sizeof(*ms) * n_segs); 41399a2dd95SBruce Richardson 41499a2dd95SBruce Richardson elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align, 41599a2dd95SBruce Richardson bound, contig, ms, n_segs); 41699a2dd95SBruce Richardson 41799a2dd95SBruce Richardson if (elem == NULL) 41899a2dd95SBruce Richardson goto free_ms; 41999a2dd95SBruce Richardson 42099a2dd95SBruce Richardson map_addr = ms[0]->addr; 42199a2dd95SBruce Richardson 42299a2dd95SBruce Richardson /* notify user about changes in memory map */ 42399a2dd95SBruce Richardson eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); 42499a2dd95SBruce Richardson 42599a2dd95SBruce Richardson /* notify other processes that this has happened */ 42699a2dd95SBruce Richardson if (request_sync()) { 42799a2dd95SBruce Richardson /* we couldn't ensure all processes have mapped memory, 42899a2dd95SBruce Richardson * so free it back and notify everyone that it's been 42999a2dd95SBruce Richardson * freed back. 43099a2dd95SBruce Richardson * 43199a2dd95SBruce Richardson * technically, we could've avoided adding memory addresses to 43299a2dd95SBruce Richardson * the map, but that would've led to inconsistent behavior 43399a2dd95SBruce Richardson * between primary and secondary processes, as those get 43499a2dd95SBruce Richardson * callbacks during sync. therefore, force primary process to 43599a2dd95SBruce Richardson * do alloc-and-rollback syncs as well. 43699a2dd95SBruce Richardson */ 43799a2dd95SBruce Richardson callback_triggered = true; 43899a2dd95SBruce Richardson goto free_elem; 43999a2dd95SBruce Richardson } 44099a2dd95SBruce Richardson heap->total_size += alloc_sz; 44199a2dd95SBruce Richardson 44299a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n", 44399a2dd95SBruce Richardson socket, alloc_sz >> 20ULL); 44499a2dd95SBruce Richardson 44599a2dd95SBruce Richardson free(ms); 44699a2dd95SBruce Richardson 44799a2dd95SBruce Richardson return 0; 44899a2dd95SBruce Richardson 44999a2dd95SBruce Richardson free_elem: 45099a2dd95SBruce Richardson if (callback_triggered) 45199a2dd95SBruce Richardson eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, 45299a2dd95SBruce Richardson map_addr, alloc_sz); 45399a2dd95SBruce Richardson 45499a2dd95SBruce Richardson rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); 45599a2dd95SBruce Richardson 45699a2dd95SBruce Richardson request_sync(); 45799a2dd95SBruce Richardson free_ms: 45899a2dd95SBruce Richardson free(ms); 45999a2dd95SBruce Richardson 46099a2dd95SBruce Richardson return -1; 46199a2dd95SBruce Richardson } 46299a2dd95SBruce Richardson 46399a2dd95SBruce Richardson static int 46499a2dd95SBruce Richardson try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz, 46599a2dd95SBruce Richardson size_t elt_size, int socket, unsigned int flags, size_t align, 46699a2dd95SBruce Richardson size_t bound, bool contig) 46799a2dd95SBruce Richardson { 46899a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 46999a2dd95SBruce Richardson struct malloc_mp_req req; 47099a2dd95SBruce Richardson int req_result; 47199a2dd95SBruce Richardson 47299a2dd95SBruce Richardson memset(&req, 0, sizeof(req)); 47399a2dd95SBruce Richardson 47499a2dd95SBruce Richardson req.t = REQ_TYPE_ALLOC; 47599a2dd95SBruce Richardson req.alloc_req.align = align; 47699a2dd95SBruce Richardson req.alloc_req.bound = bound; 47799a2dd95SBruce Richardson req.alloc_req.contig = contig; 47899a2dd95SBruce Richardson req.alloc_req.flags = flags; 47999a2dd95SBruce Richardson req.alloc_req.elt_size = elt_size; 48099a2dd95SBruce Richardson req.alloc_req.page_sz = pg_sz; 48199a2dd95SBruce Richardson req.alloc_req.socket = socket; 48299a2dd95SBruce Richardson req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps; 48399a2dd95SBruce Richardson 48499a2dd95SBruce Richardson req_result = request_to_primary(&req); 48599a2dd95SBruce Richardson 48699a2dd95SBruce Richardson if (req_result != 0) 48799a2dd95SBruce Richardson return -1; 48899a2dd95SBruce Richardson 48999a2dd95SBruce Richardson if (req.result != REQ_RESULT_SUCCESS) 49099a2dd95SBruce Richardson return -1; 49199a2dd95SBruce Richardson 49299a2dd95SBruce Richardson return 0; 49399a2dd95SBruce Richardson } 49499a2dd95SBruce Richardson 49599a2dd95SBruce Richardson static int 49699a2dd95SBruce Richardson try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, 49799a2dd95SBruce Richardson int socket, unsigned int flags, size_t align, size_t bound, 49899a2dd95SBruce Richardson bool contig) 49999a2dd95SBruce Richardson { 50099a2dd95SBruce Richardson int ret; 50199a2dd95SBruce Richardson 50299a2dd95SBruce Richardson rte_mcfg_mem_write_lock(); 50399a2dd95SBruce Richardson 50499a2dd95SBruce Richardson if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 50599a2dd95SBruce Richardson ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket, 50699a2dd95SBruce Richardson flags, align, bound, contig); 50799a2dd95SBruce Richardson } else { 50899a2dd95SBruce Richardson ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket, 50999a2dd95SBruce Richardson flags, align, bound, contig); 51099a2dd95SBruce Richardson } 51199a2dd95SBruce Richardson 51299a2dd95SBruce Richardson rte_mcfg_mem_write_unlock(); 51399a2dd95SBruce Richardson return ret; 51499a2dd95SBruce Richardson } 51599a2dd95SBruce Richardson 51699a2dd95SBruce Richardson static int 51799a2dd95SBruce Richardson compare_pagesz(const void *a, const void *b) 51899a2dd95SBruce Richardson { 51999a2dd95SBruce Richardson const struct rte_memseg_list * const*mpa = a; 52099a2dd95SBruce Richardson const struct rte_memseg_list * const*mpb = b; 52199a2dd95SBruce Richardson const struct rte_memseg_list *msla = *mpa; 52299a2dd95SBruce Richardson const struct rte_memseg_list *mslb = *mpb; 52399a2dd95SBruce Richardson uint64_t pg_sz_a = msla->page_sz; 52499a2dd95SBruce Richardson uint64_t pg_sz_b = mslb->page_sz; 52599a2dd95SBruce Richardson 52699a2dd95SBruce Richardson if (pg_sz_a < pg_sz_b) 52799a2dd95SBruce Richardson return -1; 52899a2dd95SBruce Richardson if (pg_sz_a > pg_sz_b) 52999a2dd95SBruce Richardson return 1; 53099a2dd95SBruce Richardson return 0; 53199a2dd95SBruce Richardson } 53299a2dd95SBruce Richardson 53399a2dd95SBruce Richardson static int 53499a2dd95SBruce Richardson alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, 53599a2dd95SBruce Richardson unsigned int flags, size_t align, size_t bound, bool contig) 53699a2dd95SBruce Richardson { 53799a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 53899a2dd95SBruce Richardson struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS]; 53999a2dd95SBruce Richardson struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS]; 54099a2dd95SBruce Richardson uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS]; 54199a2dd95SBruce Richardson uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS]; 54299a2dd95SBruce Richardson uint64_t prev_pg_sz; 54399a2dd95SBruce Richardson int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz; 54499a2dd95SBruce Richardson bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0; 54599a2dd95SBruce Richardson unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; 54699a2dd95SBruce Richardson void *ret; 54799a2dd95SBruce Richardson 54899a2dd95SBruce Richardson memset(requested_msls, 0, sizeof(requested_msls)); 54999a2dd95SBruce Richardson memset(other_msls, 0, sizeof(other_msls)); 55099a2dd95SBruce Richardson memset(requested_pg_sz, 0, sizeof(requested_pg_sz)); 55199a2dd95SBruce Richardson memset(other_pg_sz, 0, sizeof(other_pg_sz)); 55299a2dd95SBruce Richardson 55399a2dd95SBruce Richardson /* 55499a2dd95SBruce Richardson * go through memseg list and take note of all the page sizes available, 55599a2dd95SBruce Richardson * and if any of them were specifically requested by the user. 55699a2dd95SBruce Richardson */ 55799a2dd95SBruce Richardson n_requested_msls = 0; 55899a2dd95SBruce Richardson n_other_msls = 0; 55999a2dd95SBruce Richardson for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { 56099a2dd95SBruce Richardson struct rte_memseg_list *msl = &mcfg->memsegs[i]; 56199a2dd95SBruce Richardson 56299a2dd95SBruce Richardson if (msl->socket_id != socket) 56399a2dd95SBruce Richardson continue; 56499a2dd95SBruce Richardson 56599a2dd95SBruce Richardson if (msl->base_va == NULL) 56699a2dd95SBruce Richardson continue; 56799a2dd95SBruce Richardson 56899a2dd95SBruce Richardson /* if pages of specific size were requested */ 56999a2dd95SBruce Richardson if (size_flags != 0 && check_hugepage_sz(size_flags, 57099a2dd95SBruce Richardson msl->page_sz)) 57199a2dd95SBruce Richardson requested_msls[n_requested_msls++] = msl; 57299a2dd95SBruce Richardson else if (size_flags == 0 || size_hint) 57399a2dd95SBruce Richardson other_msls[n_other_msls++] = msl; 57499a2dd95SBruce Richardson } 57599a2dd95SBruce Richardson 57699a2dd95SBruce Richardson /* sort the lists, smallest first */ 57799a2dd95SBruce Richardson qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]), 57899a2dd95SBruce Richardson compare_pagesz); 57999a2dd95SBruce Richardson qsort(other_msls, n_other_msls, sizeof(other_msls[0]), 58099a2dd95SBruce Richardson compare_pagesz); 58199a2dd95SBruce Richardson 58299a2dd95SBruce Richardson /* now, extract page sizes we are supposed to try */ 58399a2dd95SBruce Richardson prev_pg_sz = 0; 58499a2dd95SBruce Richardson n_requested_pg_sz = 0; 58599a2dd95SBruce Richardson for (i = 0; i < n_requested_msls; i++) { 58699a2dd95SBruce Richardson uint64_t pg_sz = requested_msls[i]->page_sz; 58799a2dd95SBruce Richardson 58899a2dd95SBruce Richardson if (prev_pg_sz != pg_sz) { 58999a2dd95SBruce Richardson requested_pg_sz[n_requested_pg_sz++] = pg_sz; 59099a2dd95SBruce Richardson prev_pg_sz = pg_sz; 59199a2dd95SBruce Richardson } 59299a2dd95SBruce Richardson } 59399a2dd95SBruce Richardson prev_pg_sz = 0; 59499a2dd95SBruce Richardson n_other_pg_sz = 0; 59599a2dd95SBruce Richardson for (i = 0; i < n_other_msls; i++) { 59699a2dd95SBruce Richardson uint64_t pg_sz = other_msls[i]->page_sz; 59799a2dd95SBruce Richardson 59899a2dd95SBruce Richardson if (prev_pg_sz != pg_sz) { 59999a2dd95SBruce Richardson other_pg_sz[n_other_pg_sz++] = pg_sz; 60099a2dd95SBruce Richardson prev_pg_sz = pg_sz; 60199a2dd95SBruce Richardson } 60299a2dd95SBruce Richardson } 60399a2dd95SBruce Richardson 60499a2dd95SBruce Richardson /* finally, try allocating memory of specified page sizes, starting from 60599a2dd95SBruce Richardson * the smallest sizes 60699a2dd95SBruce Richardson */ 60799a2dd95SBruce Richardson for (i = 0; i < n_requested_pg_sz; i++) { 60899a2dd95SBruce Richardson uint64_t pg_sz = requested_pg_sz[i]; 60999a2dd95SBruce Richardson 61099a2dd95SBruce Richardson /* 61199a2dd95SBruce Richardson * do not pass the size hint here, as user expects other page 61299a2dd95SBruce Richardson * sizes first, before resorting to best effort allocation. 61399a2dd95SBruce Richardson */ 61499a2dd95SBruce Richardson if (!try_expand_heap(heap, pg_sz, size, socket, size_flags, 61599a2dd95SBruce Richardson align, bound, contig)) 61699a2dd95SBruce Richardson return 0; 61799a2dd95SBruce Richardson } 61899a2dd95SBruce Richardson if (n_other_pg_sz == 0) 61999a2dd95SBruce Richardson return -1; 62099a2dd95SBruce Richardson 62199a2dd95SBruce Richardson /* now, check if we can reserve anything with size hint */ 62299a2dd95SBruce Richardson ret = find_suitable_element(heap, size, flags, align, bound, contig); 62399a2dd95SBruce Richardson if (ret != NULL) 62499a2dd95SBruce Richardson return 0; 62599a2dd95SBruce Richardson 62699a2dd95SBruce Richardson /* 62799a2dd95SBruce Richardson * we still couldn't reserve memory, so try expanding heap with other 62899a2dd95SBruce Richardson * page sizes, if there are any 62999a2dd95SBruce Richardson */ 63099a2dd95SBruce Richardson for (i = 0; i < n_other_pg_sz; i++) { 63199a2dd95SBruce Richardson uint64_t pg_sz = other_pg_sz[i]; 63299a2dd95SBruce Richardson 63399a2dd95SBruce Richardson if (!try_expand_heap(heap, pg_sz, size, socket, flags, 63499a2dd95SBruce Richardson align, bound, contig)) 63599a2dd95SBruce Richardson return 0; 63699a2dd95SBruce Richardson } 63799a2dd95SBruce Richardson return -1; 63899a2dd95SBruce Richardson } 63999a2dd95SBruce Richardson 64099a2dd95SBruce Richardson /* this will try lower page sizes first */ 64199a2dd95SBruce Richardson static void * 64299a2dd95SBruce Richardson malloc_heap_alloc_on_heap_id(const char *type, size_t size, 64399a2dd95SBruce Richardson unsigned int heap_id, unsigned int flags, size_t align, 64499a2dd95SBruce Richardson size_t bound, bool contig) 64599a2dd95SBruce Richardson { 64699a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 64799a2dd95SBruce Richardson struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; 64899a2dd95SBruce Richardson unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; 64999a2dd95SBruce Richardson int socket_id; 65099a2dd95SBruce Richardson void *ret; 65199a2dd95SBruce Richardson const struct internal_config *internal_conf = 65299a2dd95SBruce Richardson eal_get_internal_configuration(); 65399a2dd95SBruce Richardson 65499a2dd95SBruce Richardson rte_spinlock_lock(&(heap->lock)); 65599a2dd95SBruce Richardson 65699a2dd95SBruce Richardson align = align == 0 ? 1 : align; 65799a2dd95SBruce Richardson 65899a2dd95SBruce Richardson /* for legacy mode, try once and with all flags */ 65999a2dd95SBruce Richardson if (internal_conf->legacy_mem) { 66099a2dd95SBruce Richardson ret = heap_alloc(heap, type, size, flags, align, bound, contig); 66199a2dd95SBruce Richardson goto alloc_unlock; 66299a2dd95SBruce Richardson } 66399a2dd95SBruce Richardson 66499a2dd95SBruce Richardson /* 66599a2dd95SBruce Richardson * we do not pass the size hint here, because even if allocation fails, 66699a2dd95SBruce Richardson * we may still be able to allocate memory from appropriate page sizes, 66799a2dd95SBruce Richardson * we just need to request more memory first. 66899a2dd95SBruce Richardson */ 66999a2dd95SBruce Richardson 67099a2dd95SBruce Richardson socket_id = rte_socket_id_by_idx(heap_id); 67199a2dd95SBruce Richardson /* 67299a2dd95SBruce Richardson * if socket ID is negative, we cannot find a socket ID for this heap - 67399a2dd95SBruce Richardson * which means it's an external heap. those can have unexpected page 67499a2dd95SBruce Richardson * sizes, so if the user asked to allocate from there - assume user 67599a2dd95SBruce Richardson * knows what they're doing, and allow allocating from there with any 67699a2dd95SBruce Richardson * page size flags. 67799a2dd95SBruce Richardson */ 67899a2dd95SBruce Richardson if (socket_id < 0) 67999a2dd95SBruce Richardson size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY; 68099a2dd95SBruce Richardson 68199a2dd95SBruce Richardson ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); 68299a2dd95SBruce Richardson if (ret != NULL) 68399a2dd95SBruce Richardson goto alloc_unlock; 68499a2dd95SBruce Richardson 68599a2dd95SBruce Richardson /* if socket ID is invalid, this is an external heap */ 68699a2dd95SBruce Richardson if (socket_id < 0) 68799a2dd95SBruce Richardson goto alloc_unlock; 68899a2dd95SBruce Richardson 68999a2dd95SBruce Richardson if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align, 69099a2dd95SBruce Richardson bound, contig)) { 69199a2dd95SBruce Richardson ret = heap_alloc(heap, type, size, flags, align, bound, contig); 69299a2dd95SBruce Richardson 69399a2dd95SBruce Richardson /* this should have succeeded */ 69499a2dd95SBruce Richardson if (ret == NULL) 69599a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Error allocating from heap\n"); 69699a2dd95SBruce Richardson } 69799a2dd95SBruce Richardson alloc_unlock: 69899a2dd95SBruce Richardson rte_spinlock_unlock(&(heap->lock)); 69999a2dd95SBruce Richardson return ret; 70099a2dd95SBruce Richardson } 70199a2dd95SBruce Richardson 70299a2dd95SBruce Richardson void * 70399a2dd95SBruce Richardson malloc_heap_alloc(const char *type, size_t size, int socket_arg, 70499a2dd95SBruce Richardson unsigned int flags, size_t align, size_t bound, bool contig) 70599a2dd95SBruce Richardson { 70699a2dd95SBruce Richardson int socket, heap_id, i; 70799a2dd95SBruce Richardson void *ret; 70899a2dd95SBruce Richardson 70999a2dd95SBruce Richardson /* return NULL if size is 0 or alignment is not power-of-2 */ 71099a2dd95SBruce Richardson if (size == 0 || (align && !rte_is_power_of_2(align))) 71199a2dd95SBruce Richardson return NULL; 71299a2dd95SBruce Richardson 71399a2dd95SBruce Richardson if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES) 71499a2dd95SBruce Richardson socket_arg = SOCKET_ID_ANY; 71599a2dd95SBruce Richardson 71699a2dd95SBruce Richardson if (socket_arg == SOCKET_ID_ANY) 71799a2dd95SBruce Richardson socket = malloc_get_numa_socket(); 71899a2dd95SBruce Richardson else 71999a2dd95SBruce Richardson socket = socket_arg; 72099a2dd95SBruce Richardson 72199a2dd95SBruce Richardson /* turn socket ID into heap ID */ 72299a2dd95SBruce Richardson heap_id = malloc_socket_to_heap_id(socket); 72399a2dd95SBruce Richardson /* if heap id is negative, socket ID was invalid */ 72499a2dd95SBruce Richardson if (heap_id < 0) 72599a2dd95SBruce Richardson return NULL; 72699a2dd95SBruce Richardson 72799a2dd95SBruce Richardson ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align, 72899a2dd95SBruce Richardson bound, contig); 72999a2dd95SBruce Richardson if (ret != NULL || socket_arg != SOCKET_ID_ANY) 73099a2dd95SBruce Richardson return ret; 73199a2dd95SBruce Richardson 73299a2dd95SBruce Richardson /* try other heaps. we are only iterating through native DPDK sockets, 73399a2dd95SBruce Richardson * so external heaps won't be included. 73499a2dd95SBruce Richardson */ 73599a2dd95SBruce Richardson for (i = 0; i < (int) rte_socket_count(); i++) { 73699a2dd95SBruce Richardson if (i == heap_id) 73799a2dd95SBruce Richardson continue; 73899a2dd95SBruce Richardson ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align, 73999a2dd95SBruce Richardson bound, contig); 74099a2dd95SBruce Richardson if (ret != NULL) 74199a2dd95SBruce Richardson return ret; 74299a2dd95SBruce Richardson } 74399a2dd95SBruce Richardson return NULL; 74499a2dd95SBruce Richardson } 74599a2dd95SBruce Richardson 74699a2dd95SBruce Richardson static void * 74799a2dd95SBruce Richardson heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id, 74899a2dd95SBruce Richardson unsigned int flags, size_t align, bool contig) 74999a2dd95SBruce Richardson { 75099a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 75199a2dd95SBruce Richardson struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; 75299a2dd95SBruce Richardson void *ret; 75399a2dd95SBruce Richardson 75499a2dd95SBruce Richardson rte_spinlock_lock(&(heap->lock)); 75599a2dd95SBruce Richardson 75699a2dd95SBruce Richardson align = align == 0 ? 1 : align; 75799a2dd95SBruce Richardson 75899a2dd95SBruce Richardson ret = heap_alloc_biggest(heap, type, flags, align, contig); 75999a2dd95SBruce Richardson 76099a2dd95SBruce Richardson rte_spinlock_unlock(&(heap->lock)); 76199a2dd95SBruce Richardson 76299a2dd95SBruce Richardson return ret; 76399a2dd95SBruce Richardson } 76499a2dd95SBruce Richardson 76599a2dd95SBruce Richardson void * 76699a2dd95SBruce Richardson malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, 76799a2dd95SBruce Richardson size_t align, bool contig) 76899a2dd95SBruce Richardson { 76999a2dd95SBruce Richardson int socket, i, cur_socket, heap_id; 77099a2dd95SBruce Richardson void *ret; 77199a2dd95SBruce Richardson 77299a2dd95SBruce Richardson /* return NULL if align is not power-of-2 */ 77399a2dd95SBruce Richardson if ((align && !rte_is_power_of_2(align))) 77499a2dd95SBruce Richardson return NULL; 77599a2dd95SBruce Richardson 77699a2dd95SBruce Richardson if (!rte_eal_has_hugepages()) 77799a2dd95SBruce Richardson socket_arg = SOCKET_ID_ANY; 77899a2dd95SBruce Richardson 77999a2dd95SBruce Richardson if (socket_arg == SOCKET_ID_ANY) 78099a2dd95SBruce Richardson socket = malloc_get_numa_socket(); 78199a2dd95SBruce Richardson else 78299a2dd95SBruce Richardson socket = socket_arg; 78399a2dd95SBruce Richardson 78499a2dd95SBruce Richardson /* turn socket ID into heap ID */ 78599a2dd95SBruce Richardson heap_id = malloc_socket_to_heap_id(socket); 78699a2dd95SBruce Richardson /* if heap id is negative, socket ID was invalid */ 78799a2dd95SBruce Richardson if (heap_id < 0) 78899a2dd95SBruce Richardson return NULL; 78999a2dd95SBruce Richardson 79099a2dd95SBruce Richardson ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align, 79199a2dd95SBruce Richardson contig); 79299a2dd95SBruce Richardson if (ret != NULL || socket_arg != SOCKET_ID_ANY) 79399a2dd95SBruce Richardson return ret; 79499a2dd95SBruce Richardson 79599a2dd95SBruce Richardson /* try other heaps */ 79699a2dd95SBruce Richardson for (i = 0; i < (int) rte_socket_count(); i++) { 79799a2dd95SBruce Richardson cur_socket = rte_socket_id_by_idx(i); 79899a2dd95SBruce Richardson if (cur_socket == socket) 79999a2dd95SBruce Richardson continue; 80099a2dd95SBruce Richardson ret = heap_alloc_biggest_on_heap_id(type, i, flags, align, 80199a2dd95SBruce Richardson contig); 80299a2dd95SBruce Richardson if (ret != NULL) 80399a2dd95SBruce Richardson return ret; 80499a2dd95SBruce Richardson } 80599a2dd95SBruce Richardson return NULL; 80699a2dd95SBruce Richardson } 80799a2dd95SBruce Richardson 80899a2dd95SBruce Richardson /* this function is exposed in malloc_mp.h */ 80999a2dd95SBruce Richardson int 81099a2dd95SBruce Richardson malloc_heap_free_pages(void *aligned_start, size_t aligned_len) 81199a2dd95SBruce Richardson { 81299a2dd95SBruce Richardson int n_segs, seg_idx, max_seg_idx; 81399a2dd95SBruce Richardson struct rte_memseg_list *msl; 81499a2dd95SBruce Richardson size_t page_sz; 81599a2dd95SBruce Richardson 81699a2dd95SBruce Richardson msl = rte_mem_virt2memseg_list(aligned_start); 81799a2dd95SBruce Richardson if (msl == NULL) 81899a2dd95SBruce Richardson return -1; 81999a2dd95SBruce Richardson 82099a2dd95SBruce Richardson page_sz = (size_t)msl->page_sz; 82199a2dd95SBruce Richardson n_segs = aligned_len / page_sz; 82299a2dd95SBruce Richardson seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz; 82399a2dd95SBruce Richardson max_seg_idx = seg_idx + n_segs; 82499a2dd95SBruce Richardson 82599a2dd95SBruce Richardson for (; seg_idx < max_seg_idx; seg_idx++) { 82699a2dd95SBruce Richardson struct rte_memseg *ms; 82799a2dd95SBruce Richardson 82899a2dd95SBruce Richardson ms = rte_fbarray_get(&msl->memseg_arr, seg_idx); 82999a2dd95SBruce Richardson eal_memalloc_free_seg(ms); 83099a2dd95SBruce Richardson } 83199a2dd95SBruce Richardson return 0; 83299a2dd95SBruce Richardson } 83399a2dd95SBruce Richardson 83499a2dd95SBruce Richardson int 83599a2dd95SBruce Richardson malloc_heap_free(struct malloc_elem *elem) 83699a2dd95SBruce Richardson { 83799a2dd95SBruce Richardson struct malloc_heap *heap; 83899a2dd95SBruce Richardson void *start, *aligned_start, *end, *aligned_end; 83999a2dd95SBruce Richardson size_t len, aligned_len, page_sz; 84099a2dd95SBruce Richardson struct rte_memseg_list *msl; 84199a2dd95SBruce Richardson unsigned int i, n_segs, before_space, after_space; 84299a2dd95SBruce Richardson int ret; 84399a2dd95SBruce Richardson const struct internal_config *internal_conf = 84499a2dd95SBruce Richardson eal_get_internal_configuration(); 84599a2dd95SBruce Richardson 84699a2dd95SBruce Richardson if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) 84799a2dd95SBruce Richardson return -1; 84899a2dd95SBruce Richardson 849*6cc51b12SZhihong Peng asan_clear_redzone(elem); 850*6cc51b12SZhihong Peng 85199a2dd95SBruce Richardson /* elem may be merged with previous element, so keep heap address */ 85299a2dd95SBruce Richardson heap = elem->heap; 85399a2dd95SBruce Richardson msl = elem->msl; 85499a2dd95SBruce Richardson page_sz = (size_t)msl->page_sz; 85599a2dd95SBruce Richardson 85699a2dd95SBruce Richardson rte_spinlock_lock(&(heap->lock)); 85799a2dd95SBruce Richardson 858*6cc51b12SZhihong Peng void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad); 859*6cc51b12SZhihong Peng size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad; 860*6cc51b12SZhihong Peng 86199a2dd95SBruce Richardson /* mark element as free */ 86299a2dd95SBruce Richardson elem->state = ELEM_FREE; 86399a2dd95SBruce Richardson 86499a2dd95SBruce Richardson elem = malloc_elem_free(elem); 86599a2dd95SBruce Richardson 86699a2dd95SBruce Richardson /* anything after this is a bonus */ 86799a2dd95SBruce Richardson ret = 0; 86899a2dd95SBruce Richardson 86999a2dd95SBruce Richardson /* ...of which we can't avail if we are in legacy mode, or if this is an 87099a2dd95SBruce Richardson * externally allocated segment. 87199a2dd95SBruce Richardson */ 87299a2dd95SBruce Richardson if (internal_conf->legacy_mem || (msl->external > 0)) 87399a2dd95SBruce Richardson goto free_unlock; 87499a2dd95SBruce Richardson 87599a2dd95SBruce Richardson /* check if we can free any memory back to the system */ 87699a2dd95SBruce Richardson if (elem->size < page_sz) 87799a2dd95SBruce Richardson goto free_unlock; 87899a2dd95SBruce Richardson 87999a2dd95SBruce Richardson /* if user requested to match allocations, the sizes must match - if not, 88099a2dd95SBruce Richardson * we will defer freeing these hugepages until the entire original allocation 88199a2dd95SBruce Richardson * can be freed 88299a2dd95SBruce Richardson */ 88399a2dd95SBruce Richardson if (internal_conf->match_allocations && elem->size != elem->orig_size) 88499a2dd95SBruce Richardson goto free_unlock; 88599a2dd95SBruce Richardson 88699a2dd95SBruce Richardson /* probably, but let's make sure, as we may not be using up full page */ 88799a2dd95SBruce Richardson start = elem; 88899a2dd95SBruce Richardson len = elem->size; 88999a2dd95SBruce Richardson aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz); 89099a2dd95SBruce Richardson end = RTE_PTR_ADD(elem, len); 89199a2dd95SBruce Richardson aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz); 89299a2dd95SBruce Richardson 89399a2dd95SBruce Richardson aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); 89499a2dd95SBruce Richardson 89599a2dd95SBruce Richardson /* can't free anything */ 89699a2dd95SBruce Richardson if (aligned_len < page_sz) 89799a2dd95SBruce Richardson goto free_unlock; 89899a2dd95SBruce Richardson 89999a2dd95SBruce Richardson /* we can free something. however, some of these pages may be marked as 90099a2dd95SBruce Richardson * unfreeable, so also check that as well 90199a2dd95SBruce Richardson */ 90299a2dd95SBruce Richardson n_segs = aligned_len / page_sz; 90399a2dd95SBruce Richardson for (i = 0; i < n_segs; i++) { 90499a2dd95SBruce Richardson const struct rte_memseg *tmp = 90599a2dd95SBruce Richardson rte_mem_virt2memseg(aligned_start, msl); 90699a2dd95SBruce Richardson 90799a2dd95SBruce Richardson if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) { 90899a2dd95SBruce Richardson /* this is an unfreeable segment, so move start */ 90999a2dd95SBruce Richardson aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len); 91099a2dd95SBruce Richardson } 91199a2dd95SBruce Richardson } 91299a2dd95SBruce Richardson 91399a2dd95SBruce Richardson /* recalculate length and number of segments */ 91499a2dd95SBruce Richardson aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); 91599a2dd95SBruce Richardson n_segs = aligned_len / page_sz; 91699a2dd95SBruce Richardson 91799a2dd95SBruce Richardson /* check if we can still free some pages */ 91899a2dd95SBruce Richardson if (n_segs == 0) 91999a2dd95SBruce Richardson goto free_unlock; 92099a2dd95SBruce Richardson 92199a2dd95SBruce Richardson /* We're not done yet. We also have to check if by freeing space we will 92299a2dd95SBruce Richardson * be leaving free elements that are too small to store new elements. 92399a2dd95SBruce Richardson * Check if we have enough space in the beginning and at the end, or if 92499a2dd95SBruce Richardson * start/end are exactly page aligned. 92599a2dd95SBruce Richardson */ 92699a2dd95SBruce Richardson before_space = RTE_PTR_DIFF(aligned_start, elem); 92799a2dd95SBruce Richardson after_space = RTE_PTR_DIFF(end, aligned_end); 92899a2dd95SBruce Richardson if (before_space != 0 && 92999a2dd95SBruce Richardson before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { 93099a2dd95SBruce Richardson /* There is not enough space before start, but we may be able to 93199a2dd95SBruce Richardson * move the start forward by one page. 93299a2dd95SBruce Richardson */ 93399a2dd95SBruce Richardson if (n_segs == 1) 93499a2dd95SBruce Richardson goto free_unlock; 93599a2dd95SBruce Richardson 93699a2dd95SBruce Richardson /* move start */ 93799a2dd95SBruce Richardson aligned_start = RTE_PTR_ADD(aligned_start, page_sz); 93899a2dd95SBruce Richardson aligned_len -= page_sz; 93999a2dd95SBruce Richardson n_segs--; 94099a2dd95SBruce Richardson } 94199a2dd95SBruce Richardson if (after_space != 0 && after_space < 94299a2dd95SBruce Richardson MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { 94399a2dd95SBruce Richardson /* There is not enough space after end, but we may be able to 94499a2dd95SBruce Richardson * move the end backwards by one page. 94599a2dd95SBruce Richardson */ 94699a2dd95SBruce Richardson if (n_segs == 1) 94799a2dd95SBruce Richardson goto free_unlock; 94899a2dd95SBruce Richardson 94999a2dd95SBruce Richardson /* move end */ 95099a2dd95SBruce Richardson aligned_end = RTE_PTR_SUB(aligned_end, page_sz); 95199a2dd95SBruce Richardson aligned_len -= page_sz; 95299a2dd95SBruce Richardson n_segs--; 95399a2dd95SBruce Richardson } 95499a2dd95SBruce Richardson 95599a2dd95SBruce Richardson /* now we can finally free us some pages */ 95699a2dd95SBruce Richardson 95799a2dd95SBruce Richardson rte_mcfg_mem_write_lock(); 95899a2dd95SBruce Richardson 95999a2dd95SBruce Richardson /* 96099a2dd95SBruce Richardson * we allow secondary processes to clear the heap of this allocated 96199a2dd95SBruce Richardson * memory because it is safe to do so, as even if notifications about 96299a2dd95SBruce Richardson * unmapped pages don't make it to other processes, heap is shared 96399a2dd95SBruce Richardson * across all processes, and will become empty of this memory anyway, 96499a2dd95SBruce Richardson * and nothing can allocate it back unless primary process will be able 96599a2dd95SBruce Richardson * to deliver allocation message to every single running process. 96699a2dd95SBruce Richardson */ 96799a2dd95SBruce Richardson 96899a2dd95SBruce Richardson malloc_elem_free_list_remove(elem); 96999a2dd95SBruce Richardson 97099a2dd95SBruce Richardson malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len); 97199a2dd95SBruce Richardson 97299a2dd95SBruce Richardson heap->total_size -= aligned_len; 97399a2dd95SBruce Richardson 97499a2dd95SBruce Richardson if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 97599a2dd95SBruce Richardson /* notify user about changes in memory map */ 97699a2dd95SBruce Richardson eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, 97799a2dd95SBruce Richardson aligned_start, aligned_len); 97899a2dd95SBruce Richardson 97999a2dd95SBruce Richardson /* don't care if any of this fails */ 98099a2dd95SBruce Richardson malloc_heap_free_pages(aligned_start, aligned_len); 98199a2dd95SBruce Richardson 98299a2dd95SBruce Richardson request_sync(); 98399a2dd95SBruce Richardson } else { 98499a2dd95SBruce Richardson struct malloc_mp_req req; 98599a2dd95SBruce Richardson 98699a2dd95SBruce Richardson memset(&req, 0, sizeof(req)); 98799a2dd95SBruce Richardson 98899a2dd95SBruce Richardson req.t = REQ_TYPE_FREE; 98999a2dd95SBruce Richardson req.free_req.addr = aligned_start; 99099a2dd95SBruce Richardson req.free_req.len = aligned_len; 99199a2dd95SBruce Richardson 99299a2dd95SBruce Richardson /* 99399a2dd95SBruce Richardson * we request primary to deallocate pages, but we don't do it 99499a2dd95SBruce Richardson * in this thread. instead, we notify primary that we would like 99599a2dd95SBruce Richardson * to deallocate pages, and this process will receive another 99699a2dd95SBruce Richardson * request (in parallel) that will do it for us on another 99799a2dd95SBruce Richardson * thread. 99899a2dd95SBruce Richardson * 99999a2dd95SBruce Richardson * we also don't really care if this succeeds - the data is 100099a2dd95SBruce Richardson * already removed from the heap, so it is, for all intents and 100199a2dd95SBruce Richardson * purposes, hidden from the rest of DPDK even if some other 100299a2dd95SBruce Richardson * process (including this one) may have these pages mapped. 100399a2dd95SBruce Richardson * 100499a2dd95SBruce Richardson * notifications about deallocated memory happen during sync. 100599a2dd95SBruce Richardson */ 100699a2dd95SBruce Richardson request_to_primary(&req); 100799a2dd95SBruce Richardson } 100899a2dd95SBruce Richardson 100999a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n", 101099a2dd95SBruce Richardson msl->socket_id, aligned_len >> 20ULL); 101199a2dd95SBruce Richardson 101299a2dd95SBruce Richardson rte_mcfg_mem_write_unlock(); 101399a2dd95SBruce Richardson free_unlock: 1014*6cc51b12SZhihong Peng asan_set_freezone(asan_ptr, asan_data_len); 1015*6cc51b12SZhihong Peng 101699a2dd95SBruce Richardson rte_spinlock_unlock(&(heap->lock)); 101799a2dd95SBruce Richardson return ret; 101899a2dd95SBruce Richardson } 101999a2dd95SBruce Richardson 102099a2dd95SBruce Richardson int 102199a2dd95SBruce Richardson malloc_heap_resize(struct malloc_elem *elem, size_t size) 102299a2dd95SBruce Richardson { 102399a2dd95SBruce Richardson int ret; 102499a2dd95SBruce Richardson 102599a2dd95SBruce Richardson if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) 102699a2dd95SBruce Richardson return -1; 102799a2dd95SBruce Richardson 102899a2dd95SBruce Richardson rte_spinlock_lock(&(elem->heap->lock)); 102999a2dd95SBruce Richardson 103099a2dd95SBruce Richardson ret = malloc_elem_resize(elem, size); 103199a2dd95SBruce Richardson 103299a2dd95SBruce Richardson rte_spinlock_unlock(&(elem->heap->lock)); 103399a2dd95SBruce Richardson 103499a2dd95SBruce Richardson return ret; 103599a2dd95SBruce Richardson } 103699a2dd95SBruce Richardson 103799a2dd95SBruce Richardson /* 103899a2dd95SBruce Richardson * Function to retrieve data for a given heap 103999a2dd95SBruce Richardson */ 104099a2dd95SBruce Richardson int 104199a2dd95SBruce Richardson malloc_heap_get_stats(struct malloc_heap *heap, 104299a2dd95SBruce Richardson struct rte_malloc_socket_stats *socket_stats) 104399a2dd95SBruce Richardson { 104499a2dd95SBruce Richardson size_t idx; 104599a2dd95SBruce Richardson struct malloc_elem *elem; 104699a2dd95SBruce Richardson 104799a2dd95SBruce Richardson rte_spinlock_lock(&heap->lock); 104899a2dd95SBruce Richardson 104999a2dd95SBruce Richardson /* Initialise variables for heap */ 105099a2dd95SBruce Richardson socket_stats->free_count = 0; 105199a2dd95SBruce Richardson socket_stats->heap_freesz_bytes = 0; 105299a2dd95SBruce Richardson socket_stats->greatest_free_size = 0; 105399a2dd95SBruce Richardson 105499a2dd95SBruce Richardson /* Iterate through free list */ 105599a2dd95SBruce Richardson for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { 105699a2dd95SBruce Richardson for (elem = LIST_FIRST(&heap->free_head[idx]); 105799a2dd95SBruce Richardson !!elem; elem = LIST_NEXT(elem, free_list)) 105899a2dd95SBruce Richardson { 105999a2dd95SBruce Richardson socket_stats->free_count++; 106099a2dd95SBruce Richardson socket_stats->heap_freesz_bytes += elem->size; 106199a2dd95SBruce Richardson if (elem->size > socket_stats->greatest_free_size) 106299a2dd95SBruce Richardson socket_stats->greatest_free_size = elem->size; 106399a2dd95SBruce Richardson } 106499a2dd95SBruce Richardson } 106599a2dd95SBruce Richardson /* Get stats on overall heap and allocated memory on this heap */ 106699a2dd95SBruce Richardson socket_stats->heap_totalsz_bytes = heap->total_size; 106799a2dd95SBruce Richardson socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - 106899a2dd95SBruce Richardson socket_stats->heap_freesz_bytes); 106999a2dd95SBruce Richardson socket_stats->alloc_count = heap->alloc_count; 107099a2dd95SBruce Richardson 107199a2dd95SBruce Richardson rte_spinlock_unlock(&heap->lock); 107299a2dd95SBruce Richardson return 0; 107399a2dd95SBruce Richardson } 107499a2dd95SBruce Richardson 107599a2dd95SBruce Richardson /* 107699a2dd95SBruce Richardson * Function to retrieve data for a given heap 107799a2dd95SBruce Richardson */ 107899a2dd95SBruce Richardson void 107999a2dd95SBruce Richardson malloc_heap_dump(struct malloc_heap *heap, FILE *f) 108099a2dd95SBruce Richardson { 108199a2dd95SBruce Richardson struct malloc_elem *elem; 108299a2dd95SBruce Richardson 108399a2dd95SBruce Richardson rte_spinlock_lock(&heap->lock); 108499a2dd95SBruce Richardson 108599a2dd95SBruce Richardson fprintf(f, "Heap size: 0x%zx\n", heap->total_size); 108699a2dd95SBruce Richardson fprintf(f, "Heap alloc count: %u\n", heap->alloc_count); 108799a2dd95SBruce Richardson 108899a2dd95SBruce Richardson elem = heap->first; 108999a2dd95SBruce Richardson while (elem) { 109099a2dd95SBruce Richardson malloc_elem_dump(elem, f); 109199a2dd95SBruce Richardson elem = elem->next; 109299a2dd95SBruce Richardson } 109399a2dd95SBruce Richardson 109499a2dd95SBruce Richardson rte_spinlock_unlock(&heap->lock); 109599a2dd95SBruce Richardson } 109699a2dd95SBruce Richardson 109799a2dd95SBruce Richardson static int 109899a2dd95SBruce Richardson destroy_elem(struct malloc_elem *elem, size_t len) 109999a2dd95SBruce Richardson { 110099a2dd95SBruce Richardson struct malloc_heap *heap = elem->heap; 110199a2dd95SBruce Richardson 110299a2dd95SBruce Richardson /* notify all subscribers that a memory area is going to be removed */ 110399a2dd95SBruce Richardson eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len); 110499a2dd95SBruce Richardson 110599a2dd95SBruce Richardson /* this element can be removed */ 110699a2dd95SBruce Richardson malloc_elem_free_list_remove(elem); 110799a2dd95SBruce Richardson malloc_elem_hide_region(elem, elem, len); 110899a2dd95SBruce Richardson 110999a2dd95SBruce Richardson heap->total_size -= len; 111099a2dd95SBruce Richardson 111199a2dd95SBruce Richardson memset(elem, 0, sizeof(*elem)); 111299a2dd95SBruce Richardson 111399a2dd95SBruce Richardson return 0; 111499a2dd95SBruce Richardson } 111599a2dd95SBruce Richardson 111699a2dd95SBruce Richardson struct rte_memseg_list * 111799a2dd95SBruce Richardson malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], 111899a2dd95SBruce Richardson unsigned int n_pages, size_t page_sz, const char *seg_name, 111999a2dd95SBruce Richardson unsigned int socket_id) 112099a2dd95SBruce Richardson { 112199a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 112299a2dd95SBruce Richardson char fbarray_name[RTE_FBARRAY_NAME_LEN]; 112399a2dd95SBruce Richardson struct rte_memseg_list *msl = NULL; 112499a2dd95SBruce Richardson struct rte_fbarray *arr; 112599a2dd95SBruce Richardson size_t seg_len = n_pages * page_sz; 112699a2dd95SBruce Richardson unsigned int i; 112799a2dd95SBruce Richardson 112899a2dd95SBruce Richardson /* first, find a free memseg list */ 112999a2dd95SBruce Richardson for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { 113099a2dd95SBruce Richardson struct rte_memseg_list *tmp = &mcfg->memsegs[i]; 113199a2dd95SBruce Richardson if (tmp->base_va == NULL) { 113299a2dd95SBruce Richardson msl = tmp; 113399a2dd95SBruce Richardson break; 113499a2dd95SBruce Richardson } 113599a2dd95SBruce Richardson } 113699a2dd95SBruce Richardson if (msl == NULL) { 113799a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n"); 113899a2dd95SBruce Richardson rte_errno = ENOSPC; 113999a2dd95SBruce Richardson return NULL; 114099a2dd95SBruce Richardson } 114199a2dd95SBruce Richardson 114299a2dd95SBruce Richardson snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p", 114399a2dd95SBruce Richardson seg_name, va_addr); 114499a2dd95SBruce Richardson 114599a2dd95SBruce Richardson /* create the backing fbarray */ 114699a2dd95SBruce Richardson if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages, 114799a2dd95SBruce Richardson sizeof(struct rte_memseg)) < 0) { 114899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n"); 114999a2dd95SBruce Richardson return NULL; 115099a2dd95SBruce Richardson } 115199a2dd95SBruce Richardson arr = &msl->memseg_arr; 115299a2dd95SBruce Richardson 115399a2dd95SBruce Richardson /* fbarray created, fill it up */ 115499a2dd95SBruce Richardson for (i = 0; i < n_pages; i++) { 115599a2dd95SBruce Richardson struct rte_memseg *ms; 115699a2dd95SBruce Richardson 115799a2dd95SBruce Richardson rte_fbarray_set_used(arr, i); 115899a2dd95SBruce Richardson ms = rte_fbarray_get(arr, i); 115999a2dd95SBruce Richardson ms->addr = RTE_PTR_ADD(va_addr, i * page_sz); 116099a2dd95SBruce Richardson ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i]; 116199a2dd95SBruce Richardson ms->hugepage_sz = page_sz; 116299a2dd95SBruce Richardson ms->len = page_sz; 116399a2dd95SBruce Richardson ms->nchannel = rte_memory_get_nchannel(); 116499a2dd95SBruce Richardson ms->nrank = rte_memory_get_nrank(); 116599a2dd95SBruce Richardson ms->socket_id = socket_id; 116699a2dd95SBruce Richardson } 116799a2dd95SBruce Richardson 116899a2dd95SBruce Richardson /* set up the memseg list */ 116999a2dd95SBruce Richardson msl->base_va = va_addr; 117099a2dd95SBruce Richardson msl->page_sz = page_sz; 117199a2dd95SBruce Richardson msl->socket_id = socket_id; 117299a2dd95SBruce Richardson msl->len = seg_len; 117399a2dd95SBruce Richardson msl->version = 0; 117499a2dd95SBruce Richardson msl->external = 1; 117599a2dd95SBruce Richardson 117699a2dd95SBruce Richardson return msl; 117799a2dd95SBruce Richardson } 117899a2dd95SBruce Richardson 117999a2dd95SBruce Richardson struct extseg_walk_arg { 118099a2dd95SBruce Richardson void *va_addr; 118199a2dd95SBruce Richardson size_t len; 118299a2dd95SBruce Richardson struct rte_memseg_list *msl; 118399a2dd95SBruce Richardson }; 118499a2dd95SBruce Richardson 118599a2dd95SBruce Richardson static int 118699a2dd95SBruce Richardson extseg_walk(const struct rte_memseg_list *msl, void *arg) 118799a2dd95SBruce Richardson { 118899a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 118999a2dd95SBruce Richardson struct extseg_walk_arg *wa = arg; 119099a2dd95SBruce Richardson 119199a2dd95SBruce Richardson if (msl->base_va == wa->va_addr && msl->len == wa->len) { 119299a2dd95SBruce Richardson unsigned int found_idx; 119399a2dd95SBruce Richardson 119499a2dd95SBruce Richardson /* msl is const */ 119599a2dd95SBruce Richardson found_idx = msl - mcfg->memsegs; 119699a2dd95SBruce Richardson wa->msl = &mcfg->memsegs[found_idx]; 119799a2dd95SBruce Richardson return 1; 119899a2dd95SBruce Richardson } 119999a2dd95SBruce Richardson return 0; 120099a2dd95SBruce Richardson } 120199a2dd95SBruce Richardson 120299a2dd95SBruce Richardson struct rte_memseg_list * 120399a2dd95SBruce Richardson malloc_heap_find_external_seg(void *va_addr, size_t len) 120499a2dd95SBruce Richardson { 120599a2dd95SBruce Richardson struct extseg_walk_arg wa; 120699a2dd95SBruce Richardson int res; 120799a2dd95SBruce Richardson 120899a2dd95SBruce Richardson wa.va_addr = va_addr; 120999a2dd95SBruce Richardson wa.len = len; 121099a2dd95SBruce Richardson 121199a2dd95SBruce Richardson res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa); 121299a2dd95SBruce Richardson 121399a2dd95SBruce Richardson if (res != 1) { 121499a2dd95SBruce Richardson /* 0 means nothing was found, -1 shouldn't happen */ 121599a2dd95SBruce Richardson if (res == 0) 121699a2dd95SBruce Richardson rte_errno = ENOENT; 121799a2dd95SBruce Richardson return NULL; 121899a2dd95SBruce Richardson } 121999a2dd95SBruce Richardson return wa.msl; 122099a2dd95SBruce Richardson } 122199a2dd95SBruce Richardson 122299a2dd95SBruce Richardson int 122399a2dd95SBruce Richardson malloc_heap_destroy_external_seg(struct rte_memseg_list *msl) 122499a2dd95SBruce Richardson { 122599a2dd95SBruce Richardson /* destroy the fbarray backing this memory */ 122699a2dd95SBruce Richardson if (rte_fbarray_destroy(&msl->memseg_arr) < 0) 122799a2dd95SBruce Richardson return -1; 122899a2dd95SBruce Richardson 122999a2dd95SBruce Richardson /* reset the memseg list */ 123099a2dd95SBruce Richardson memset(msl, 0, sizeof(*msl)); 123199a2dd95SBruce Richardson 123299a2dd95SBruce Richardson return 0; 123399a2dd95SBruce Richardson } 123499a2dd95SBruce Richardson 123599a2dd95SBruce Richardson int 123699a2dd95SBruce Richardson malloc_heap_add_external_memory(struct malloc_heap *heap, 123799a2dd95SBruce Richardson struct rte_memseg_list *msl) 123899a2dd95SBruce Richardson { 123999a2dd95SBruce Richardson /* erase contents of new memory */ 124099a2dd95SBruce Richardson memset(msl->base_va, 0, msl->len); 124199a2dd95SBruce Richardson 124299a2dd95SBruce Richardson /* now, add newly minted memory to the malloc heap */ 124399a2dd95SBruce Richardson malloc_heap_add_memory(heap, msl, msl->base_va, msl->len); 124499a2dd95SBruce Richardson 124599a2dd95SBruce Richardson heap->total_size += msl->len; 124699a2dd95SBruce Richardson 124799a2dd95SBruce Richardson /* all done! */ 124899a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n", 124999a2dd95SBruce Richardson heap->name, msl->base_va); 125099a2dd95SBruce Richardson 125199a2dd95SBruce Richardson /* notify all subscribers that a new memory area has been added */ 125299a2dd95SBruce Richardson eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, 125399a2dd95SBruce Richardson msl->base_va, msl->len); 125499a2dd95SBruce Richardson 125599a2dd95SBruce Richardson return 0; 125699a2dd95SBruce Richardson } 125799a2dd95SBruce Richardson 125899a2dd95SBruce Richardson int 125999a2dd95SBruce Richardson malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, 126099a2dd95SBruce Richardson size_t len) 126199a2dd95SBruce Richardson { 126299a2dd95SBruce Richardson struct malloc_elem *elem = heap->first; 126399a2dd95SBruce Richardson 126499a2dd95SBruce Richardson /* find element with specified va address */ 126599a2dd95SBruce Richardson while (elem != NULL && elem != va_addr) { 126699a2dd95SBruce Richardson elem = elem->next; 126799a2dd95SBruce Richardson /* stop if we've blown past our VA */ 126899a2dd95SBruce Richardson if (elem > (struct malloc_elem *)va_addr) { 126999a2dd95SBruce Richardson rte_errno = ENOENT; 127099a2dd95SBruce Richardson return -1; 127199a2dd95SBruce Richardson } 127299a2dd95SBruce Richardson } 127399a2dd95SBruce Richardson /* check if element was found */ 127499a2dd95SBruce Richardson if (elem == NULL || elem->msl->len != len) { 127599a2dd95SBruce Richardson rte_errno = ENOENT; 127699a2dd95SBruce Richardson return -1; 127799a2dd95SBruce Richardson } 127899a2dd95SBruce Richardson /* if element's size is not equal to segment len, segment is busy */ 127999a2dd95SBruce Richardson if (elem->state == ELEM_BUSY || elem->size != len) { 128099a2dd95SBruce Richardson rte_errno = EBUSY; 128199a2dd95SBruce Richardson return -1; 128299a2dd95SBruce Richardson } 128399a2dd95SBruce Richardson return destroy_elem(elem, len); 128499a2dd95SBruce Richardson } 128599a2dd95SBruce Richardson 128699a2dd95SBruce Richardson int 128799a2dd95SBruce Richardson malloc_heap_create(struct malloc_heap *heap, const char *heap_name) 128899a2dd95SBruce Richardson { 128999a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 129099a2dd95SBruce Richardson uint32_t next_socket_id = mcfg->next_socket_id; 129199a2dd95SBruce Richardson 129299a2dd95SBruce Richardson /* prevent overflow. did you really create 2 billion heaps??? */ 129399a2dd95SBruce Richardson if (next_socket_id > INT32_MAX) { 129499a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); 129599a2dd95SBruce Richardson rte_errno = ENOSPC; 129699a2dd95SBruce Richardson return -1; 129799a2dd95SBruce Richardson } 129899a2dd95SBruce Richardson 129999a2dd95SBruce Richardson /* initialize empty heap */ 130099a2dd95SBruce Richardson heap->alloc_count = 0; 130199a2dd95SBruce Richardson heap->first = NULL; 130299a2dd95SBruce Richardson heap->last = NULL; 130399a2dd95SBruce Richardson LIST_INIT(heap->free_head); 130499a2dd95SBruce Richardson rte_spinlock_init(&heap->lock); 130599a2dd95SBruce Richardson heap->total_size = 0; 130699a2dd95SBruce Richardson heap->socket_id = next_socket_id; 130799a2dd95SBruce Richardson 130899a2dd95SBruce Richardson /* we hold a global mem hotplug writelock, so it's safe to increment */ 130999a2dd95SBruce Richardson mcfg->next_socket_id++; 131099a2dd95SBruce Richardson 131199a2dd95SBruce Richardson /* set up name */ 131299a2dd95SBruce Richardson strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); 131399a2dd95SBruce Richardson return 0; 131499a2dd95SBruce Richardson } 131599a2dd95SBruce Richardson 131699a2dd95SBruce Richardson int 131799a2dd95SBruce Richardson malloc_heap_destroy(struct malloc_heap *heap) 131899a2dd95SBruce Richardson { 131999a2dd95SBruce Richardson if (heap->alloc_count != 0) { 132099a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Heap is still in use\n"); 132199a2dd95SBruce Richardson rte_errno = EBUSY; 132299a2dd95SBruce Richardson return -1; 132399a2dd95SBruce Richardson } 132499a2dd95SBruce Richardson if (heap->first != NULL || heap->last != NULL) { 132599a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Heap still contains memory segments\n"); 132699a2dd95SBruce Richardson rte_errno = EBUSY; 132799a2dd95SBruce Richardson return -1; 132899a2dd95SBruce Richardson } 132999a2dd95SBruce Richardson if (heap->total_size != 0) 133099a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n"); 133199a2dd95SBruce Richardson 133299a2dd95SBruce Richardson /* after this, the lock will be dropped */ 133399a2dd95SBruce Richardson memset(heap, 0, sizeof(*heap)); 133499a2dd95SBruce Richardson 133599a2dd95SBruce Richardson return 0; 133699a2dd95SBruce Richardson } 133799a2dd95SBruce Richardson 133899a2dd95SBruce Richardson int 133999a2dd95SBruce Richardson rte_eal_malloc_heap_init(void) 134099a2dd95SBruce Richardson { 134199a2dd95SBruce Richardson struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 134299a2dd95SBruce Richardson unsigned int i; 134399a2dd95SBruce Richardson const struct internal_config *internal_conf = 134499a2dd95SBruce Richardson eal_get_internal_configuration(); 134599a2dd95SBruce Richardson 134699a2dd95SBruce Richardson if (internal_conf->match_allocations) 134799a2dd95SBruce Richardson RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n"); 134899a2dd95SBruce Richardson 134999a2dd95SBruce Richardson if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 135099a2dd95SBruce Richardson /* assign min socket ID to external heaps */ 135199a2dd95SBruce Richardson mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID; 135299a2dd95SBruce Richardson 135399a2dd95SBruce Richardson /* assign names to default DPDK heaps */ 135499a2dd95SBruce Richardson for (i = 0; i < rte_socket_count(); i++) { 135599a2dd95SBruce Richardson struct malloc_heap *heap = &mcfg->malloc_heaps[i]; 135699a2dd95SBruce Richardson char heap_name[RTE_HEAP_NAME_MAX_LEN]; 135799a2dd95SBruce Richardson int socket_id = rte_socket_id_by_idx(i); 135899a2dd95SBruce Richardson 135999a2dd95SBruce Richardson snprintf(heap_name, sizeof(heap_name), 136099a2dd95SBruce Richardson "socket_%i", socket_id); 136199a2dd95SBruce Richardson strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); 136299a2dd95SBruce Richardson heap->socket_id = socket_id; 136399a2dd95SBruce Richardson } 136499a2dd95SBruce Richardson } 136599a2dd95SBruce Richardson 136699a2dd95SBruce Richardson 136799a2dd95SBruce Richardson if (register_mp_requests()) { 136899a2dd95SBruce Richardson RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n"); 136999a2dd95SBruce Richardson rte_mcfg_mem_read_unlock(); 137099a2dd95SBruce Richardson return -1; 137199a2dd95SBruce Richardson } 137299a2dd95SBruce Richardson 137399a2dd95SBruce Richardson /* unlock mem hotplug here. it's safe for primary as no requests can 137499a2dd95SBruce Richardson * even come before primary itself is fully initialized, and secondaries 137599a2dd95SBruce Richardson * do not need to initialize the heap. 137699a2dd95SBruce Richardson */ 137799a2dd95SBruce Richardson rte_mcfg_mem_read_unlock(); 137899a2dd95SBruce Richardson 137999a2dd95SBruce Richardson /* secondary process does not need to initialize anything */ 138099a2dd95SBruce Richardson if (rte_eal_process_type() != RTE_PROC_PRIMARY) 138199a2dd95SBruce Richardson return 0; 138299a2dd95SBruce Richardson 138399a2dd95SBruce Richardson /* add all IOVA-contiguous areas to the heap */ 138499a2dd95SBruce Richardson return rte_memseg_contig_walk(malloc_add_seg, NULL); 138599a2dd95SBruce Richardson } 1386