/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_lcore.h>
#include <rte_common.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
#include <rte_memzone.h>
#include <rte_fbarray.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
#include "malloc_mp.h"

/* start external socket IDs at a very high number */
#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))

static unsigned
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
{
	unsigned check_flag = 0;

	if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
		return 1;

	switch (hugepage_sz) {
	case RTE_PGSIZE_256K:
		check_flag = RTE_MEMZONE_256KB;
		break;
	case RTE_PGSIZE_2M:
		check_flag = RTE_MEMZONE_2MB;
		break;
	case RTE_PGSIZE_16M:
		check_flag = RTE_MEMZONE_16MB;
		break;
	case RTE_PGSIZE_256M:
		check_flag = RTE_MEMZONE_256MB;
		break;
	case RTE_PGSIZE_512M:
		check_flag = RTE_MEMZONE_512MB;
		break;
	case RTE_PGSIZE_1G:
		check_flag = RTE_MEMZONE_1GB;
		break;
	case RTE_PGSIZE_4G:
		check_flag = RTE_MEMZONE_4GB;
		break;
	case RTE_PGSIZE_16G:
		check_flag = RTE_MEMZONE_16GB;
	}

	return check_flag & flags;
}

int
malloc_socket_to_heap_id(unsigned int socket_id)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int i;

	for (i = 0; i < RTE_MAX_HEAPS; i++) {
		struct malloc_heap *heap = &mcfg->malloc_heaps[i];

		if (heap->socket_id == socket_id)
			return i;
	}
	return -1;
}

/*
 * Expand the heap with a memory area.
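 *
 * For reference, an illustrative sketch of how this is used (this is the
 * call malloc_add_seg() below ends up making for every IOVA-contiguous
 * area found by the memseg walk; names are as in that function):
 *
 *	malloc_heap_add_memory(heap, found_msl, ms->addr, len,
 *			ms->flags & RTE_MEMSEG_FLAG_DIRTY);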
 */
static struct malloc_elem *
malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
		void *start, size_t len, bool dirty)
{
	struct malloc_elem *elem = start;

	malloc_elem_init(elem, heap, msl, len, elem, len, dirty);

	malloc_elem_insert(elem);

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	return elem;
}

static int
malloc_add_seg(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct malloc_heap *heap;
	int msl_idx, heap_idx;

	if (msl->external)
		return 0;

	heap_idx = malloc_socket_to_heap_id(msl->socket_id);
	if (heap_idx < 0) {
		EAL_LOG(ERR, "Memseg list has invalid socket id");
		return -1;
	}
	heap = &mcfg->malloc_heaps[heap_idx];

	/* msl is const, so find it */
	msl_idx = msl - mcfg->memsegs;

	if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
		return -1;

	found_msl = &mcfg->memsegs[msl_idx];

	malloc_heap_add_memory(heap, found_msl, ms->addr, len,
			ms->flags & RTE_MEMSEG_FLAG_DIRTY);

	heap->total_size += len;

	EAL_LOG(DEBUG, "Added %zuM to heap on socket %i", len >> 20,
			msl->socket_id);
	return 0;
}

/*
 * Iterates through the freelist for a heap to find a free element
 * which can store data of the required size and with the requested alignment.
 * If size is 0, find the biggest available elem.
 * Returns null on failure, or pointer to element on success.
 */
static struct malloc_elem *
find_suitable_element(struct malloc_heap *heap, size_t size,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	size_t idx;
	struct malloc_elem *elem, *alt_elem = NULL;

	for (idx = malloc_elem_free_list_index(size);
			idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
				!!elem; elem = LIST_NEXT(elem, free_list)) {
			if (malloc_elem_can_hold(elem, size, align, bound,
					contig)) {
				if (check_hugepage_sz(flags,
						elem->msl->page_sz))
					return elem;
				if (alt_elem == NULL)
					alt_elem = elem;
			}
		}
	}

	if (flags & RTE_MEMZONE_SIZE_HINT_ONLY)
		return alt_elem;

	return NULL;
}

/*
 * Iterates through the freelist for a heap to find a free element with the
 * biggest size and requested alignment. Will also set size to whatever element
 * size that was found.
 * Returns null on failure, or pointer to element on success.
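 *
 * For example (illustrative only): with contig set, the size reported for an
 * element is the largest IOVA-contiguous run inside it as computed by
 * malloc_elem_find_max_iova_contig(), so an element spanning physically
 * scattered pages may report much less than its total length.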
 */
static struct malloc_elem *
find_biggest_element(struct malloc_heap *heap, size_t *size,
		unsigned int flags, size_t align, bool contig)
{
	struct malloc_elem *elem, *max_elem = NULL;
	size_t idx, max_size = 0;

	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
				!!elem; elem = LIST_NEXT(elem, free_list)) {
			size_t cur_size;
			if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
					!check_hugepage_sz(flags,
						elem->msl->page_sz))
				continue;
			if (contig) {
				cur_size =
					malloc_elem_find_max_iova_contig(elem,
							align);
			} else {
				void *data_start = RTE_PTR_ADD(elem,
						MALLOC_ELEM_HEADER_LEN);
				void *data_end = RTE_PTR_ADD(elem, elem->size -
						MALLOC_ELEM_TRAILER_LEN);
				void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
						align);
				/* check if aligned data start is beyond end */
				if (aligned >= data_end)
					continue;
				cur_size = RTE_PTR_DIFF(data_end, aligned);
			}
			if (cur_size > max_size) {
				max_size = cur_size;
				max_elem = elem;
			}
		}
	}

	*size = max_size;
	return max_elem;
}

/*
 * Main function to allocate a block of memory from the heap.
 * It locks the free list, scans it, and adds a new memseg if the
 * scan fails. Once the new memseg is added, it re-scans and should return
 * the new element after releasing the lock.
 */
static void *
heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	struct malloc_elem *elem;
	size_t user_size = size;

	size = RTE_CACHE_LINE_ROUNDUP(size);
	align = RTE_CACHE_LINE_ROUNDUP(align);

	/* roundup might cause an overflow */
	if (size == 0)
		return NULL;
	elem = find_suitable_element(heap, size, flags, align, bound, contig);
	if (elem != NULL) {
		elem = malloc_elem_alloc(elem, size, align, bound, contig);

		/* increase heap's count of allocated elements */
		heap->alloc_count++;

		asan_set_redzone(elem, user_size);
	}

	return elem == NULL ? NULL : (void *)(&elem[1]);
}

static void *
heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
		unsigned int flags, size_t align, bool contig)
{
	struct malloc_elem *elem;
	size_t size;

	align = RTE_CACHE_LINE_ROUNDUP(align);

	elem = find_biggest_element(heap, &size, flags, align, contig);
	if (elem != NULL) {
		elem = malloc_elem_alloc(elem, size, align, 0, contig);

		/* increase heap's count of allocated elements */
		heap->alloc_count++;

		asan_set_redzone(elem, size);
	}

	return elem == NULL ? NULL : (void *)(&elem[1]);
}

/* this function is exposed in malloc_mp.h */
void
rollback_expand_heap(struct rte_memseg **ms, int n_segs,
		struct malloc_elem *elem, void *map_addr, size_t map_len)
{
	if (elem != NULL) {
		malloc_elem_free_list_remove(elem);
		malloc_elem_hide_region(elem, map_addr, map_len);
	}

	eal_memalloc_free_seg_bulk(ms, n_segs);
}

/* this function is exposed in malloc_mp.h */
struct malloc_elem *
alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
		int socket, unsigned int flags, size_t align, size_t bound,
		bool contig, struct rte_memseg **ms, int n_segs)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	struct malloc_elem *elem = NULL;
	size_t alloc_sz;
	int allocd_pages, i;
	bool dirty = false;
	void *ret, *map_addr;

	alloc_sz = (size_t)pg_sz * n_segs;

	/* first, check if we're allowed to allocate this memory */
	if (eal_memalloc_mem_alloc_validate(socket,
			heap->total_size + alloc_sz) < 0) {
		EAL_LOG(DEBUG, "User has disallowed allocation");
		return NULL;
	}

	allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
			socket, true);

	/* make sure we've allocated our pages... */
	if (allocd_pages < 0)
		return NULL;

	map_addr = ms[0]->addr;
	msl = rte_mem_virt2memseg_list(map_addr);

	/* check if we wanted contiguous memory but didn't get it */
	if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
		EAL_LOG(DEBUG, "%s(): couldn't allocate physically contiguous space",
				__func__);
		goto fail;
	}

	/*
	 * Once we have all the memseg lists configured, if there is a dma mask
	 * set, check iova addresses are not out of range. Otherwise the device
	 * setting the dma mask could have problems with the mapped memory.
	 *
	 * There are two situations when this can happen:
	 * 1) memory initialization
	 * 2) dynamic memory allocation
	 *
	 * For 1), an error when checking the DMA mask means the app cannot be
	 * executed. For 2), it means the new memory cannot be added.
	 */
	if (mcfg->dma_maskbits &&
			rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
		/*
		 * Currently this can only happen if IOMMU is enabled
		 * and the address width supported by the IOMMU hw is
		 * not enough for using the memory mapped IOVAs.
		 *
		 * If IOVA is VA, advise trying '--iova-mode pa', which can
		 * solve some situations where IOVA as VA is not really needed.
		 */
		EAL_LOG(ERR,
			"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask",
			__func__);

		/*
		 * If IOVA is VA and it is possible to run with IOVA PA
		 * (because the user is root), give advice for solving the
		 * problem.
		 */
		if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
				rte_eal_using_phys_addrs())
			EAL_LOG(ERR,
				"%s(): Please try initializing EAL with --iova-mode=pa parameter",
				__func__);
		goto fail;
	}

	/* Element is dirty if it contains at least one dirty page.
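	 * A page is considered dirty when its previous contents may still be
	 * there (e.g. a hugepage file reused from a previous run), so it
	 * cannot be assumed to be zero-filled.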
	 */
	for (i = 0; i < allocd_pages; i++)
		dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;

	/* add newly minted memsegs to malloc heap */
	elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);

	/* try once more, as now we have allocated new memory */
	ret = find_suitable_element(heap, elt_size, flags, align, bound,
			contig);

	if (ret == NULL)
		goto fail;

	return elem;

fail:
	rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
	return NULL;
}

static int
try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
		size_t elt_size, int socket, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct malloc_elem *elem;
	struct rte_memseg **ms;
	void *map_addr;
	size_t alloc_sz;
	int n_segs;
	bool callback_triggered = false;

	alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) +
			MALLOC_ELEM_OVERHEAD, pg_sz);
	n_segs = alloc_sz / pg_sz;

	/* we can't know in advance how many pages we'll need, so we malloc */
	ms = malloc(sizeof(*ms) * n_segs);
	if (ms == NULL)
		return -1;
	memset(ms, 0, sizeof(*ms) * n_segs);

	elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
			bound, contig, ms, n_segs);

	if (elem == NULL)
		goto free_ms;

	map_addr = ms[0]->addr;

	/* notify user about changes in memory map */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);

	/* notify other processes that this has happened */
	if (request_sync()) {
		/* we couldn't ensure all processes have mapped memory,
		 * so free it back and notify everyone that it's been
		 * freed back.
		 *
		 * technically, we could've avoided adding memory addresses to
		 * the map, but that would've led to inconsistent behavior
		 * between primary and secondary processes, as those get
		 * callbacks during sync. therefore, force primary process to
		 * do alloc-and-rollback syncs as well.
		 */
		callback_triggered = true;
		goto free_elem;
	}
	heap->total_size += alloc_sz;

	EAL_LOG(DEBUG, "Heap on socket %d was expanded by %zdMB",
			socket, alloc_sz >> 20ULL);

	free(ms);

	return 0;

free_elem:
	if (callback_triggered)
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				map_addr, alloc_sz);

	rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);

	request_sync();
free_ms:
	free(ms);

	return -1;
}

static int
try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
		size_t elt_size, int socket, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_mp_req req;
	int req_result;

	memset(&req, 0, sizeof(req));

	req.t = REQ_TYPE_ALLOC;
	req.alloc_req.align = align;
	req.alloc_req.bound = bound;
	req.alloc_req.contig = contig;
	req.alloc_req.flags = flags;
	req.alloc_req.elt_size = elt_size;
	req.alloc_req.page_sz = pg_sz;
	req.alloc_req.socket = socket;
	req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;

	req_result = request_to_primary(&req);

	if (req_result != 0)
		return -1;

	if (req.result != REQ_RESULT_SUCCESS)
		return -1;

	return 0;
}

static int
try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
		int socket, unsigned int flags, size_t align, size_t bound,
		bool contig)
{
	int ret;

	rte_mcfg_mem_write_lock();

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
				flags, align, bound, contig);
	} else {
		ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
				flags, align, bound, contig);
	}

	rte_mcfg_mem_write_unlock();
	return ret;
}

static int
compare_pagesz(const void *a, const void *b)
{
	const struct rte_memseg_list * const*mpa = a;
	const struct rte_memseg_list * const*mpb = b;
	const struct rte_memseg_list *msla = *mpa;
	const struct rte_memseg_list *mslb = *mpb;
	uint64_t pg_sz_a = msla->page_sz;
	uint64_t pg_sz_b = mslb->page_sz;

	if (pg_sz_a < pg_sz_b)
		return -1;
	if (pg_sz_a > pg_sz_b)
		return 1;
	return 0;
}

static int
alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
	struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
	uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
	uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
	uint64_t prev_pg_sz;
	int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
	bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
	void *ret;

	memset(requested_msls, 0, sizeof(requested_msls));
	memset(other_msls, 0, sizeof(other_msls));
	memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
	memset(other_pg_sz, 0, sizeof(other_pg_sz));

	/*
	 * go through memseg list and take note of all the page sizes available,
	 * and if any of them were specifically requested by the user.
	 */
	n_requested_msls = 0;
	n_other_msls = 0;
	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *msl = &mcfg->memsegs[i];

		if (msl->socket_id != socket)
			continue;

		if (msl->base_va == NULL)
			continue;

		/* if pages of specific size were requested */
		if (size_flags != 0 && check_hugepage_sz(size_flags,
				msl->page_sz))
			requested_msls[n_requested_msls++] = msl;
		else if (size_flags == 0 || size_hint)
			other_msls[n_other_msls++] = msl;
	}

	/* sort the lists, smallest first */
	qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
			compare_pagesz);
	qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
			compare_pagesz);

	/* now, extract page sizes we are supposed to try */
	prev_pg_sz = 0;
	n_requested_pg_sz = 0;
	for (i = 0; i < n_requested_msls; i++) {
		uint64_t pg_sz = requested_msls[i]->page_sz;

		if (prev_pg_sz != pg_sz) {
			requested_pg_sz[n_requested_pg_sz++] = pg_sz;
			prev_pg_sz = pg_sz;
		}
	}
	prev_pg_sz = 0;
	n_other_pg_sz = 0;
	for (i = 0; i < n_other_msls; i++) {
		uint64_t pg_sz = other_msls[i]->page_sz;

		if (prev_pg_sz != pg_sz) {
			other_pg_sz[n_other_pg_sz++] = pg_sz;
			prev_pg_sz = pg_sz;
		}
	}

	/* finally, try allocating memory of specified page sizes, starting from
	 * the smallest sizes
	 */
	for (i = 0; i < n_requested_pg_sz; i++) {
		uint64_t pg_sz = requested_pg_sz[i];

		/*
		 * do not pass the size hint here, as user expects other page
		 * sizes first, before resorting to best effort allocation.
		 */
		if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
				align, bound, contig))
			return 0;
	}
	if (n_other_pg_sz == 0)
		return -1;

	/* now, check if we can reserve anything with size hint */
	ret = find_suitable_element(heap, size, flags, align, bound, contig);
	if (ret != NULL)
		return 0;

	/*
	 * we still couldn't reserve memory, so try expanding heap with other
	 * page sizes, if there are any
	 */
	for (i = 0; i < n_other_pg_sz; i++) {
		uint64_t pg_sz = other_pg_sz[i];

		if (!try_expand_heap(heap, pg_sz, size, socket, flags,
				align, bound, contig))
			return 0;
	}
	return -1;
}

/* this will try lower page sizes first */
static void *
malloc_heap_alloc_on_heap_id(const char *type, size_t size,
		unsigned int heap_id, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
	int socket_id;
	void *ret;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	rte_spinlock_lock(&(heap->lock));

	align = align == 0 ? 1 : align;

	/* for legacy mode, try once and with all flags */
	if (internal_conf->legacy_mem) {
		ret = heap_alloc(heap, type, size, flags, align, bound, contig);
		goto alloc_unlock;
	}

	/*
	 * we do not pass the size hint here, because even if allocation fails,
	 * we may still be able to allocate memory from appropriate page sizes,
	 * we just need to request more memory first.
	 */

	socket_id = rte_socket_id_by_idx(heap_id);
	/*
	 * if socket ID is negative, we cannot find a socket ID for this heap -
	 * which means it's an external heap. those can have unexpected page
	 * sizes, so if the user asked to allocate from there - assume user
	 * knows what they're doing, and allow allocating from there with any
	 * page size flags.
	 */
	if (socket_id < 0)
		size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;

	ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
	if (ret != NULL)
		goto alloc_unlock;

	/* if socket ID is invalid, this is an external heap */
	if (socket_id < 0)
		goto alloc_unlock;

	if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
			bound, contig)) {
		ret = heap_alloc(heap, type, size, flags, align, bound, contig);

		/* this should have succeeded */
		if (ret == NULL)
			EAL_LOG(ERR, "Error allocating from heap");
	}
alloc_unlock:
	rte_spinlock_unlock(&(heap->lock));
	return ret;
}

static unsigned int
malloc_get_numa_socket(void)
{
	const struct internal_config *conf = eal_get_internal_configuration();
	unsigned int socket_id = rte_socket_id();
	unsigned int idx;

	if (socket_id != (unsigned int)SOCKET_ID_ANY)
		return socket_id;

	/* for control threads, return first socket where memory is available */
	for (idx = 0; idx < rte_socket_count(); idx++) {
		socket_id = rte_socket_id_by_idx(idx);
		if (conf->socket_mem[socket_id] != 0)
			return socket_id;
	}
	/* We couldn't quickly find a NUMA node where memory was available,
	 * so fall back to using main lcore socket ID.
	 */
	socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
	/* Main lcore socket ID may be SOCKET_ID_ANY
	 * when main lcore thread is affinitized to multiple NUMA nodes.
	 */
	if (socket_id != (unsigned int)SOCKET_ID_ANY)
		return socket_id;
	/* Failed to find meaningful socket ID, so use the first one available. */
	return rte_socket_id_by_idx(0);
}

void *
malloc_heap_alloc(const char *type, size_t size, int socket_arg,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	int socket, heap_id, i;
	void *ret;

	/* return NULL if size is 0 or alignment is not power-of-2 */
	if (size == 0 || (align && !rte_is_power_of_2(align)))
		return NULL;

	if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
		socket_arg = SOCKET_ID_ANY;

	if (socket_arg == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_arg;

	/* turn socket ID into heap ID */
	heap_id = malloc_socket_to_heap_id(socket);
	/* if heap id is negative, socket ID was invalid */
	if (heap_id < 0)
		return NULL;

	ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
			bound, contig);
	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
		return ret;

	/* try other heaps. we are only iterating through native DPDK sockets,
	 * so external heaps won't be included.
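	 * For example, if the allocation above was requested with
	 * SOCKET_ID_ANY and the chosen heap (say, socket 0) is out of memory,
	 * the loop below retries the same request on every other socket's
	 * heap before giving up.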
	 */
	for (i = 0; i < (int) rte_socket_count(); i++) {
		if (i == heap_id)
			continue;
		ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
				bound, contig);
		if (ret != NULL)
			return ret;
	}
	return NULL;
}

static void *
heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
		unsigned int flags, size_t align, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
	void *ret;

	rte_spinlock_lock(&(heap->lock));

	align = align == 0 ? 1 : align;

	ret = heap_alloc_biggest(heap, type, flags, align, contig);

	rte_spinlock_unlock(&(heap->lock));

	return ret;
}

void *
malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
		size_t align, bool contig)
{
	int socket, i, cur_socket, heap_id;
	void *ret;

	/* return NULL if align is not power-of-2 */
	if ((align && !rte_is_power_of_2(align)))
		return NULL;

	if (!rte_eal_has_hugepages())
		socket_arg = SOCKET_ID_ANY;

	if (socket_arg == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_arg;

	/* turn socket ID into heap ID */
	heap_id = malloc_socket_to_heap_id(socket);
	/* if heap id is negative, socket ID was invalid */
	if (heap_id < 0)
		return NULL;

	ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
			contig);
	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
		return ret;

	/* try other heaps */
	for (i = 0; i < (int) rte_socket_count(); i++) {
		cur_socket = rte_socket_id_by_idx(i);
		if (cur_socket == socket)
			continue;
		ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
				contig);
		if (ret != NULL)
			return ret;
	}
	return NULL;
}

/* this function is exposed in malloc_mp.h */
int
malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
{
	int n_segs, seg_idx, max_seg_idx;
	struct rte_memseg_list *msl;
	size_t page_sz;

	msl = rte_mem_virt2memseg_list(aligned_start);
	if (msl == NULL)
		return -1;

	page_sz = (size_t)msl->page_sz;
	n_segs = aligned_len / page_sz;
	seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
	max_seg_idx = seg_idx + n_segs;

	for (; seg_idx < max_seg_idx; seg_idx++) {
		struct rte_memseg *ms;

		ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
		eal_memalloc_free_seg(ms);
	}
	return 0;
}

int
malloc_heap_free(struct malloc_elem *elem)
{
	struct malloc_heap *heap;
	void *start, *aligned_start, *end, *aligned_end;
	size_t len, aligned_len, page_sz;
	struct rte_memseg_list *msl;
	unsigned int i, n_segs, before_space, after_space;
	int ret;
	bool unmapped = false;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
		return -1;

	asan_clear_redzone(elem);

	/* elem may be merged with previous element, so keep heap address */
	heap = elem->heap;
	msl = elem->msl;
	page_sz = (size_t)msl->page_sz;

	rte_spinlock_lock(&(heap->lock));

	void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad);
	size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad;

	/* mark element as free */
	elem->state = ELEM_FREE;

	elem
			= malloc_elem_free(elem);

	/* anything after this is a bonus */
	ret = 0;

	/* ...of which we can't avail if we are in legacy mode, or if this is an
	 * externally allocated segment.
	 */
	if (internal_conf->legacy_mem || (msl->external > 0))
		goto free_unlock;

	/* check if we can free any memory back to the system */
	if (elem->size < page_sz)
		goto free_unlock;

	/* if user requested to match allocations, the sizes must match - if not,
	 * we will defer freeing these hugepages until the entire original allocation
	 * can be freed
	 */
	if (internal_conf->match_allocations && elem->size != elem->orig_size)
		goto free_unlock;

	/* probably, but let's make sure, as we may not be using up full page */
	start = elem;
	len = elem->size;
	aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
	end = RTE_PTR_ADD(elem, len);
	aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);

	aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);

	/* can't free anything */
	if (aligned_len < page_sz)
		goto free_unlock;

	/* we can free something. however, some of these pages may be marked as
	 * unfreeable, so also check that as well
	 */
	n_segs = aligned_len / page_sz;
	for (i = 0; i < n_segs; i++) {
		const struct rte_memseg *tmp =
				rte_mem_virt2memseg(aligned_start, msl);

		if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			/* this is an unfreeable segment, so move start */
			aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
		}
	}

	/* recalculate length and number of segments */
	aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
	n_segs = aligned_len / page_sz;

	/* check if we can still free some pages */
	if (n_segs == 0)
		goto free_unlock;

	/* We're not done yet. We also have to check if by freeing space we will
	 * be leaving free elements that are too small to store new elements.
	 * Check if we have enough space in the beginning and at the end, or if
	 * start/end are exactly page aligned.
	 */
	before_space = RTE_PTR_DIFF(aligned_start, elem);
	after_space = RTE_PTR_DIFF(end, aligned_end);
	if (before_space != 0 &&
			before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* There is not enough space before start, but we may be able to
		 * move the start forward by one page.
		 */
		if (n_segs == 1)
			goto free_unlock;

		/* move start */
		aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
		aligned_len -= page_sz;
		n_segs--;
	}
	if (after_space != 0 && after_space <
			MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* There is not enough space after end, but we may be able to
		 * move the end backwards by one page.
		 */
		if (n_segs == 1)
			goto free_unlock;

		/* move end */
		aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
		aligned_len -= page_sz;
		n_segs--;
	}

	/* now we can finally free us some pages */

	rte_mcfg_mem_write_lock();

	/*
	 * we allow secondary processes to clear the heap of this allocated
	 * memory because it is safe to do so, as even if notifications about
	 * unmapped pages don't make it to other processes, heap is shared
	 * across all processes, and will become empty of this memory anyway,
	 * and nothing can allocate it back unless primary process will be able
	 * to deliver allocation message to every single running process.
	 */

	malloc_elem_free_list_remove(elem);

	malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);

	heap->total_size -= aligned_len;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* notify user about changes in memory map */
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				aligned_start, aligned_len);

		/* don't care if any of this fails */
		malloc_heap_free_pages(aligned_start, aligned_len);

		request_sync();
	} else {
		struct malloc_mp_req req;

		memset(&req, 0, sizeof(req));

		req.t = REQ_TYPE_FREE;
		req.free_req.addr = aligned_start;
		req.free_req.len = aligned_len;

		/*
		 * we request primary to deallocate pages, but we don't do it
		 * in this thread. instead, we notify primary that we would like
		 * to deallocate pages, and this process will receive another
		 * request (in parallel) that will do it for us on another
		 * thread.
		 *
		 * we also don't really care if this succeeds - the data is
		 * already removed from the heap, so it is, for all intents and
		 * purposes, hidden from the rest of DPDK even if some other
		 * process (including this one) may have these pages mapped.
		 *
		 * notifications about deallocated memory happen during sync.
		 */
		request_to_primary(&req);
	}

	/* we didn't exit early, meaning we have unmapped some pages */
	unmapped = true;

	EAL_LOG(DEBUG, "Heap on socket %d was shrunk by %zdMB",
			msl->socket_id, aligned_len >> 20ULL);

	rte_mcfg_mem_write_unlock();
free_unlock:
	asan_set_freezone(asan_ptr, asan_data_len);

	/* if we unmapped some memory, we need to do additional work for ASan */
	if (unmapped) {
		void *asan_end = RTE_PTR_ADD(asan_ptr, asan_data_len);
		void *aligned_end = RTE_PTR_ADD(aligned_start, aligned_len);
		void *aligned_trailer = RTE_PTR_SUB(aligned_start,
				MALLOC_ELEM_TRAILER_LEN);

		/*
		 * There was a memory area that was unmapped. This memory area
		 * will have to be marked as available for ASan, because we will
		 * want to use it next time it gets mapped again. The OS memory
		 * protection should trigger a fault on access to these areas
		 * anyway, so we are not giving up any protection.
		 */
		asan_set_zone(aligned_start, aligned_len, 0x00);

		/*
		 * ...however, when we unmap pages, we create new free elements
		 * which might have been marked as "freed" with an earlier
		 * `asan_set_freezone` call. So, if there is an area past the
		 * unmapped space that was marked as freezone for ASan, we need
		 * to mark the malloc header as available.
		 */
		if (asan_end > aligned_end)
			asan_set_zone(aligned_end, MALLOC_ELEM_HEADER_LEN, 0x00);

		/* if there's space before unmapped memory, mark as available */
		if (asan_ptr < aligned_start)
			asan_set_zone(aligned_trailer, MALLOC_ELEM_TRAILER_LEN, 0x00);
	}

	rte_spinlock_unlock(&(heap->lock));
	return ret;
}

int
malloc_heap_resize(struct malloc_elem *elem, size_t size)
{
	int ret;

	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
		return -1;

	rte_spinlock_lock(&(elem->heap->lock));

	ret = malloc_elem_resize(elem, size);

	rte_spinlock_unlock(&(elem->heap->lock));

	return ret;
}

/*
 * Function to retrieve data for a given heap
 */
int
malloc_heap_get_stats(struct malloc_heap *heap,
		struct rte_malloc_socket_stats *socket_stats)
{
	size_t idx;
	struct malloc_elem *elem;

	rte_spinlock_lock(&heap->lock);

	/* Initialise variables for heap */
	socket_stats->free_count = 0;
	socket_stats->heap_freesz_bytes = 0;
	socket_stats->greatest_free_size = 0;

	/* Iterate through free list */
	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
			!!elem; elem = LIST_NEXT(elem, free_list))
		{
			socket_stats->free_count++;
			socket_stats->heap_freesz_bytes += elem->size;
			if (elem->size > socket_stats->greatest_free_size)
				socket_stats->greatest_free_size = elem->size;
		}
	}
	/* Get stats on overall heap and allocated memory on this heap */
	socket_stats->heap_totalsz_bytes = heap->total_size;
	socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
			socket_stats->heap_freesz_bytes);
	socket_stats->alloc_count = heap->alloc_count;

	rte_spinlock_unlock(&heap->lock);
	return 0;
}

/*
 * Function to retrieve data for a given heap
 */
void
malloc_heap_dump(struct malloc_heap *heap, FILE *f)
{
	struct malloc_elem *elem;

	rte_spinlock_lock(&heap->lock);

	fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
	fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);

	elem = heap->first;
	while (elem) {
		malloc_elem_dump(elem, f);
		elem = elem->next;
	}

	rte_spinlock_unlock(&heap->lock);
}

static int
destroy_elem(struct malloc_elem *elem, size_t len)
{
	struct malloc_heap *heap = elem->heap;

	/* notify all subscribers that a memory area is going to be removed */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);

	/* this element can be removed */
	malloc_elem_free_list_remove(elem);
	malloc_elem_hide_region(elem, elem, len);

	heap->total_size -= len;

	memset(elem, 0, sizeof(*elem));

	return 0;
}

struct rte_memseg_list *
malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz, const char *seg_name,
		unsigned int socket_id)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	char fbarray_name[RTE_FBARRAY_NAME_LEN];
	struct rte_memseg_list *msl = NULL;
	struct rte_fbarray *arr;
	size_t seg_len = n_pages * page_sz;
	unsigned int i;

	/* first, find a free memseg list */
	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *tmp = &mcfg->memsegs[i];
		if (tmp->base_va == NULL) {
			msl = tmp;
			break;
		}
	}
	if (msl == NULL) {
		EAL_LOG(ERR, "Couldn't find empty memseg list");
		rte_errno = ENOSPC;
		return NULL;
	}

	snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
			seg_name, va_addr);

	/* create the backing fbarray */
	if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
			sizeof(struct rte_memseg)) < 0) {
		EAL_LOG(ERR, "Couldn't create fbarray backing the memseg list");
		return NULL;
	}
	arr = &msl->memseg_arr;

	/* fbarray created, fill it up */
	for (i = 0; i < n_pages; i++) {
		struct rte_memseg *ms;

		rte_fbarray_set_used(arr, i);
		ms = rte_fbarray_get(arr, i);
		ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
		ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
		ms->hugepage_sz = page_sz;
		ms->len = page_sz;
		ms->nchannel = rte_memory_get_nchannel();
		ms->nrank = rte_memory_get_nrank();
		ms->socket_id = socket_id;
	}

	/* set up the memseg list */
	msl->base_va = va_addr;
	msl->page_sz = page_sz;
	msl->socket_id = socket_id;
	msl->len = seg_len;
	msl->version = 0;
	msl->external = 1;

	return msl;
}

struct extseg_walk_arg {
	void *va_addr;
	size_t len;
	struct rte_memseg_list *msl;
};

static int
extseg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct extseg_walk_arg *wa = arg;

	if (msl->base_va == wa->va_addr && msl->len == wa->len) {
		unsigned int found_idx;

		/* msl is const */
		found_idx = msl - mcfg->memsegs;
		wa->msl = &mcfg->memsegs[found_idx];
		return 1;
	}
	return 0;
}

struct rte_memseg_list *
malloc_heap_find_external_seg(void *va_addr, size_t len)
{
	struct extseg_walk_arg wa;
	int res;

	wa.va_addr = va_addr;
	wa.len = len;

	res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);

	if (res != 1) {
		/* 0 means nothing was found, -1 shouldn't happen */
		if (res == 0)
			rte_errno = ENOENT;
		return NULL;
	}
	return wa.msl;
}

int
malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
{
	/* destroy the fbarray backing this memory */
	if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
		return -1;

	/* reset the memseg list */
	memset(msl, 0, sizeof(*msl));

	return 0;
}

int
malloc_heap_add_external_memory(struct malloc_heap *heap,
		struct rte_memseg_list *msl)
{
	/* erase contents of new memory */
	memset(msl->base_va, 0, msl->len);

	/* now, add newly minted memory to the malloc heap */
	malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);

	heap->total_size += msl->len;

	/* all done!
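	 *
	 * For reference, an application-side sketch (illustrative only, error
	 * handling omitted; "addr", "len" and "page_sz" stand for whatever the
	 * application supplied) of the public API that ends up driving the
	 * external-segment helpers in this file:
	 *
	 *	rte_malloc_heap_create("user_heap");
	 *	rte_malloc_heap_memory_add("user_heap", addr, len, NULL,
	 *			len / page_sz, page_sz);
	 *	buf = rte_malloc_socket(NULL, 4096, 0,
	 *			rte_malloc_heap_get_socket("user_heap"));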
	 */
	EAL_LOG(DEBUG, "Added segment for heap %s starting at %p",
			heap->name, msl->base_va);

	/* notify all subscribers that a new memory area has been added */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
			msl->base_va, msl->len);

	return 0;
}

int
malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
		size_t len)
{
	struct malloc_elem *elem = heap->first;

	/* find element with specified va address */
	while (elem != NULL && elem != va_addr) {
		elem = elem->next;
		/* stop if we've blown past our VA */
		if (elem > (struct malloc_elem *)va_addr) {
			rte_errno = ENOENT;
			return -1;
		}
	}
	/* check if element was found */
	if (elem == NULL || elem->msl->len != len) {
		rte_errno = ENOENT;
		return -1;
	}
	/* if element's size is not equal to segment len, segment is busy */
	if (elem->state == ELEM_BUSY || elem->size != len) {
		rte_errno = EBUSY;
		return -1;
	}
	return destroy_elem(elem, len);
}

int
malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	uint32_t next_socket_id = mcfg->next_socket_id;

	/* prevent overflow. did you really create 2 billion heaps??? */
	if (next_socket_id > INT32_MAX) {
		EAL_LOG(ERR, "Cannot assign new socket IDs");
		rte_errno = ENOSPC;
		return -1;
	}

	/* initialize empty heap */
	heap->alloc_count = 0;
	heap->first = NULL;
	heap->last = NULL;
	LIST_INIT(heap->free_head);
	rte_spinlock_init(&heap->lock);
	heap->total_size = 0;
	heap->socket_id = next_socket_id;

	/* we hold a global mem hotplug writelock, so it's safe to increment */
	mcfg->next_socket_id++;

	/* set up name */
	strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
	return 0;
}

int
malloc_heap_destroy(struct malloc_heap *heap)
{
	if (heap->alloc_count != 0) {
		EAL_LOG(ERR, "Heap is still in use");
		rte_errno = EBUSY;
		return -1;
	}
	if (heap->first != NULL || heap->last != NULL) {
		EAL_LOG(ERR, "Heap still contains memory segments");
		rte_errno = EBUSY;
		return -1;
	}
	if (heap->total_size != 0)
		EAL_LOG(ERR, "Total size not zero, heap is likely corrupt");

	/* Reset everything in the heap except the lock, which the caller still
	 * holds and can release afterwards.
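	 * The lock is required to be the first member of struct malloc_heap
	 * (checked by the RTE_BUILD_BUG_ON below), which is what makes it safe
	 * to memset the remainder of the structure around it.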
	 */
	RTE_BUILD_BUG_ON(offsetof(struct malloc_heap, lock) != 0);
	memset(RTE_PTR_ADD(heap, sizeof(heap->lock)), 0,
			sizeof(*heap) - sizeof(heap->lock));

	return 0;
}

int
rte_eal_malloc_heap_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	unsigned int i;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->match_allocations)
		EAL_LOG(DEBUG, "Hugepages will be freed exactly as allocated.");

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* assign min socket ID to external heaps */
		mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;

		/* assign names to default DPDK heaps */
		for (i = 0; i < rte_socket_count(); i++) {
			struct malloc_heap *heap = &mcfg->malloc_heaps[i];
			char heap_name[RTE_HEAP_NAME_MAX_LEN];
			int socket_id = rte_socket_id_by_idx(i);

			snprintf(heap_name, sizeof(heap_name),
					"socket_%i", socket_id);
			strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
			heap->socket_id = socket_id;
		}
	}

	if (register_mp_requests()) {
		EAL_LOG(ERR, "Couldn't register malloc multiprocess actions");
		return -1;
	}

	return 0;
}

int rte_eal_malloc_heap_populate(void)
{
	/* mem hotplug is unlocked here. it's safe for primary as no requests can
	 * even come before primary itself is fully initialized, and secondaries
	 * do not need to initialize the heap.
	 */

	/* secondary process does not need to initialize anything */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	/* add all IOVA-contiguous areas to the heap */
	return rte_memseg_contig_walk(malloc_add_seg, NULL);
}

void
rte_eal_malloc_heap_cleanup(void)
{
	unregister_mp_requests();
}
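
/*
 * Usage sketch (illustrative only, not part of the build): the typical way
 * the rte_malloc()/rte_free() layer drives this file is roughly
 *
 *	void *obj = malloc_heap_alloc(NULL, 4096, SOCKET_ID_ANY, 0,
 *			RTE_CACHE_LINE_SIZE, 0, false);
 *	...
 *	malloc_heap_free(malloc_elem_from_data(obj));
 *
 * where malloc_elem_from_data() (declared in malloc_elem.h) recovers the
 * element header that heap_alloc() placed just before the returned pointer.
 */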