/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_lcore.h>
#include <rte_common.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
#include <rte_memzone.h>
#include <rte_fbarray.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
#include "malloc_mp.h"

/* start external socket IDs at a very high number */
#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))

static unsigned
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
{
	unsigned check_flag = 0;

	if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
		return 1;

	switch (hugepage_sz) {
	case RTE_PGSIZE_256K:
		check_flag = RTE_MEMZONE_256KB;
		break;
	case RTE_PGSIZE_2M:
		check_flag = RTE_MEMZONE_2MB;
		break;
	case RTE_PGSIZE_16M:
		check_flag = RTE_MEMZONE_16MB;
		break;
	case RTE_PGSIZE_256M:
		check_flag = RTE_MEMZONE_256MB;
		break;
	case RTE_PGSIZE_512M:
		check_flag = RTE_MEMZONE_512MB;
		break;
	case RTE_PGSIZE_1G:
		check_flag = RTE_MEMZONE_1GB;
		break;
	case RTE_PGSIZE_4G:
		check_flag = RTE_MEMZONE_4GB;
		break;
	case RTE_PGSIZE_16G:
		check_flag = RTE_MEMZONE_16GB;
	}

	return check_flag & flags;
}
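
/*
 * Illustrative sketch (not part of the original file): the page-size flags
 * checked above typically originate from rte_memzone_reserve() callers. For
 * example, a caller that prefers 2 MB pages but will accept any page size can
 * pass the hint-only flag ("example_mz" is just a made-up zone name):
 *
 *	const struct rte_memzone *mz = rte_memzone_reserve("example_mz",
 *			1 << 20, SOCKET_ID_ANY,
 *			RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY);
 *
 * With RTE_MEMZONE_SIZE_HINT_ONLY set, a check_hugepage_sz() mismatch is not
 * fatal: find_suitable_element() below falls back to alt_elem instead of
 * failing the allocation.
 */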

int
malloc_socket_to_heap_id(unsigned int socket_id)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int i;

	for (i = 0; i < RTE_MAX_HEAPS; i++) {
		struct malloc_heap *heap = &mcfg->malloc_heaps[i];

		if (heap->socket_id == socket_id)
			return i;
	}
	return -1;
}

/*
 * Expand the heap with a memory area.
 */
static struct malloc_elem *
malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
		void *start, size_t len, bool dirty)
{
	struct malloc_elem *elem = start;

	malloc_elem_init(elem, heap, msl, len, elem, len, dirty);

	malloc_elem_insert(elem);

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	return elem;
}

static int
malloc_add_seg(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct malloc_heap *heap;
	int msl_idx, heap_idx;

	if (msl->external)
		return 0;

	heap_idx = malloc_socket_to_heap_id(msl->socket_id);
	if (heap_idx < 0) {
		EAL_LOG(ERR, "Memseg list has invalid socket id");
		return -1;
	}
	heap = &mcfg->malloc_heaps[heap_idx];

	/* msl is const, so find it */
	msl_idx = msl - mcfg->memsegs;

	if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
		return -1;

	found_msl = &mcfg->memsegs[msl_idx];

	malloc_heap_add_memory(heap, found_msl, ms->addr, len,
			ms->flags & RTE_MEMSEG_FLAG_DIRTY);

	heap->total_size += len;

	EAL_LOG(DEBUG, "Added %zuM to heap on socket %i", len >> 20,
		msl->socket_id);
	return 0;
}

/*
 * Iterates through the freelist for a heap to find a free element
 * which can store data of the required size and with the requested alignment.
 * If size is 0, find the biggest available elem.
 * Returns null on failure, or pointer to element on success.
 */
static struct malloc_elem *
find_suitable_element(struct malloc_heap *heap, size_t size,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	size_t idx;
	struct malloc_elem *elem, *alt_elem = NULL;

	for (idx = malloc_elem_free_list_index(size);
			idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
				!!elem; elem = LIST_NEXT(elem, free_list)) {
			if (malloc_elem_can_hold(elem, size, align, bound,
					contig)) {
				if (check_hugepage_sz(flags,
						elem->msl->page_sz))
					return elem;
				if (alt_elem == NULL)
					alt_elem = elem;
			}
		}
	}

	if (flags & RTE_MEMZONE_SIZE_HINT_ONLY)
		return alt_elem;

	return NULL;
}

/*
 * Iterates through the freelist for a heap to find a free element with the
 * biggest size and requested alignment. Will also set size to whatever element
 * size that was found.
 * Returns null on failure, or pointer to element on success.
 */
static struct malloc_elem *
find_biggest_element(struct malloc_heap *heap, size_t *size,
		unsigned int flags, size_t align, bool contig)
{
	struct malloc_elem *elem, *max_elem = NULL;
	size_t idx, max_size = 0;

	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
				!!elem; elem = LIST_NEXT(elem, free_list)) {
			size_t cur_size;
			if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
					!check_hugepage_sz(flags,
						elem->msl->page_sz))
				continue;
			if (contig) {
				cur_size =
					malloc_elem_find_max_iova_contig(elem,
							align);
			} else {
				void *data_start = RTE_PTR_ADD(elem,
						MALLOC_ELEM_HEADER_LEN);
				void *data_end = RTE_PTR_ADD(elem, elem->size -
						MALLOC_ELEM_TRAILER_LEN);
				void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
						align);
				/* check if aligned data start is beyond end */
				if (aligned >= data_end)
					continue;
				cur_size = RTE_PTR_DIFF(data_end, aligned);
			}
			if (cur_size > max_size) {
				max_size = cur_size;
				max_elem = elem;
			}
		}
	}

	*size = max_size;
	return max_elem;
}

/*
 * Main function to allocate a block of memory from the heap.
 * It locks the free list, scans it, and adds a new memseg if the
 * scan fails. Once the new memseg is added, it re-scans and should return
 * the new element after releasing the lock.
 */
static void *
heap_alloc(struct malloc_heap *heap, size_t size, unsigned int flags,
		size_t align, size_t bound, bool contig)
{
	struct malloc_elem *elem;
	size_t user_size = size;

	size = RTE_CACHE_LINE_ROUNDUP(size);
	align = RTE_CACHE_LINE_ROUNDUP(align);

	/* roundup might cause an overflow */
	if (size == 0)
		return NULL;
	elem = find_suitable_element(heap, size, flags, align, bound, contig);
	if (elem != NULL) {
		elem = malloc_elem_alloc(elem, size, align, bound, contig);

		/* increase heap's count of allocated elements */
		heap->alloc_count++;

		asan_set_redzone(elem, user_size);
	}

	return elem == NULL ? NULL : (void *)(&elem[1]);
}

static void *
heap_alloc_biggest(struct malloc_heap *heap, unsigned int flags, size_t align, bool contig)
{
	struct malloc_elem *elem;
	size_t size;

	align = RTE_CACHE_LINE_ROUNDUP(align);

	elem = find_biggest_element(heap, &size, flags, align, contig);
	if (elem != NULL) {
		elem = malloc_elem_alloc(elem, size, align, 0, contig);

		/* increase heap's count of allocated elements */
		heap->alloc_count++;

		asan_set_redzone(elem, size);
	}

	return elem == NULL ? NULL : (void *)(&elem[1]);
}
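
/*
 * Worked example (illustrative, not part of the original file): on a platform
 * with RTE_CACHE_LINE_SIZE == 64, a heap_alloc() request for 100 bytes is
 * rounded up to 128 bytes, while the ASan redzone is still sized from the
 * original 100-byte user_size. The pointer handed back to the caller is
 * &elem[1], i.e. the first byte after the struct malloc_elem header; the free
 * path recovers the element header from that data pointer again (see
 * malloc_elem.h).
 */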

/* this function is exposed in malloc_mp.h */
void
rollback_expand_heap(struct rte_memseg **ms, int n_segs,
		struct malloc_elem *elem, void *map_addr, size_t map_len)
{
	if (elem != NULL) {
		malloc_elem_free_list_remove(elem);
		malloc_elem_hide_region(elem, map_addr, map_len);
	}

	eal_memalloc_free_seg_bulk(ms, n_segs);
}

/* this function is exposed in malloc_mp.h */
struct malloc_elem *
alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
		int socket, unsigned int flags, size_t align, size_t bound,
		bool contig, struct rte_memseg **ms, int n_segs)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	struct malloc_elem *elem = NULL;
	size_t alloc_sz;
	int allocd_pages, i;
	bool dirty = false;
	void *ret, *map_addr;

	alloc_sz = (size_t)pg_sz * n_segs;

	/* first, check if we're allowed to allocate this memory */
	if (eal_memalloc_mem_alloc_validate(socket,
			heap->total_size + alloc_sz) < 0) {
		EAL_LOG(DEBUG, "User has disallowed allocation");
		return NULL;
	}

	allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
			socket, true);

	/* make sure we've allocated our pages... */
	if (allocd_pages < 0)
		return NULL;

	map_addr = ms[0]->addr;
	msl = rte_mem_virt2memseg_list(map_addr);

	/* check if we wanted contiguous memory but didn't get it */
	if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
		EAL_LOG(DEBUG, "%s(): couldn't allocate physically contiguous space",
				__func__);
		goto fail;
	}

	/*
	 * Once we have all the memseg lists configured, if there is a dma mask
	 * set, check iova addresses are not out of range. Otherwise the device
	 * setting the dma mask could have problems with the mapped memory.
	 *
	 * There are two situations when this can happen:
	 *	1) memory initialization
	 *	2) dynamic memory allocation
	 *
	 * For 1), an error when checking the dma mask means the app cannot be
	 * executed. For 2), it means the new memory cannot be added.
	 */
	if (mcfg->dma_maskbits &&
	    rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
		/*
		 * Currently this can only happen if IOMMU is enabled
		 * and the address width supported by the IOMMU hw is
		 * not enough for using the memory mapped IOVAs.
		 *
		 * If IOVA is VA, advise to try with '--iova-mode pa',
		 * which could solve some situations when IOVA VA is not
		 * really needed.
		 */
		EAL_LOG(ERR,
			"%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask",
			__func__);

		/*
		 * If IOVA is VA and it is possible to run with IOVA PA,
		 * because user is root, give advice for solving the
		 * problem.
		 */
		if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
				rte_eal_using_phys_addrs())
			EAL_LOG(ERR,
				"%s(): Please try initializing EAL with --iova-mode=pa parameter",
				__func__);
		goto fail;
	}
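
	/*
	 * Illustrative note (not part of the original file): the public
	 * counterpart of the check above is rte_mem_check_dma_mask(). A driver
	 * for a device that can only address, say, 39 bits of IOVA space could
	 * verify that limit with something like:
	 *
	 *	if (rte_mem_check_dma_mask(39) != 0)
	 *		... fail probe, some IOVAs exceed the device's reach ...
	 *
	 * The "39" is just an example width. The thread-unsafe variant is used
	 * above because the memory hotplug lock is already held on this path.
	 */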

	/* Element is dirty if it contains at least one dirty page. */
	for (i = 0; i < allocd_pages; i++)
		dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;

	/* add newly minted memsegs to malloc heap */
	elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);

	/* try once more, as now we have allocated new memory */
	ret = find_suitable_element(heap, elt_size, flags, align, bound,
			contig);

	if (ret == NULL)
		goto fail;

	return elem;

fail:
	rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
	return NULL;
}

static int
try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
		size_t elt_size, int socket, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct malloc_elem *elem;
	struct rte_memseg **ms;
	void *map_addr;
	size_t alloc_sz;
	int n_segs;
	bool callback_triggered = false;

	alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) +
			MALLOC_ELEM_OVERHEAD, pg_sz);
	n_segs = alloc_sz / pg_sz;

	/* we can't know in advance how many pages we'll need, so we malloc */
	ms = malloc(sizeof(*ms) * n_segs);
	if (ms == NULL)
		return -1;
	memset(ms, 0, sizeof(*ms) * n_segs);

	elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
			bound, contig, ms, n_segs);

	if (elem == NULL)
		goto free_ms;

	map_addr = ms[0]->addr;

	/* notify user about changes in memory map */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);

	/* notify other processes that this has happened */
	if (request_sync()) {
		/* we couldn't ensure all processes have mapped memory,
		 * so free it back and notify everyone that it's been
		 * freed back.
		 *
		 * technically, we could've avoided adding memory addresses to
		 * the map, but that would've led to inconsistent behavior
		 * between primary and secondary processes, as those get
		 * callbacks during sync. therefore, force primary process to
		 * do alloc-and-rollback syncs as well.
		 */
		callback_triggered = true;
		goto free_elem;
	}
	heap->total_size += alloc_sz;

	EAL_LOG(DEBUG, "Heap on socket %d was expanded by %zdMB",
		socket, alloc_sz >> 20ULL);

	free(ms);

	return 0;

free_elem:
	if (callback_triggered)
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				map_addr, alloc_sz);

	rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);

	request_sync();
free_ms:
	free(ms);

	return -1;
}

static int
try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
		size_t elt_size, int socket, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_mp_req req;
	int req_result;

	memset(&req, 0, sizeof(req));

	req.t = REQ_TYPE_ALLOC;
	req.alloc_req.align = align;
	req.alloc_req.bound = bound;
	req.alloc_req.contig = contig;
	req.alloc_req.flags = flags;
	req.alloc_req.elt_size = elt_size;
	req.alloc_req.page_sz = pg_sz;
	req.alloc_req.socket = socket;
	req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;

	req_result = request_to_primary(&req);

	if (req_result != 0)
		return -1;

	if (req.result != REQ_RESULT_SUCCESS)
		return -1;

	return 0;
}

static int
try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
		int socket, unsigned int flags, size_t align, size_t bound,
		bool contig)
{
	int ret;

	rte_mcfg_mem_write_lock();

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
				flags, align, bound, contig);
	} else {
		ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
				flags, align, bound, contig);
	}

	rte_mcfg_mem_write_unlock();
	return ret;
}

static int
compare_pagesz(const void *a, const void *b)
{
	const struct rte_memseg_list * const*mpa = a;
	const struct rte_memseg_list * const*mpb = b;
	const struct rte_memseg_list *msla = *mpa;
	const struct rte_memseg_list *mslb = *mpb;
	uint64_t pg_sz_a = msla->page_sz;
	uint64_t pg_sz_b = mslb->page_sz;

	if (pg_sz_a < pg_sz_b)
		return -1;
	if (pg_sz_a > pg_sz_b)
		return 1;
	return 0;
}

static int
alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
		unsigned int flags, size_t align, size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
	struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
	uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
	uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
	uint64_t prev_pg_sz;
	int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
	bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
	void *ret;

	memset(requested_msls, 0, sizeof(requested_msls));
	memset(other_msls, 0, sizeof(other_msls));
	memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
	memset(other_pg_sz, 0, sizeof(other_pg_sz));

	/*
	 * go through memseg list and take note of all the page sizes available,
	 * and if any of them were specifically requested by the user.
	 */
	n_requested_msls = 0;
	n_other_msls = 0;
	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *msl = &mcfg->memsegs[i];

		if (msl->socket_id != socket)
			continue;

		if (msl->base_va == NULL)
			continue;

		/* if pages of specific size were requested */
		if (size_flags != 0 && check_hugepage_sz(size_flags,
				msl->page_sz))
			requested_msls[n_requested_msls++] = msl;
		else if (size_flags == 0 || size_hint)
			other_msls[n_other_msls++] = msl;
	}

	/* sort the lists, smallest first */
	qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
			compare_pagesz);
	qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
			compare_pagesz);

	/* now, extract page sizes we are supposed to try */
	prev_pg_sz = 0;
	n_requested_pg_sz = 0;
	for (i = 0; i < n_requested_msls; i++) {
		uint64_t pg_sz = requested_msls[i]->page_sz;

		if (prev_pg_sz != pg_sz) {
			requested_pg_sz[n_requested_pg_sz++] = pg_sz;
			prev_pg_sz = pg_sz;
		}
	}
	prev_pg_sz = 0;
	n_other_pg_sz = 0;
	for (i = 0; i < n_other_msls; i++) {
		uint64_t pg_sz = other_msls[i]->page_sz;

		if (prev_pg_sz != pg_sz) {
			other_pg_sz[n_other_pg_sz++] = pg_sz;
			prev_pg_sz = pg_sz;
		}
	}

	/* finally, try allocating memory of specified page sizes, starting from
	 * the smallest sizes
	 */
	for (i = 0; i < n_requested_pg_sz; i++) {
		uint64_t pg_sz = requested_pg_sz[i];

		/*
		 * do not pass the size hint here, as user expects other page
		 * sizes first, before resorting to best effort allocation.
		 */
		if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
				align, bound, contig))
			return 0;
	}
	if (n_other_pg_sz == 0)
		return -1;

	/* now, check if we can reserve anything with size hint */
	ret = find_suitable_element(heap, size, flags, align, bound, contig);
	if (ret != NULL)
		return 0;

	/*
	 * we still couldn't reserve memory, so try expanding heap with other
	 * page sizes, if there are any
	 */
	for (i = 0; i < n_other_pg_sz; i++) {
		uint64_t pg_sz = other_pg_sz[i];

		if (!try_expand_heap(heap, pg_sz, size, socket, flags,
				align, bound, contig))
			return 0;
	}
	return -1;
}
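
/*
 * Illustrative sketch (not part of the original file): when the heap is
 * expanded or shrunk, the eal_memalloc_mem_event_notify() calls above fire the
 * public memory event callbacks. An application can observe these memory map
 * changes with rte_mem_event_callback_register(); the callback body and the
 * "example-cb" name below are made up for the example:
 *
 *	static void
 *	example_mem_event_cb(enum rte_mem_event type, const void *addr,
 *			size_t len, void *arg __rte_unused)
 *	{
 *		printf("mem event %d at %p, len %zu\n", (int)type, addr, len);
 *	}
 *
 *	rte_mem_event_callback_register("example-cb",
 *			example_mem_event_cb, NULL);
 */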

/* this will try lower page sizes first */
static void *
malloc_heap_alloc_on_heap_id(size_t size, unsigned int heap_id, unsigned int flags, size_t align,
		size_t bound, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
	int socket_id;
	void *ret;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	rte_spinlock_lock(&(heap->lock));

	align = align == 0 ? 1 : align;

	/* for legacy mode, try once and with all flags */
	if (internal_conf->legacy_mem) {
		ret = heap_alloc(heap, size, flags, align, bound, contig);
		goto alloc_unlock;
	}

	/*
	 * we do not pass the size hint here, because even if allocation fails,
	 * we may still be able to allocate memory from appropriate page sizes,
	 * we just need to request more memory first.
	 */

	socket_id = rte_socket_id_by_idx(heap_id);
	/*
	 * if socket ID is negative, we cannot find a socket ID for this heap -
	 * which means it's an external heap. those can have unexpected page
	 * sizes, so if the user asked to allocate from there - assume user
	 * knows what they're doing, and allow allocating from there with any
	 * page size flags.
	 */
	if (socket_id < 0)
		size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;

	ret = heap_alloc(heap, size, size_flags, align, bound, contig);
	if (ret != NULL)
		goto alloc_unlock;

	/* if socket ID is invalid, this is an external heap */
	if (socket_id < 0)
		goto alloc_unlock;

	if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
			bound, contig)) {
		ret = heap_alloc(heap, size, flags, align, bound, contig);

		/* this should have succeeded */
		if (ret == NULL)
			EAL_LOG(ERR, "Error allocating from heap");
	}
alloc_unlock:
	rte_spinlock_unlock(&(heap->lock));
	return ret;
}

static unsigned int
malloc_get_numa_socket(void)
{
	const struct internal_config *conf = eal_get_internal_configuration();
	unsigned int socket_id = rte_socket_id();
	unsigned int idx;

	if (socket_id != (unsigned int)SOCKET_ID_ANY)
		return socket_id;

	/* for control threads, return first socket where memory is available */
	for (idx = 0; idx < rte_socket_count(); idx++) {
		socket_id = rte_socket_id_by_idx(idx);
		if (conf->socket_mem[socket_id] != 0)
			return socket_id;
	}
	/* We couldn't quickly find a NUMA node where memory was available,
	 * so fall back to using main lcore socket ID.
	 */
	socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
	/* Main lcore socket ID may be SOCKET_ID_ANY
	 * when main lcore thread is affinitized to multiple NUMA nodes.
	 */
	if (socket_id != (unsigned int)SOCKET_ID_ANY)
		return socket_id;
	/* Failed to find meaningful socket ID, so use the first one available. */
	return rte_socket_id_by_idx(0);
}

void *
malloc_heap_alloc(size_t size, int socket_arg, unsigned int flags,
		size_t align, size_t bound, bool contig)
{
	int socket, heap_id, i;
	void *ret;

	/* return NULL if size is 0 or alignment is not power-of-2 */
	if (size == 0 || (align && !rte_is_power_of_2(align)))
		return NULL;

	if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
		socket_arg = SOCKET_ID_ANY;

	if (socket_arg == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_arg;

	/* turn socket ID into heap ID */
	heap_id = malloc_socket_to_heap_id(socket);
	/* if heap id is negative, socket ID was invalid */
	if (heap_id < 0)
		return NULL;

	ret = malloc_heap_alloc_on_heap_id(size, heap_id, flags, align, bound, contig);
	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
		return ret;

	/* try other heaps. we are only iterating through native DPDK sockets,
	 * so external heaps won't be included.
	 */
	for (i = 0; i < (int) rte_socket_count(); i++) {
		if (i == heap_id)
			continue;
		ret = malloc_heap_alloc_on_heap_id(size, i, flags, align, bound, contig);
		if (ret != NULL)
			return ret;
	}
	return NULL;
}
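
/*
 * Illustrative sketch (not part of the original file): malloc_heap_alloc() is
 * reached through the public rte_malloc API. Passing SOCKET_ID_ANY exercises
 * the fallback path above, which tries the caller's NUMA node first and then
 * the remaining DPDK sockets:
 *
 *	void *buf = rte_malloc_socket(NULL, 4096, RTE_CACHE_LINE_SIZE,
 *			SOCKET_ID_ANY);
 *	if (buf != NULL)
 *		rte_free(buf);
 */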

static void *
heap_alloc_biggest_on_heap_id(unsigned int heap_id,
		unsigned int flags, size_t align, bool contig)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
	void *ret;

	rte_spinlock_lock(&(heap->lock));

	align = align == 0 ? 1 : align;

	ret = heap_alloc_biggest(heap, flags, align, contig);

	rte_spinlock_unlock(&(heap->lock));

	return ret;
}

void *
malloc_heap_alloc_biggest(int socket_arg, unsigned int flags, size_t align, bool contig)
{
	int socket, i, cur_socket, heap_id;
	void *ret;

	/* return NULL if align is not power-of-2 */
	if ((align && !rte_is_power_of_2(align)))
		return NULL;

	if (!rte_eal_has_hugepages())
		socket_arg = SOCKET_ID_ANY;

	if (socket_arg == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_arg;

	/* turn socket ID into heap ID */
	heap_id = malloc_socket_to_heap_id(socket);
	/* if heap id is negative, socket ID was invalid */
	if (heap_id < 0)
		return NULL;

	ret = heap_alloc_biggest_on_heap_id(heap_id, flags, align, contig);
	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
		return ret;

	/* try other heaps */
	for (i = 0; i < (int) rte_socket_count(); i++) {
		cur_socket = rte_socket_id_by_idx(i);
		if (cur_socket == socket)
			continue;
		ret = heap_alloc_biggest_on_heap_id(i, flags, align, contig);
		if (ret != NULL)
			return ret;
	}
	return NULL;
}

/* this function is exposed in malloc_mp.h */
int
malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
{
	int n_segs, seg_idx, max_seg_idx;
	struct rte_memseg_list *msl;
	size_t page_sz;

	msl = rte_mem_virt2memseg_list(aligned_start);
	if (msl == NULL)
		return -1;

	page_sz = (size_t)msl->page_sz;
	n_segs = aligned_len / page_sz;
	seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
	max_seg_idx = seg_idx + n_segs;

	for (; seg_idx < max_seg_idx; seg_idx++) {
		struct rte_memseg *ms;

		ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
		eal_memalloc_free_seg(ms);
	}
	return 0;
}

int
malloc_heap_free(struct malloc_elem *elem)
{
	struct malloc_heap *heap;
	void *start, *aligned_start, *end, *aligned_end;
	size_t len, aligned_len, page_sz;
	struct rte_memseg_list *msl;
	unsigned int i, n_segs, before_space, after_space;
	int ret;
	bool unmapped = false;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
		return -1;

	asan_clear_redzone(elem);

	/* elem may be merged with previous element, so keep heap address */
	heap = elem->heap;
	msl = elem->msl;
	page_sz = (size_t)msl->page_sz;

	rte_spinlock_lock(&(heap->lock));

	void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad);
	size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad;

	/* mark element as free */
	elem->state = ELEM_FREE;

	elem = malloc_elem_free(elem);

	/* anything after this is a bonus */
	ret = 0;

	/* ...which we can't take advantage of if we are in legacy mode, or if
	 * this is an externally allocated segment.
	 */
	if (internal_conf->legacy_mem || (msl->external > 0))
		goto free_unlock;

	/* check if we can free any memory back to the system */
	if (elem->size < page_sz)
		goto free_unlock;

	/* if user requested to match allocations, the sizes must match - if not,
	 * we will defer freeing these hugepages until the entire original allocation
	 * can be freed
	 */
	if (internal_conf->match_allocations && elem->size != elem->orig_size)
		goto free_unlock;

	/* probably, but let's make sure, as we may not be using up a full page */
	start = elem;
	len = elem->size;
	aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
	end = RTE_PTR_ADD(elem, len);
	aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);

	aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);

	/* can't free anything */
	if (aligned_len < page_sz)
		goto free_unlock;

	/* we can free something. however, some of these pages may be marked as
	 * unfreeable, so also check that as well
	 */
	n_segs = aligned_len / page_sz;
	for (i = 0; i < n_segs; i++) {
		const struct rte_memseg *tmp =
				rte_mem_virt2memseg(aligned_start, msl);

		if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			/* this is an unfreeable segment, so move start */
			aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
		}
	}

	/* recalculate length and number of segments */
	aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
	n_segs = aligned_len / page_sz;

	/* check if we can still free some pages */
	if (n_segs == 0)
		goto free_unlock;

	/* We're not done yet. We also have to check if by freeing space we will
	 * be leaving free elements that are too small to store new elements.
	 * Check if we have enough space in the beginning and at the end, or if
	 * start/end are exactly page aligned.
	 */
	before_space = RTE_PTR_DIFF(aligned_start, elem);
	after_space = RTE_PTR_DIFF(end, aligned_end);
	if (before_space != 0 &&
			before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* There is not enough space before start, but we may be able to
		 * move the start forward by one page.
		 */
		if (n_segs == 1)
			goto free_unlock;

		/* move start */
		aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
		aligned_len -= page_sz;
		n_segs--;
	}
	if (after_space != 0 && after_space <
			MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* There is not enough space after end, but we may be able to
		 * move the end backwards by one page.
		 */
		if (n_segs == 1)
			goto free_unlock;

		/* move end */
		aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
		aligned_len -= page_sz;
		n_segs--;
	}

	/* now we can finally free some pages */

	rte_mcfg_mem_write_lock();

	/*
	 * we allow secondary processes to clear the heap of this allocated
	 * memory because it is safe to do so: even if notifications about
	 * unmapped pages don't make it to other processes, the heap is shared
	 * across all processes and will become empty of this memory anyway,
	 * and nothing can allocate it back unless the primary process is able
	 * to deliver the allocation message to every single running process.
	 */

	malloc_elem_free_list_remove(elem);

	malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);

	heap->total_size -= aligned_len;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* notify user about changes in memory map */
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				aligned_start, aligned_len);

		/* don't care if any of this fails */
		malloc_heap_free_pages(aligned_start, aligned_len);

		request_sync();
	} else {
		struct malloc_mp_req req;

		memset(&req, 0, sizeof(req));

		req.t = REQ_TYPE_FREE;
		req.free_req.addr = aligned_start;
		req.free_req.len = aligned_len;

		/*
		 * we request primary to deallocate pages, but we don't do it
		 * in this thread. instead, we notify primary that we would like
		 * to deallocate pages, and this process will receive another
		 * request (in parallel) that will do it for us on another
		 * thread.
		 *
		 * we also don't really care if this succeeds - the data is
		 * already removed from the heap, so it is, for all intents and
		 * purposes, hidden from the rest of DPDK even if some other
		 * process (including this one) may have these pages mapped.
		 *
		 * notifications about deallocated memory happen during sync.
		 */
		request_to_primary(&req);
	}

	/* we didn't exit early, meaning we have unmapped some pages */
	unmapped = true;

	EAL_LOG(DEBUG, "Heap on socket %d was shrunk by %zdMB",
		msl->socket_id, aligned_len >> 20ULL);

	rte_mcfg_mem_write_unlock();
free_unlock:
	asan_set_freezone(asan_ptr, asan_data_len);

	/* if we unmapped some memory, we need to do additional work for ASan */
	if (unmapped) {
		void *asan_end = RTE_PTR_ADD(asan_ptr, asan_data_len);
		void *aligned_end = RTE_PTR_ADD(aligned_start, aligned_len);
		void *aligned_trailer = RTE_PTR_SUB(aligned_start,
				MALLOC_ELEM_TRAILER_LEN);

		/*
		 * There was a memory area that was unmapped. This memory area
		 * will have to be marked as available for ASan, because we will
		 * want to use it next time it gets mapped again. The OS memory
		 * protection should trigger a fault on access to these areas
		 * anyway, so we are not giving up any protection.
		 */
		asan_set_zone(aligned_start, aligned_len, 0x00);

		/*
		 * ...however, when we unmap pages, we create new free elements
		 * which might have been marked as "freed" with an earlier
		 * `asan_set_freezone` call. So, if there is an area past the
		 * unmapped space that was marked as freezone for ASan, we need
		 * to mark the malloc header as available.
		 */
		if (asan_end > aligned_end)
			asan_set_zone(aligned_end, MALLOC_ELEM_HEADER_LEN, 0x00);

		/* if there's space before unmapped memory, mark as available */
		if (asan_ptr < aligned_start)
			asan_set_zone(aligned_trailer, MALLOC_ELEM_TRAILER_LEN, 0x00);
	}

	rte_spinlock_unlock(&(heap->lock));
	return ret;
}

int
malloc_heap_resize(struct malloc_elem *elem, size_t size)
{
	int ret;

	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
		return -1;

	rte_spinlock_lock(&(elem->heap->lock));

	ret = malloc_elem_resize(elem, size);

	rte_spinlock_unlock(&(elem->heap->lock));

	return ret;
}

/*
 * Function to retrieve data for a given heap
 */
int
malloc_heap_get_stats(struct malloc_heap *heap,
		struct rte_malloc_socket_stats *socket_stats)
{
	size_t idx;
	struct malloc_elem *elem;

	rte_spinlock_lock(&heap->lock);

	/* Initialise variables for heap */
	socket_stats->free_count = 0;
	socket_stats->heap_freesz_bytes = 0;
	socket_stats->greatest_free_size = 0;

	/* Iterate through free list */
	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
		for (elem = LIST_FIRST(&heap->free_head[idx]);
				!!elem; elem = LIST_NEXT(elem, free_list)) {
			socket_stats->free_count++;
			socket_stats->heap_freesz_bytes += elem->size;
			if (elem->size > socket_stats->greatest_free_size)
				socket_stats->greatest_free_size = elem->size;
		}
	}
	/* Get stats on overall heap and allocated memory on this heap */
	socket_stats->heap_totalsz_bytes = heap->total_size;
	socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
			socket_stats->heap_freesz_bytes);
	socket_stats->alloc_count = heap->alloc_count;

	rte_spinlock_unlock(&heap->lock);
	return 0;
}

/*
 * Function to dump the contents of a given heap
 */
void
malloc_heap_dump(struct malloc_heap *heap, FILE *f)
{
	struct malloc_elem *elem;

	rte_spinlock_lock(&heap->lock);

	fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
	fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);

	elem = heap->first;
	while (elem) {
		malloc_elem_dump(elem, f);
		elem = elem->next;
	}

	rte_spinlock_unlock(&heap->lock);
}
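
/*
 * Illustrative sketch (not part of the original file): per-heap statistics and
 * dumps are exposed through the public rte_malloc API, e.g.:
 *
 *	struct rte_malloc_socket_stats stats;
 *
 *	if (rte_malloc_get_socket_stats(0, &stats) == 0)
 *		printf("socket 0: %zu bytes free\n", stats.heap_freesz_bytes);
 *	rte_malloc_dump_heaps(stdout);
 */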

static int
destroy_elem(struct malloc_elem *elem, size_t len)
{
	struct malloc_heap *heap = elem->heap;

	/* notify all subscribers that a memory area is going to be removed */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);

	/* this element can be removed */
	malloc_elem_free_list_remove(elem);
	malloc_elem_hide_region(elem, elem, len);

	heap->total_size -= len;

	memset(elem, 0, sizeof(*elem));

	return 0;
}

struct rte_memseg_list *
malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz, const char *seg_name,
		unsigned int socket_id)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	char fbarray_name[RTE_FBARRAY_NAME_LEN];
	struct rte_memseg_list *msl = NULL;
	struct rte_fbarray *arr;
	size_t seg_len = n_pages * page_sz;
	unsigned int i;

	/* first, find a free memseg list */
	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
		struct rte_memseg_list *tmp = &mcfg->memsegs[i];

		if (tmp->base_va == NULL) {
			msl = tmp;
			break;
		}
	}
	if (msl == NULL) {
		EAL_LOG(ERR, "Couldn't find empty memseg list");
		rte_errno = ENOSPC;
		return NULL;
	}

	snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
			seg_name, va_addr);

	/* create the backing fbarray */
	if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
			sizeof(struct rte_memseg)) < 0) {
		EAL_LOG(ERR, "Couldn't create fbarray backing the memseg list");
		return NULL;
	}
	arr = &msl->memseg_arr;

	/* fbarray created, fill it up */
	for (i = 0; i < n_pages; i++) {
		struct rte_memseg *ms;

		rte_fbarray_set_used(arr, i);
		ms = rte_fbarray_get(arr, i);
		ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
		ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
		ms->hugepage_sz = page_sz;
		ms->len = page_sz;
		ms->nchannel = rte_memory_get_nchannel();
		ms->nrank = rte_memory_get_nrank();
		ms->socket_id = socket_id;
	}

	/* set up the memseg list */
	msl->base_va = va_addr;
	msl->page_sz = page_sz;
	msl->socket_id = socket_id;
	msl->len = seg_len;
	msl->version = 0;
	msl->external = 1;

	return msl;
}

struct extseg_walk_arg {
	void *va_addr;
	size_t len;
	struct rte_memseg_list *msl;
};

static int
extseg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct extseg_walk_arg *wa = arg;

	if (msl->base_va == wa->va_addr && msl->len == wa->len) {
		unsigned int found_idx;

		/* msl is const */
		found_idx = msl - mcfg->memsegs;
		wa->msl = &mcfg->memsegs[found_idx];
		return 1;
	}
	return 0;
}

struct rte_memseg_list *
malloc_heap_find_external_seg(void *va_addr, size_t len)
{
	struct extseg_walk_arg wa;
	int res;

	wa.va_addr = va_addr;
	wa.len = len;

	res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);

	if (res != 1) {
		/* 0 means nothing was found, -1 shouldn't happen */
		if (res == 0)
			rte_errno = ENOENT;
		return NULL;
	}
	return wa.msl;
}

int
malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
{
	/* destroy the fbarray backing this memory */
	if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
		return -1;

	/* reset the memseg list */
	memset(msl, 0, sizeof(*msl));

	return 0;
}

int
malloc_heap_add_external_memory(struct malloc_heap *heap,
		struct rte_memseg_list *msl)
{
	/* erase contents of new memory */
	memset(msl->base_va, 0, msl->len);

	/* now, add newly minted memory to the malloc heap */
	malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);

	heap->total_size += msl->len;

	/* all done! */
	EAL_LOG(DEBUG, "Added segment for heap %s starting at %p",
			heap->name, msl->base_va);

	/* notify all subscribers that a new memory area has been added */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
			msl->base_va, msl->len);

	return 0;
}
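
/*
 * Illustrative sketch (not part of the original file): the external-segment
 * helpers above back the public external-heap API. A typical sequence from an
 * application, with a made-up heap name and a caller-provided, page-aligned
 * buffer "ext_mem" of "ext_len" bytes split into pages of "page_sz":
 *
 *	rte_malloc_heap_create("user_heap");
 *	rte_malloc_heap_memory_add("user_heap", ext_mem, NULL,
 *			ext_len / page_sz, page_sz);
 *	int sock = rte_malloc_heap_get_socket("user_heap");
 *	void *obj = rte_malloc_socket(NULL, 4096, 0, sock);
 *	...
 *	rte_free(obj);
 *	rte_malloc_heap_memory_remove("user_heap", ext_mem, ext_len);
 *	rte_malloc_heap_destroy("user_heap");
 *
 * Passing NULL for the IOVA table marks the pages as RTE_BAD_IOVA, as seen in
 * malloc_heap_create_external_seg() above.
 */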

int
malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
		size_t len)
{
	struct malloc_elem *elem = heap->first;

	/* find element with specified va address */
	while (elem != NULL && elem != va_addr) {
		elem = elem->next;
		/* stop if we've blown past our VA */
		if (elem > (struct malloc_elem *)va_addr) {
			rte_errno = ENOENT;
			return -1;
		}
	}
	/* check if element was found */
	if (elem == NULL || elem->msl->len != len) {
		rte_errno = ENOENT;
		return -1;
	}
	/* if element's size is not equal to segment len, segment is busy */
	if (elem->state == ELEM_BUSY || elem->size != len) {
		rte_errno = EBUSY;
		return -1;
	}
	return destroy_elem(elem, len);
}

int
malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	uint32_t next_socket_id = mcfg->next_socket_id;

	/* prevent overflow. did you really create 2 billion heaps??? */
	if (next_socket_id > INT32_MAX) {
		EAL_LOG(ERR, "Cannot assign new socket IDs");
		rte_errno = ENOSPC;
		return -1;
	}

	/* initialize empty heap */
	heap->alloc_count = 0;
	heap->first = NULL;
	heap->last = NULL;
	LIST_INIT(heap->free_head);
	rte_spinlock_init(&heap->lock);
	heap->total_size = 0;
	heap->socket_id = next_socket_id;

	/* we hold a global mem hotplug writelock, so it's safe to increment */
	mcfg->next_socket_id++;

	/* set up name */
	strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
	return 0;
}

int
malloc_heap_destroy(struct malloc_heap *heap)
{
	if (heap->alloc_count != 0) {
		EAL_LOG(ERR, "Heap is still in use");
		rte_errno = EBUSY;
		return -1;
	}
	if (heap->first != NULL || heap->last != NULL) {
		EAL_LOG(ERR, "Heap still contains memory segments");
		rte_errno = EBUSY;
		return -1;
	}
	if (heap->total_size != 0)
		EAL_LOG(ERR, "Total size not zero, heap is likely corrupt");

	/* Reset all of the heap but the (held) lock so the caller can release it. */
	RTE_BUILD_BUG_ON(offsetof(struct malloc_heap, lock) != 0);
	memset(RTE_PTR_ADD(heap, sizeof(heap->lock)), 0,
			sizeof(*heap) - sizeof(heap->lock));

	return 0;
}

int
rte_eal_malloc_heap_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	unsigned int i;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->match_allocations)
		EAL_LOG(DEBUG, "Hugepages will be freed exactly as allocated.");

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* assign min socket ID to external heaps */
		mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;

		/* assign names to default DPDK heaps */
		for (i = 0; i < rte_socket_count(); i++) {
			struct malloc_heap *heap = &mcfg->malloc_heaps[i];
			char heap_name[RTE_HEAP_NAME_MAX_LEN];
			int socket_id = rte_socket_id_by_idx(i);

			snprintf(heap_name, sizeof(heap_name),
					"socket_%i", socket_id);
			strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
			heap->socket_id = socket_id;
		}
	}

	if (register_mp_requests()) {
		EAL_LOG(ERR, "Couldn't register malloc multiprocess actions");
		return -1;
	}

	return 0;
}

int rte_eal_malloc_heap_populate(void)
{
	/* mem hotplug is unlocked here. it's safe for primary as no requests can
	 * even come before primary itself is fully initialized, and secondaries
	 * do not need to initialize the heap.
	 */

	/* secondary process does not need to initialize anything */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	/* add all IOVA-contiguous areas to the heap */
	return rte_memseg_contig_walk(malloc_add_seg, NULL);
}

void
rte_eal_malloc_heap_cleanup(void)
{
	unregister_mp_requests();
}