/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"

/*
 * If debugging is enabled, freed memory is set to a poison value
 * to catch buggy programs. Otherwise, freed memory is set to zero
 * to avoid having to zero it again in zmalloc.
 */
#ifdef RTE_MALLOC_DEBUG
#define MALLOC_POISON	0x6b
#else
#define MALLOC_POISON	0
#endif

size_t
malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
{
	void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
	void *data_start, *data_end;
	rte_iova_t expected_iova;
	struct rte_memseg *ms;
	size_t page_sz, cur, max;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	page_sz = (size_t)elem->msl->page_sz;
	data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
	/* segment must start after header and with specified alignment */
	contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);

	/* return if aligned address is already out of malloc element */
	if (contig_seg_start > data_end)
		return 0;

	/* if we're in IOVA as VA mode, or if we're in legacy mode with
	 * hugepages, all elements are IOVA-contiguous. however, we can only
	 * make these assumptions about internal memory - externally allocated
	 * segments have to be checked.
	 */
	if (!elem->msl->external &&
			(rte_eal_iova_mode() == RTE_IOVA_VA ||
				(internal_conf->legacy_mem &&
					rte_eal_has_hugepages())))
		return RTE_PTR_DIFF(data_end, contig_seg_start);

	cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
	ms = rte_mem_virt2memseg(cur_page, elem->msl);

	/* do first iteration outside the loop */
	page_end = RTE_PTR_ADD(cur_page, page_sz);
	cur_seg_end = RTE_MIN(page_end, data_end);
	cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
			MALLOC_ELEM_TRAILER_LEN;
	max = cur;
	expected_iova = ms->iova + page_sz;
	/* memsegs are contiguous in memory */
	ms++;

	cur_page = RTE_PTR_ADD(cur_page, page_sz);

	while (cur_page < data_end) {
		page_end = RTE_PTR_ADD(cur_page, page_sz);
		cur_seg_end = RTE_MIN(page_end, data_end);

		/* reset start of contiguous segment if unexpected iova */
		if (ms->iova != expected_iova) {
			/* next contiguous segment must start at specified
			 * alignment.
			 */
			contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
			/* new segment start may be on a different page, so
			 * find the page and skip to next iteration to make
			 * sure we're not blowing past data end.
			 */
			ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
			cur_page = ms->addr;
			/* don't trigger another recalculation */
			expected_iova = ms->iova;
			continue;
		}
		/* cur_seg_end ends on a page boundary or on data end. if we're
		 * looking at data end, then malloc trailer is already included
		 * in the calculations. if we're looking at page end, then we
		 * know there's more data past this page and thus there's space
		 * for malloc element trailer, so don't count it here.
		 */
		cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
		/* update max if cur value is bigger */
		if (cur > max)
			max = cur;

		/* move to next page */
		cur_page = page_end;
		expected_iova = ms->iova + page_sz;
		/* memsegs are contiguous in memory */
		ms++;
	}

	return max;
}
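
/*
 * Illustrative walk-through of the page scan above (not part of the build).
 * All numbers are hypothetical and assume a 4K page size and a 64-byte element
 * header/trailer purely for illustration:
 *
 *	data_start = 0x1000040, data_end = 0x1003fc0, align = 64
 *	pages:  VA   0x1000000  0x1001000  0x1002000  0x1003000
 *	        IOVA    0x9000     0xa000     0xb000     0xe000
 *
 * The first three pages are IOVA-contiguous, so the candidate segment starting
 * at data_start grows to 0x2fc0 bytes. The fourth page breaks the expected
 * IOVA (0xc000 expected, 0xe000 found), so contig_seg_start is reset to that
 * page and the new candidate only reaches data_end (0xfc0 bytes). The larger
 * value, 0x2fc0, is returned.
 */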

/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
		struct rte_memseg_list *msl, size_t size,
		struct malloc_elem *orig_elem, size_t orig_size)
{
	elem->heap = heap;
	elem->msl = msl;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->size = size;
	elem->pad = 0;
	elem->orig_elem = orig_elem;
	elem->orig_size = orig_size;
	set_header(elem);
	set_trailer(elem);
}

void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	/* first and last elements must be both NULL or both non-NULL */
	if ((heap->first == NULL) != (heap->last == NULL)) {
		RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
		return;
	}

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}

/*
 * Attempt to find enough physically contiguous memory in this block to store
 * our data. Assume that the element has at least enough space to fit the
 * data, so we just check the page addresses.
 */
static bool
elem_check_phys_contig(const struct rte_memseg_list *msl,
		void *start, size_t size)
{
	return eal_memalloc_is_contig(msl, start, size);
}
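
/*
 * Illustrative sketch (not part of the build): a hypothetical caller that adds
 * a freshly mapped region of `len` bytes at `addr` to a heap would typically
 * treat the whole region as a single element and do roughly:
 *
 *	struct malloc_elem *new_elem = addr;
 *
 *	malloc_elem_init(new_elem, heap, msl, len, new_elem, len);
 *	malloc_elem_insert(new_elem);
 *	malloc_elem_free_list_insert(new_elem);
 *
 * malloc_elem_insert() only links the element into the address-ordered doubly
 * linked list; making it available for allocation is the separate free-list
 * insert (defined further below). The actual sequence used by the heap code
 * lives in malloc_heap.c.
 */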

/*
 * calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	size_t elem_size = elem->size;

	/*
	 * we're allocating from the end, so adjust the size of the element
	 * by the alignment size.
	 */
	while (elem_size >= size) {
		const size_t bmask = ~(bound - 1);
		uintptr_t end_pt = (uintptr_t)elem +
				elem_size - MALLOC_ELEM_TRAILER_LEN;
		uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
				align);
		uintptr_t new_elem_start;

		/* check boundary */
		if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
			end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
			new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
					align);
			end_pt = new_data_start + size;

			if (((end_pt - 1) & bmask) != (new_data_start & bmask))
				return NULL;
		}

		new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

		/* if the new start point is before the existing start,
		 * it won't fit
		 */
		if (new_elem_start < (uintptr_t)elem)
			return NULL;

		if (contig) {
			size_t new_data_size = end_pt - new_data_start;

			/*
			 * if physical contiguousness was requested and we
			 * couldn't fit all data into one physically contiguous
			 * block, try again with lower addresses.
			 */
			if (!elem_check_phys_contig(elem->msl,
					(void *)new_data_start,
					new_data_size)) {
				elem_size -= align;
				continue;
			}
		}
		return (void *)new_elem_start;
	}
	return NULL;
}

/*
 * use elem_start_pt to determine if we can meet the size and
 * alignment request from the current element
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	return elem_start_pt(elem, size, align, bound, contig) != NULL;
}

/*
 * split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
			elem->orig_elem, elem->orig_size);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
	if (elem->pad) {
		/* Update the padded inner element's size. */
		elem = RTE_PTR_ADD(elem, elem->pad);
		elem->size = old_elem_size - elem->pad;
	}
}
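
/*
 * Worked example for elem_start_pt() above, with hypothetical addresses and a
 * header/trailer of 0x40 bytes each assumed purely for illustration. Request:
 * size = 0x800, align = 0x400, bound = 0, elem = 0x100000, elem->size = 0x3000.
 *
 *	end_pt         = 0x100000 + 0x3000 - 0x40                 = 0x102fc0
 *	new_data_start = RTE_ALIGN_FLOOR(0x102fc0 - 0x800, 0x400) = 0x102400
 *	new_elem_start = 0x102400 - 0x40                           = 0x1023c0
 *
 * With bound = 0, bound - 1 wraps to SIZE_MAX and bmask becomes 0, so the
 * boundary check can never trigger. 0x1023c0 is not below the element start,
 * so the allocation is carved from the tail and [0x100000, 0x1023c0) remains
 * for the original element. With a non-zero bound, data that would straddle a
 * bound-aligned address is instead pushed down so that it ends exactly on
 * that boundary.
 */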

/*
 * our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem->next == RTE_PTR_ADD(elem, elem->size) &&
			elem->next->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->next->orig_elem);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
			elem->prev->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->prev->orig_elem);
}

/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
			MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
			index : RTE_HEAP_NUM_FREELISTS - 1;
}

/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}
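
/*
 * Worked examples for malloc_elem_free_list_index() above, using the
 * MALLOC_MINSIZE_LOG2 = 8 and MALLOC_LOG2_INCREMENT = 2 values defined there:
 *
 *	size =   100  ->  <= 2^8, index 0
 *	size =  1000  ->  log2 = 10, index = (10 - 8 + 1) / 2 = 1
 *	size =  4096  ->  log2 = 12, index = (12 - 8 + 1) / 2 = 2
 *	size = 20000  ->  log2 = 15, index = (15 - 8 + 1) / 2 = 4
 *
 * This matches the example ranges in the comment above; anything that would
 * index past the last list is clamped to RTE_HEAP_NUM_FREELISTS - 1.
 */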

/*
 * reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc so parameter checking
 * is not done here, as it's done there previously.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
			contig);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		asan_clear_split_alloczone(new_free_elem);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		asan_clear_alloczone(elem);

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as everything
				      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	asan_clear_split_alloczone(new_elem);

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */

	split_elem(elem, new_elem);

	asan_clear_alloczone(new_elem);

	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}

/*
 * join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;
	elem1->next = next;
	if (elem1->pad) {
		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
		inner->size = elem1->size - elem1->pad;
	}
}
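
/*
 * Layout sketch for malloc_elem_alloc() above (sizes not to scale). Data is
 * placed at the tail of the chosen free element, and two independent
 * decisions follow:
 *
 *  - if the space left after the placed data exceeds
 *    MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE, it is split off as new_free_elem
 *    and put back on a free list;
 *  - the space in front of the placed data either stays attached as padding
 *    (with a dummy ELEM_PAD header) when it is too small to stand on its own,
 *    or is split off and re-inserted as a smaller free element.
 *
 * Common case, roughly:
 *
 *	before: [ hdr |              free              | trl ]
 *	after:  [ hdr | free | trl ][ hdr | data | trl ]
 *	          ^ re-inserted        ^ returned, ELEM_BUSY
 */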

struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if the next element exists, is adjacent and is free; if so,
	 * remove it from the free list and join it with this one.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);
	}

	/*
	 * check if the prev element exists, is adjacent and is free; if so,
	 * remove it from the free list and join it with this one.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;
		size_t erase_len;

		/* we will want to erase trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);

		elem = new_elem;
	}

	return elem;
}

/*
 * free a malloc_elem block by adding it to the free list. If the blocks
 * immediately before or immediately after the newly freed block are also
 * free, the blocks are merged together.
 */
struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	elem->pad = 0;

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

	/* poison memory */
	memset(ptr, MALLOC_POISON, data_len);

	return elem;
}

/* assume all checks were already done */
void
malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
{
	struct malloc_elem *hide_start, *hide_end, *prev, *next;
	size_t len_before, len_after;

	hide_start = start;
	hide_end = RTE_PTR_ADD(start, len);

	prev = elem->prev;
	next = elem->next;

	/* we cannot do anything with non-adjacent elements */
	if (next && next_elem_is_adjacent(elem)) {
		len_after = RTE_PTR_DIFF(next, hide_end);
		if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_end);

			/* split after */
			split_elem(elem, hide_end);

			malloc_elem_free_list_insert(hide_end);
		} else if (len_after > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	/* we cannot do anything with non-adjacent elements */
	if (prev && prev_elem_is_adjacent(elem)) {
		len_before = RTE_PTR_DIFF(hide_start, elem);
		if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_start);

			/* split before */
			split_elem(elem, hide_start);

			prev = elem;
			elem = hide_start;

			malloc_elem_free_list_insert(prev);
		} else if (len_before > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	asan_clear_alloczone(elem);

	remove_elem(elem);
}
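
/*
 * Geometry sketch for malloc_elem_hide_region() above. Hiding
 * [start, start + len) from an element splits off whatever lies before and
 * after the hidden range, provided an adjacent neighbour exists on that side
 * and the remainder is large enough to form a valid element
 * (at least MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE):
 *
 *	[ before | hidden region | after ]
 *
 * "before" and "after" are split into their own free elements when large
 * enough; the hidden middle element is then unlinked via remove_elem().
 * A remainder that is non-zero but too small to carry its own header and
 * trailer means the element is not aligned as expected, which is reported as
 * probable heap corruption rather than silently dropped.
 */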

/*
 * attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size) {
		asan_clear_alloczone(elem);
		return 0;
	}

	/* check that the next element exists, and is free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit,
		 * by splitting.
		 */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);

		asan_clear_split_alloczone(split_pt);

		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}

	asan_clear_alloczone(elem);

	return 0;
}

static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
}
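
/*
 * Example of the output format produced by malloc_elem_dump() above; the
 * addresses and sizes shown are made up:
 *
 *	Malloc element at 0x7f1c40000000 (BUSY)
 *	 len: 0x140 pad: 0x0
 *	 prev: 0x7f1c3fffff00 next: 0x7f1c40000140
 */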