/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"

/*
 * If debugging is enabled, freed memory is set to poison value
 * to catch buggy programs. Otherwise, freed memory is set to zero
 * to avoid having to zero in zmalloc
 */
#ifdef RTE_MALLOC_DEBUG
#define MALLOC_POISON	0x6b
#else
#define MALLOC_POISON	0
#endif

size_t
malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
{
	void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
	void *data_start, *data_end;
	rte_iova_t expected_iova;
	struct rte_memseg *ms;
	size_t page_sz, cur, max;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	page_sz = (size_t)elem->msl->page_sz;
	data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
	/* segment must start after header and with specified alignment */
	contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);

	/* return if aligned address is already out of malloc element */
	if (contig_seg_start > data_end)
		return 0;

	/* if we're in IOVA as VA mode, or if we're in legacy mode with
	 * hugepages, all elements are IOVA-contiguous. however, we can only
	 * make these assumptions about internal memory - externally allocated
	 * segments have to be checked.
	 */
	if (!elem->msl->external &&
			(rte_eal_iova_mode() == RTE_IOVA_VA ||
				(internal_conf->legacy_mem &&
					rte_eal_has_hugepages())))
		return RTE_PTR_DIFF(data_end, contig_seg_start);

	cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
	ms = rte_mem_virt2memseg(cur_page, elem->msl);

	/* do first iteration outside the loop */
	page_end = RTE_PTR_ADD(cur_page, page_sz);
	cur_seg_end = RTE_MIN(page_end, data_end);
	cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
			MALLOC_ELEM_TRAILER_LEN;
	max = cur;
	expected_iova = ms->iova + page_sz;
	/* memsegs are contiguous in memory */
	ms++;

	cur_page = RTE_PTR_ADD(cur_page, page_sz);

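	/*
	 * Walk the remaining pages one at a time, comparing each page's IOVA
	 * against the IOVA expected if memory were physically contiguous.
	 * Illustration (hypothetical layout, 2MB pages): if the element spans
	 * pages whose IOVAs are 0x200000, 0x400000 and 0xa00000, the first
	 * two pages form one contiguous run while the third starts a new one,
	 * so 'max' ends up covering only the first two pages.
	 */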
	while (cur_page < data_end) {
		page_end = RTE_PTR_ADD(cur_page, page_sz);
		cur_seg_end = RTE_MIN(page_end, data_end);

		/* reset start of contiguous segment if unexpected iova */
		if (ms->iova != expected_iova) {
			/* next contiguous segment must start at specified
			 * alignment.
			 */
			contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
			/* new segment start may be on a different page, so find
			 * the page and skip to next iteration to make sure
			 * we're not blowing past data end.
			 */
			ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
			cur_page = ms->addr;
			/* don't trigger another recalculation */
			expected_iova = ms->iova;
			continue;
		}
		/* cur_seg_end ends on a page boundary or on data end. if we're
		 * looking at data end, then malloc trailer is already included
		 * in the calculations. if we're looking at page end, then we
		 * know there's more data past this page and thus there's space
		 * for malloc element trailer, so don't count it here.
		 */
		cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
		/* update max if cur value is bigger */
		if (cur > max)
			max = cur;

		/* move to next page */
		cur_page = page_end;
		expected_iova = ms->iova + page_sz;
		/* memsegs are contiguous in memory */
		ms++;
	}

	return max;
}

/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
		struct rte_memseg_list *msl, size_t size,
		struct malloc_elem *orig_elem, size_t orig_size)
{
	elem->heap = heap;
	elem->msl = msl;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->size = size;
	elem->pad = 0;
	elem->orig_elem = orig_elem;
	elem->orig_size = orig_size;
	set_header(elem);
	set_trailer(elem);
}

void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	/* first and last elements must be both NULL or both non-NULL */
	if ((heap->first == NULL) != (heap->last == NULL)) {
		RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
		return;
	}

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}

/*
 * Attempt to find enough physically contiguous memory in this block to store
 * our data. Assume that the element has at least enough space to fit the
 * data, so we just check the page addresses.
 */
static bool
elem_check_phys_contig(const struct rte_memseg_list *msl,
		void *start, size_t size)
{
	return eal_memalloc_is_contig(msl, start, size);
}

/*
 * calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	size_t elem_size = elem->size;

	/*
	 * we're allocating from the end, so adjust the size of element by
	 * alignment size.
	 */
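	/*
	 * Illustration with hypothetical numbers: for end_pt = 0x104e40,
	 * size = 0x1000 and align = 0x1000, the data would be placed at
	 * RTE_ALIGN_FLOOR(0x104e40 - 0x1000, 0x1000) = 0x103000, i.e. the
	 * highest aligned address that still leaves room for the trailer.
	 * A placement that crosses the requested boundary is moved down
	 * below that boundary; if physical contiguity was requested and the
	 * backing pages aren't IOVA-contiguous, the loop retries at lower
	 * addresses.
	 */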
	while (elem_size >= size) {
		const size_t bmask = ~(bound - 1);
		uintptr_t end_pt = (uintptr_t)elem +
				elem_size - MALLOC_ELEM_TRAILER_LEN;
		uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
				align);
		uintptr_t new_elem_start;

		/* check boundary */
		if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
			end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
			new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
					align);
			end_pt = new_data_start + size;

			if (((end_pt - 1) & bmask) != (new_data_start & bmask))
				return NULL;
		}

		new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

		/* if the new start point is before the existing start,
		 * it won't fit
		 */
		if (new_elem_start < (uintptr_t)elem)
			return NULL;

		if (contig) {
			size_t new_data_size = end_pt - new_data_start;

			/*
			 * if physical contiguousness was requested and we
			 * couldn't fit all data into one physically contiguous
			 * block, try again with lower addresses.
			 */
			if (!elem_check_phys_contig(elem->msl,
					(void *)new_data_start,
					new_data_size)) {
				elem_size -= align;
				continue;
			}
		}
		return (void *)new_elem_start;
	}
	return NULL;
}

/*
 * use elem_start_pt to determine if we can meet the size and
 * alignment request from the current element
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	return elem_start_pt(elem, size, align, bound, contig) != NULL;
}

/*
 * split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
			elem->orig_elem, elem->orig_size);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
	if (elem->pad) {
		/* Update the padded inner element's size. */
		elem = RTE_PTR_ADD(elem, elem->pad);
		elem->size = old_elem_size - elem->pad;
	}
}

/*
 * our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem->next == RTE_PTR_ADD(elem, elem->size) &&
			elem->next->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->next->orig_elem);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
			elem->prev->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->prev->orig_elem);
}

/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
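/*
 * Worked example (using MALLOC_MINSIZE_LOG2 = 8 and MALLOC_LOG2_INCREMENT = 2
 * defined below): for size = 5000 the next power of two is 2^13, so the index
 * is (13 - 8 + 2 - 1) / 2 = 3, matching the (2^12, 2^14] range above.
 */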
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
			MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
			index : RTE_HEAP_NUM_FREELISTS - 1;
}

/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}

/*
 * reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc so parameter checking
 * is not done here, as it's done there previously.
 */
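/*
 * Depending on how much space is left over, the allocation below can both
 * split the tail of the element off into a new free element and either
 * record the leftover space in front of the returned element as padding
 * (with a dummy ELEM_PAD header) or split that front part off as a smaller
 * free element that goes back on a free list.
 */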
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
			contig);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything
				      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}

/*
 * join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;
	elem1->next = next;
	if (elem1->pad) {
		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
		inner->size = elem1->size - elem1->pad;
	}
}

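/*
 * Coalescing sketch: with three adjacent elements [FREE][BUSY][FREE], freeing
 * the middle one lands here and both neighbours are merged into it, leaving a
 * single larger FREE element. The header/trailer bytes that used to separate
 * the elements are overwritten with MALLOC_POISON.
 */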
struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if next element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);
	}

	/*
	 * check if prev element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;
		size_t erase_len;

		/* we will want to erase trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);

		elem = new_elem;
	}

	return elem;
}

/*
 * free a malloc_elem block by adding it to the free list. If the
 * blocks either immediately before or immediately after the newly freed
 * block are also free, the blocks are merged together.
 */
struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	elem->pad = 0;

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

	/* poison memory */
	memset(ptr, MALLOC_POISON, data_len);

	return elem;
}

/* assume all checks were already done */
void
malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
{
	struct malloc_elem *hide_start, *hide_end, *prev, *next;
	size_t len_before, len_after;

	hide_start = start;
	hide_end = RTE_PTR_ADD(start, len);

	prev = elem->prev;
	next = elem->next;

	/* we cannot do anything with non-adjacent elements */
	if (next && next_elem_is_adjacent(elem)) {
		len_after = RTE_PTR_DIFF(next, hide_end);
		if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			/* split after */
			split_elem(elem, hide_end);

			malloc_elem_free_list_insert(hide_end);
		} else if (len_after > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	/* we cannot do anything with non-adjacent elements */
	if (prev && prev_elem_is_adjacent(elem)) {
		len_before = RTE_PTR_DIFF(hide_start, elem);
		if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			/* split before */
			split_elem(elem, hide_start);

			prev = elem;
			elem = hide_start;

			malloc_elem_free_list_insert(prev);
		} else if (len_before > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	remove_elem(elem);
}

/*
 * attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
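/*
 * Rough example: growing a 4KiB allocation to 8KiB in place only succeeds if
 * the element immediately after it is free, adjacent and large enough to
 * cover the extra ~4KiB plus element overhead; any surplus beyond the minimum
 * element size is split back off as a new, cache-line aligned free element.
 */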
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size)
		return 0;

	/* check if there is a next element, it's free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit, by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}
	return 0;
}

static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, "  len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, "  prev: %p next: %p\n", elem->prev, elem->next);
}