#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/*
 * In auto mode, arenas switch to huge pages for the base allocator on the
 * second base block. a0 switches to thp on the 5th block (after 20 megabytes
 * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
 */

#define BASE_AUTO_THP_THRESHOLD    2
#define BASE_AUTO_THP_THRESHOLD_A0 5

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

static void *
base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (ehooks_are_default(ehooks)) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
		if (have_madvise_huge && addr) {
			pages_set_thp_state(addr, size);
		}
	} else {
		addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
		    &commit);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success. This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc. This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to
	 * be in some consistent-but-allocated state.
	 */
	if (ehooks_are_default(ehooks)) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked. This should never happen. */
		not_reached();
	} else {
		if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) {
			goto label_done;
		}
		if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		/* Nothing worked. That's the application's problem. */
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

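/*
 * Initialize an edata to track the usable space in a base block, assigning it
 * the next extent serial number. Base edatas are used only by the bump
 * allocator in this file.
 */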
static void
base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	edata_binit(edata, addr, size, sn);
}

static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

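/*
 * Carve size bytes, at the requested alignment, off the front of *edata and
 * shrink the edata to the remainder. *gap_size receives the bytes skipped to
 * reach the alignment (e.g. an extent starting at an address ending in 0x10,
 * bumped at 64-byte alignment, leaves a 48-byte gap); the gap itself is
 * discarded, but the caller charges it to the resident/n_thp stats.
 */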
static void *
base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
	    alignment) - (uintptr_t)edata_addr_get(edata);
	ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
	assert(edata_bsize_get(edata) >= *gap_size + size);
	edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
	    *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
	    edata_sn_get(edata));
	return ret;
}

static void
base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
    void *addr, size_t size) {
	if (edata_bsize_get(edata) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(edata_bsize_get(edata) + 1) - 1;
		edata_heap_insert(&base->avail[index_floor], edata);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that
		 * is crossed by the new allocation. Adjust n_thp similarly
		 * when metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

static void *
base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges. Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
	    *pind_last + 1 : *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_edata_init(extent_sn_next, &block->edata,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

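/*
 * Illustration of the block-size growth in base_block_alloc(), assuming
 * 4 KiB pages, 2 MiB huge pages, and metadata requests small enough that
 * next_block_size wins: successive blocks come out as 2, 4, 6, 8, 10, ... MiB,
 * so a0's first four blocks total 20 MiB, which is the "20 megabytes"
 * mentioned in the BASE_AUTO_THP_THRESHOLD_A0 comment at the top of this file.
 */
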
/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static edata_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, ehooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		      && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->edata;
}

base_t *
b0get(void) {
	return b0;
}

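/*
 * Create a new base. The caller's extent_hooks become the arena-facing hooks
 * (base->ehooks); metadata_use_hooks controls whether the base's own metadata
 * blocks also go through those hooks (base->ehooks_base) or fall back to the
 * default mmap-based hooks.
 */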
base_t *
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
    bool metadata_use_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;

	/*
	 * The base will contain the ehooks eventually, but it itself is
	 * allocated using them. So we use some stack ehooks to bootstrap its
	 * memory, and then initialize the ehooks within the base_t.
	 */
	ehooks_t fake_ehooks;
	ehooks_init(&fake_ehooks, metadata_use_hooks ?
	    (extent_hooks_t *)__UNCONST(extent_hooks) :
	    (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks), ind);

	base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
	    &gap_size, base_size, base_alignment);
	ehooks_init(&base->ehooks, (extent_hooks_t *)__UNCONST(extent_hooks),
	    ind);
	ehooks_init(&base->ehooks_base, metadata_use_hooks ?
	    (extent_hooks_t *)__UNCONST(extent_hooks) :
	    (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks), ind);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		edata_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ?
		    HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
	    base_size);

	return base;
}

void
base_delete(tsdn_t *tsdn, base_t *base) {
	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, ehooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

ehooks_t *
base_ehooks_get(base_t *base) {
	return &base->ehooks;
}

ehooks_t *
base_ehooks_get_for_metadata(base_t *base) {
	return &base->ehooks_base;
}

extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks =
	    ehooks_get_extent_hooks_ptr(&base->ehooks);
	ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
	return old_extent_hooks;
}

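/*
 * Core allocation path. The avail-heap search starts at the size class of
 * asize (usize padded by alignment - QUANTUM) so that any extent found there
 * is large enough to hold usize even after the worst-case gap needed to reach
 * the requested alignment. If esn is non-NULL, it receives the serial number
 * of the extent the allocation was carved from.
 */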
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;

	edata_t *edata = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
		edata = edata_heap_remove_first(&base->avail[i]);
		if (edata != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (edata == NULL) {
		/* Try to allocate more space. */
		edata = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (edata == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, edata, usize, alignment);
	if (esn != NULL) {
		*esn = (size_t)edata_sn_get(edata);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as
 * radix tree nodes efficient with respect to physical memory usage. Upon
 * success a pointer to at least size bytes with specified alignment is
 * returned. Note that size is rounded up to the nearest multiple of alignment
 * to avoid false sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

edata_t *
base_alloc_edata(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
	    EDATA_ALIGNMENT, &esn);
	if (edata == NULL) {
		return NULL;
	}
	edata_esn_set(edata, esn);
	return edata;
}

void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0,
	    (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks),
	    /* metadata_use_hooks */ true);
	return (b0 == NULL);
}