1 #include "jemalloc/internal/jemalloc_preamble.h" 2 3 #include "jemalloc/internal/pages.h" 4 5 #include "jemalloc/internal/jemalloc_internal_includes.h" 6 7 #include "jemalloc/internal/assert.h" 8 #include "jemalloc/internal/malloc_io.h" 9 10 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 11 #include <sys/sysctl.h> 12 #ifdef __FreeBSD__ 13 #include <vm/vm_param.h> 14 #endif 15 #endif 16 #ifdef __NetBSD__ 17 #include <sys/bitops.h> /* ilog2 */ 18 #endif 19 #ifdef JEMALLOC_HAVE_VM_MAKE_TAG 20 #define PAGES_FD_TAG VM_MAKE_TAG(101U) 21 #else 22 #define PAGES_FD_TAG -1 23 #endif 24 25 /******************************************************************************/ 26 /* Data. */ 27 28 /* Actual operating system page size, detected during bootstrap, <= PAGE. */ 29 static size_t os_page; 30 31 #ifndef _WIN32 32 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) 33 # define PAGES_PROT_DECOMMIT (PROT_NONE) 34 static int mmap_flags; 35 #endif 36 static bool os_overcommits; 37 38 const char *thp_mode_names[] = { 39 "default", 40 "always", 41 "never", 42 "not supported" 43 }; 44 thp_mode_t opt_thp = THP_MODE_DEFAULT; 45 thp_mode_t init_system_thp_mode; 46 47 /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ 48 static bool pages_can_purge_lazy_runtime = true; 49 50 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS 51 static int madvise_dont_need_zeros_is_faulty = -1; 52 /** 53 * Check that MADV_DONTNEED will actually zero pages on subsequent access. 54 * 55 * Since qemu does not support this, yet [1], and you can get very tricky 56 * assert if you will run program with jemalloc in use under qemu: 57 * 58 * <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" 59 * 60 * [1]: https://patchwork.kernel.org/patch/10576637/ 61 */ 62 static int madvise_MADV_DONTNEED_zeroes_pages() 63 { 64 int works = -1; 65 size_t size = PAGE; 66 67 void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, 68 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 69 70 if (addr == MAP_FAILED) { 71 malloc_write("<jemalloc>: Cannot allocate memory for " 72 "MADV_DONTNEED check\n"); 73 if (opt_abort) { 74 abort(); 75 } 76 } 77 78 memset(addr, 'A', size); 79 if (madvise(addr, size, MADV_DONTNEED) == 0) { 80 works = memchr(addr, 'A', size) == NULL; 81 } else { 82 /* 83 * If madvise() does not support MADV_DONTNEED, then we can 84 * call it anyway, and use it's return code. 85 */ 86 works = 1; 87 } 88 89 if (munmap(addr, size) != 0) { 90 malloc_write("<jemalloc>: Cannot deallocate memory for " 91 "MADV_DONTNEED check\n"); 92 if (opt_abort) { 93 abort(); 94 } 95 } 96 97 return works; 98 } 99 #endif 100 101 /******************************************************************************/ 102 /* 103 * Function prototypes for static functions that are referenced prior to 104 * definition. 105 */ 106 107 static void os_pages_unmap(void *addr, size_t size); 108 109 /******************************************************************************/ 110 111 static void * 112 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { 113 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 114 assert(ALIGNMENT_CEILING(size, os_page) == size); 115 assert(size != 0); 116 117 if (os_overcommits) { 118 *commit = true; 119 } 120 121 void *ret; 122 #ifdef _WIN32 123 /* 124 * If VirtualAlloc can't allocate at the given address when one is 125 * given, it fails and returns NULL. 126 */ 127 ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), 128 PAGE_READWRITE); 129 #else 130 /* 131 * We don't use MAP_FIXED here, because it can cause the *replacement* 132 * of existing mappings, and we only want to create new mappings. 133 */ 134 { 135 int flags = mmap_flags; 136 #ifdef __NetBSD__ 137 /* 138 * On NetBSD PAGE for a platform is defined to the 139 * maximum page size of all machine architectures 140 * for that platform, so that we can use the same 141 * binaries across all machine architectures. 142 */ 143 if (alignment > os_page || PAGE > os_page) { 144 unsigned int a = ilog2(MAX(alignment, PAGE)); 145 flags |= MAP_ALIGNED(a); 146 } 147 #endif 148 int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; 149 150 ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0); 151 } 152 assert(ret != NULL); 153 154 if (ret == MAP_FAILED) { 155 ret = NULL; 156 } else if (addr != NULL && ret != addr) { 157 /* 158 * We succeeded in mapping memory, but not in the right place. 159 */ 160 os_pages_unmap(ret, size); 161 ret = NULL; 162 } 163 #endif 164 assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && 165 ret == addr)); 166 return ret; 167 } 168 169 static void * 170 os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, 171 bool *commit) { 172 void *ret = (void *)((uintptr_t)addr + leadsize); 173 174 assert(alloc_size >= leadsize + size); 175 #ifdef _WIN32 176 os_pages_unmap(addr, alloc_size); 177 void *new_addr = os_pages_map(ret, size, PAGE, commit); 178 if (new_addr == ret) { 179 return ret; 180 } 181 if (new_addr != NULL) { 182 os_pages_unmap(new_addr, size); 183 } 184 return NULL; 185 #else 186 size_t trailsize = alloc_size - leadsize - size; 187 188 if (leadsize != 0) { 189 os_pages_unmap(addr, leadsize); 190 } 191 if (trailsize != 0) { 192 os_pages_unmap((void *)((uintptr_t)ret + size), trailsize); 193 } 194 return ret; 195 #endif 196 } 197 198 static void 199 os_pages_unmap(void *addr, size_t size) { 200 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 201 assert(ALIGNMENT_CEILING(size, os_page) == size); 202 203 #ifdef _WIN32 204 if (VirtualFree(addr, 0, MEM_RELEASE) == 0) 205 #else 206 if (munmap(addr, size) == -1) 207 #endif 208 { 209 char buf[BUFERROR_BUF]; 210 211 buferror(get_errno(), buf, sizeof(buf)); 212 malloc_printf("<jemalloc>: Error in " 213 #ifdef _WIN32 214 "VirtualFree" 215 #else 216 "munmap" 217 #endif 218 "(): %s\n", buf); 219 if (opt_abort) { 220 abort(); 221 } 222 } 223 } 224 225 static void * 226 pages_map_slow(size_t size, size_t alignment, bool *commit) { 227 size_t alloc_size = size + alignment - os_page; 228 /* Beware size_t wrap-around. */ 229 if (alloc_size < size) { 230 return NULL; 231 } 232 233 void *ret; 234 do { 235 void *pages = os_pages_map(NULL, alloc_size, alignment, commit); 236 if (pages == NULL) { 237 return NULL; 238 } 239 size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) 240 - (uintptr_t)pages; 241 ret = os_pages_trim(pages, alloc_size, leadsize, size, commit); 242 } while (ret == NULL); 243 244 assert(ret != NULL); 245 assert(PAGE_ADDR2BASE(ret) == ret); 246 return ret; 247 } 248 249 void * 250 pages_map(void *addr, size_t size, size_t alignment, bool *commit) { 251 assert(alignment >= PAGE); 252 assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr); 253 254 #if defined(__FreeBSD__) && defined(MAP_EXCL) 255 /* 256 * FreeBSD has mechanisms both to mmap at specific address without 257 * touching existing mappings, and to mmap with specific alignment. 258 */ 259 { 260 if (os_overcommits) { 261 *commit = true; 262 } 263 264 int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; 265 int flags = mmap_flags; 266 267 if (addr != NULL) { 268 flags |= MAP_FIXED | MAP_EXCL; 269 } else { 270 unsigned alignment_bits = ffs_zu(alignment); 271 assert(alignment_bits > 0); 272 flags |= MAP_ALIGNED(alignment_bits); 273 } 274 275 void *ret = mmap(addr, size, prot, flags, -1, 0); 276 if (ret == MAP_FAILED) { 277 ret = NULL; 278 } 279 280 return ret; 281 } 282 #endif 283 /* 284 * Ideally, there would be a way to specify alignment to mmap() (like 285 * NetBSD has), but in the absence of such a feature, we have to work 286 * hard to efficiently create aligned mappings. The reliable, but 287 * slow method is to create a mapping that is over-sized, then trim the 288 * excess. However, that always results in one or two calls to 289 * os_pages_unmap(), and it can leave holes in the process's virtual 290 * memory map if memory grows downward. 291 * 292 * Optimistically try mapping precisely the right amount before falling 293 * back to the slow method, with the expectation that the optimistic 294 * approach works most of the time. 295 */ 296 297 void *ret = os_pages_map(addr, size, os_page, commit); 298 if (ret == NULL || ret == addr) { 299 return ret; 300 } 301 assert(addr == NULL); 302 if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) { 303 os_pages_unmap(ret, size); 304 return pages_map_slow(size, alignment, commit); 305 } 306 307 assert(PAGE_ADDR2BASE(ret) == ret); 308 return ret; 309 } 310 311 void 312 pages_unmap(void *addr, size_t size) { 313 assert(PAGE_ADDR2BASE(addr) == addr); 314 assert(PAGE_CEILING(size) == size); 315 316 os_pages_unmap(addr, size); 317 } 318 319 static bool 320 os_pages_commit(void *addr, size_t size, bool commit) { 321 assert(PAGE_ADDR2BASE(addr) == addr); 322 assert(PAGE_CEILING(size) == size); 323 324 #ifdef _WIN32 325 return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, 326 PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); 327 #else 328 { 329 int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; 330 void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, 331 PAGES_FD_TAG, 0); 332 if (result == MAP_FAILED) { 333 return true; 334 } 335 if (result != addr) { 336 /* 337 * We succeeded in mapping memory, but not in the right 338 * place. 339 */ 340 os_pages_unmap(result, size); 341 return true; 342 } 343 return false; 344 } 345 #endif 346 } 347 348 static bool 349 pages_commit_impl(void *addr, size_t size, bool commit) { 350 if (os_overcommits) { 351 return true; 352 } 353 354 return os_pages_commit(addr, size, commit); 355 } 356 357 bool 358 pages_commit(void *addr, size_t size) { 359 return pages_commit_impl(addr, size, true); 360 } 361 362 bool 363 pages_decommit(void *addr, size_t size) { 364 return pages_commit_impl(addr, size, false); 365 } 366 367 void 368 pages_mark_guards(void *head, void *tail) { 369 assert(head != NULL || tail != NULL); 370 assert(head == NULL || tail == NULL || 371 (uintptr_t)head < (uintptr_t)tail); 372 #ifdef JEMALLOC_HAVE_MPROTECT 373 if (head != NULL) { 374 mprotect(head, PAGE, PROT_NONE); 375 } 376 if (tail != NULL) { 377 mprotect(tail, PAGE, PROT_NONE); 378 } 379 #else 380 /* Decommit sets to PROT_NONE / MEM_DECOMMIT. */ 381 if (head != NULL) { 382 os_pages_commit(head, PAGE, false); 383 } 384 if (tail != NULL) { 385 os_pages_commit(tail, PAGE, false); 386 } 387 #endif 388 } 389 390 void 391 pages_unmark_guards(void *head, void *tail) { 392 assert(head != NULL || tail != NULL); 393 assert(head == NULL || tail == NULL || 394 (uintptr_t)head < (uintptr_t)tail); 395 #ifdef JEMALLOC_HAVE_MPROTECT 396 bool head_and_tail = (head != NULL) && (tail != NULL); 397 size_t range = head_and_tail ? 398 (uintptr_t)tail - (uintptr_t)head + PAGE : 399 SIZE_T_MAX; 400 /* 401 * The amount of work that the kernel does in mprotect depends on the 402 * range argument. SC_LARGE_MINCLASS is an arbitrary threshold chosen 403 * to prevent kernel from doing too much work that would outweigh the 404 * savings of performing one less system call. 405 */ 406 bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS; 407 if (ranged_mprotect) { 408 mprotect(head, range, PROT_READ | PROT_WRITE); 409 } else { 410 if (head != NULL) { 411 mprotect(head, PAGE, PROT_READ | PROT_WRITE); 412 } 413 if (tail != NULL) { 414 mprotect(tail, PAGE, PROT_READ | PROT_WRITE); 415 } 416 } 417 #else 418 if (head != NULL) { 419 os_pages_commit(head, PAGE, true); 420 } 421 if (tail != NULL) { 422 os_pages_commit(tail, PAGE, true); 423 } 424 #endif 425 } 426 427 bool 428 pages_purge_lazy(void *addr, size_t size) { 429 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 430 assert(PAGE_CEILING(size) == size); 431 432 if (!pages_can_purge_lazy) { 433 return true; 434 } 435 if (!pages_can_purge_lazy_runtime) { 436 /* 437 * Built with lazy purge enabled, but detected it was not 438 * supported on the current system. 439 */ 440 return true; 441 } 442 443 #ifdef _WIN32 444 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); 445 return false; 446 #elif defined(JEMALLOC_PURGE_MADVISE_FREE) 447 return (madvise(addr, size, 448 # ifdef MADV_FREE 449 MADV_FREE 450 # else 451 JEMALLOC_MADV_FREE 452 # endif 453 ) != 0); 454 #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ 455 !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) 456 return (madvise(addr, size, MADV_DONTNEED) != 0); 457 #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ 458 !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) 459 return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); 460 #else 461 not_reached(); 462 #endif 463 } 464 465 bool 466 pages_purge_forced(void *addr, size_t size) { 467 assert(PAGE_ADDR2BASE(addr) == addr); 468 assert(PAGE_CEILING(size) == size); 469 470 if (!pages_can_purge_forced) { 471 return true; 472 } 473 474 #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ 475 defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) 476 return (unlikely(madvise_dont_need_zeros_is_faulty) || 477 madvise(addr, size, MADV_DONTNEED) != 0); 478 #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ 479 defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) 480 return (unlikely(madvise_dont_need_zeros_is_faulty) || 481 posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); 482 #elif defined(JEMALLOC_MAPS_COALESCE) 483 /* Try to overlay a new demand-zeroed mapping. */ 484 return pages_commit(addr, size); 485 #else 486 not_reached(); 487 #endif 488 } 489 490 static bool 491 pages_huge_impl(void *addr, size_t size, bool aligned) { 492 if (aligned) { 493 assert(HUGEPAGE_ADDR2BASE(addr) == addr); 494 assert(HUGEPAGE_CEILING(size) == size); 495 } 496 #if defined(JEMALLOC_HAVE_MADVISE_HUGE) 497 return (madvise(addr, size, MADV_HUGEPAGE) != 0); 498 #elif defined(JEMALLOC_HAVE_MEMCNTL) 499 struct memcntl_mha m = {0}; 500 m.mha_cmd = MHA_MAPSIZE_VA; 501 m.mha_pagesize = HUGEPAGE; 502 return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0); 503 #else 504 return true; 505 #endif 506 } 507 508 bool 509 pages_huge(void *addr, size_t size) { 510 return pages_huge_impl(addr, size, true); 511 } 512 513 static bool 514 pages_huge_unaligned(void *addr, size_t size) { 515 return pages_huge_impl(addr, size, false); 516 } 517 518 static bool 519 pages_nohuge_impl(void *addr, size_t size, bool aligned) { 520 if (aligned) { 521 assert(HUGEPAGE_ADDR2BASE(addr) == addr); 522 assert(HUGEPAGE_CEILING(size) == size); 523 } 524 525 #ifdef JEMALLOC_HAVE_MADVISE_HUGE 526 return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); 527 #else 528 return false; 529 #endif 530 } 531 532 bool 533 pages_nohuge(void *addr, size_t size) { 534 return pages_nohuge_impl(addr, size, true); 535 } 536 537 static bool 538 pages_nohuge_unaligned(void *addr, size_t size) { 539 return pages_nohuge_impl(addr, size, false); 540 } 541 542 bool 543 pages_dontdump(void *addr, size_t size) { 544 assert(PAGE_ADDR2BASE(addr) == addr); 545 assert(PAGE_CEILING(size) == size); 546 #if defined(JEMALLOC_MADVISE_DONTDUMP) 547 return madvise(addr, size, MADV_DONTDUMP) != 0; 548 #elif defined(JEMALLOC_MADVISE_NOCORE) 549 return madvise(addr, size, MADV_NOCORE) != 0; 550 #else 551 return false; 552 #endif 553 } 554 555 bool 556 pages_dodump(void *addr, size_t size) { 557 assert(PAGE_ADDR2BASE(addr) == addr); 558 assert(PAGE_CEILING(size) == size); 559 #if defined(JEMALLOC_MADVISE_DONTDUMP) 560 return madvise(addr, size, MADV_DODUMP) != 0; 561 #elif defined(JEMALLOC_MADVISE_NOCORE) 562 return madvise(addr, size, MADV_CORE) != 0; 563 #else 564 return false; 565 #endif 566 } 567 568 569 static size_t 570 os_page_detect(void) { 571 #ifdef _WIN32 572 SYSTEM_INFO si; 573 GetSystemInfo(&si); 574 return si.dwPageSize; 575 #elif defined(__FreeBSD__) 576 /* 577 * This returns the value obtained from 578 * the auxv vector, avoiding a syscall. 579 */ 580 return getpagesize(); 581 #else 582 long result = sysconf(_SC_PAGESIZE); 583 if (result == -1) { 584 return LG_PAGE; 585 } 586 return (size_t)result; 587 #endif 588 } 589 590 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 591 static bool 592 os_overcommits_sysctl(void) { 593 int vm_overcommit; 594 size_t sz; 595 596 sz = sizeof(vm_overcommit); 597 #if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) 598 int mib[2]; 599 600 mib[0] = CTL_VM; 601 mib[1] = VM_OVERCOMMIT; 602 if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { 603 return false; /* Error. */ 604 } 605 #else 606 if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { 607 return false; /* Error. */ 608 } 609 #endif 610 611 return ((vm_overcommit & 0x3) == 0); 612 } 613 #endif 614 615 #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY 616 /* 617 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid 618 * reentry during bootstrapping if another library has interposed system call 619 * wrappers. 620 */ 621 static bool 622 os_overcommits_proc(void) { 623 int fd; 624 char buf[1]; 625 626 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) 627 #if defined(O_CLOEXEC) 628 fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | 629 O_CLOEXEC); 630 #else 631 fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); 632 if (fd != -1) { 633 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 634 } 635 #endif 636 #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) 637 #if defined(O_CLOEXEC) 638 fd = (int)syscall(SYS_openat, 639 AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); 640 #else 641 fd = (int)syscall(SYS_openat, 642 AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); 643 if (fd != -1) { 644 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 645 } 646 #endif 647 #else 648 #if defined(O_CLOEXEC) 649 fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); 650 #else 651 fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); 652 if (fd != -1) { 653 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 654 } 655 #endif 656 #endif 657 658 if (fd == -1) { 659 return false; /* Error. */ 660 } 661 662 ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); 663 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) 664 syscall(SYS_close, fd); 665 #else 666 close(fd); 667 #endif 668 669 if (nread < 1) { 670 return false; /* Error. */ 671 } 672 /* 673 * /proc/sys/vm/overcommit_memory meanings: 674 * 0: Heuristic overcommit. 675 * 1: Always overcommit. 676 * 2: Never overcommit. 677 */ 678 return (buf[0] == '0' || buf[0] == '1'); 679 } 680 #endif 681 682 void 683 pages_set_thp_state (void *ptr, size_t size) { 684 if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { 685 return; 686 } 687 assert(opt_thp != thp_mode_not_supported && 688 init_system_thp_mode != thp_mode_not_supported); 689 690 if (opt_thp == thp_mode_always 691 && init_system_thp_mode != thp_mode_never) { 692 assert(init_system_thp_mode == thp_mode_default); 693 pages_huge_unaligned(ptr, size); 694 } else if (opt_thp == thp_mode_never) { 695 assert(init_system_thp_mode == thp_mode_default || 696 init_system_thp_mode == thp_mode_always); 697 pages_nohuge_unaligned(ptr, size); 698 } 699 } 700 701 static void 702 init_thp_state(void) { 703 if (!have_madvise_huge && !have_memcntl) { 704 if (metadata_thp_enabled() && opt_abort) { 705 malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n"); 706 abort(); 707 } 708 goto label_error; 709 } 710 #if defined(JEMALLOC_HAVE_MADVISE_HUGE) 711 static const char sys_state_madvise[] = "always [madvise] never\n"; 712 static const char sys_state_always[] = "[always] madvise never\n"; 713 static const char sys_state_never[] = "always madvise [never]\n"; 714 char buf[sizeof(sys_state_madvise)]; 715 716 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) 717 int fd = (int)syscall(SYS_open, 718 "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); 719 #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) 720 int fd = (int)syscall(SYS_openat, 721 AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); 722 #else 723 int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); 724 #endif 725 if (fd == -1) { 726 goto label_error; 727 } 728 729 ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); 730 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) 731 syscall(SYS_close, fd); 732 #else 733 close(fd); 734 #endif 735 736 if (nread < 0) { 737 goto label_error; 738 } 739 740 if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { 741 init_system_thp_mode = thp_mode_default; 742 } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { 743 init_system_thp_mode = thp_mode_always; 744 } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { 745 init_system_thp_mode = thp_mode_never; 746 } else { 747 goto label_error; 748 } 749 return; 750 #elif defined(JEMALLOC_HAVE_MEMCNTL) 751 init_system_thp_mode = thp_mode_default; 752 return; 753 #endif 754 label_error: 755 opt_thp = init_system_thp_mode = thp_mode_not_supported; 756 } 757 758 bool 759 pages_boot(void) { 760 os_page = os_page_detect(); 761 if (os_page > PAGE) { 762 malloc_write("<jemalloc>: Unsupported system page size\n"); 763 if (opt_abort) { 764 abort(); 765 } 766 return true; 767 } 768 769 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS 770 if (!opt_trust_madvise) { 771 madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); 772 if (madvise_dont_need_zeros_is_faulty) { 773 malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n"); 774 malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n"); 775 } 776 } else { 777 /* In case opt_trust_madvise is disable, 778 * do not do runtime check */ 779 madvise_dont_need_zeros_is_faulty = 0; 780 } 781 #endif 782 783 #ifndef _WIN32 784 mmap_flags = MAP_PRIVATE | MAP_ANON; 785 #endif 786 787 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 788 os_overcommits = os_overcommits_sysctl(); 789 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) 790 os_overcommits = os_overcommits_proc(); 791 # ifdef MAP_NORESERVE 792 if (os_overcommits) { 793 mmap_flags |= MAP_NORESERVE; 794 } 795 # endif 796 #elif defined(__NetBSD__) 797 os_overcommits = true; 798 #else 799 os_overcommits = false; 800 #endif 801 802 init_thp_state(); 803 804 #ifdef __FreeBSD__ 805 /* 806 * FreeBSD doesn't need the check; madvise(2) is known to work. 807 */ 808 #else 809 /* Detect lazy purge runtime support. */ 810 if (pages_can_purge_lazy) { 811 bool committed = false; 812 void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); 813 if (madv_free_page == NULL) { 814 return true; 815 } 816 assert(pages_can_purge_lazy_runtime); 817 if (pages_purge_lazy(madv_free_page, PAGE)) { 818 pages_can_purge_lazy_runtime = false; 819 } 820 os_pages_unmap(madv_free_page, PAGE); 821 } 822 #endif 823 824 return false; 825 } 826