1*8e33eff8Schristos #define JEMALLOC_PAGES_C_ 2*8e33eff8Schristos #include "jemalloc/internal/jemalloc_preamble.h" 3*8e33eff8Schristos 4*8e33eff8Schristos #include "jemalloc/internal/pages.h" 5*8e33eff8Schristos 6*8e33eff8Schristos #include "jemalloc/internal/jemalloc_internal_includes.h" 7*8e33eff8Schristos 8*8e33eff8Schristos #include "jemalloc/internal/assert.h" 9*8e33eff8Schristos #include "jemalloc/internal/malloc_io.h" 10*8e33eff8Schristos 11*8e33eff8Schristos #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 12*8e33eff8Schristos #include <sys/sysctl.h> 13*8e33eff8Schristos #ifdef __FreeBSD__ 14*8e33eff8Schristos #include <vm/vm_param.h> 15*8e33eff8Schristos #endif 16*8e33eff8Schristos #endif 17*8e33eff8Schristos #ifdef MAP_ALIGNED 18*8e33eff8Schristos #include <sys/bitops.h> /* NetBSD */ 19*8e33eff8Schristos #endif 20*8e33eff8Schristos 21*8e33eff8Schristos /******************************************************************************/ 22*8e33eff8Schristos /* Data. */ 23*8e33eff8Schristos 24*8e33eff8Schristos /* Actual operating system page size, detected during bootstrap, <= PAGE. */ 25*8e33eff8Schristos static size_t os_page; 26*8e33eff8Schristos 27*8e33eff8Schristos #ifndef _WIN32 28*8e33eff8Schristos # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) 29*8e33eff8Schristos # define PAGES_PROT_DECOMMIT (PROT_NONE) 30*8e33eff8Schristos static int mmap_flags; 31*8e33eff8Schristos #endif 32*8e33eff8Schristos static bool os_overcommits; 33*8e33eff8Schristos 34*8e33eff8Schristos const char *thp_mode_names[] = { 35*8e33eff8Schristos "default", 36*8e33eff8Schristos "always", 37*8e33eff8Schristos "never", 38*8e33eff8Schristos "not supported" 39*8e33eff8Schristos }; 40*8e33eff8Schristos thp_mode_t opt_thp = THP_MODE_DEFAULT; 41*8e33eff8Schristos thp_mode_t init_system_thp_mode; 42*8e33eff8Schristos 43*8e33eff8Schristos /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ 44*8e33eff8Schristos static bool pages_can_purge_lazy_runtime = true; 45*8e33eff8Schristos 46*8e33eff8Schristos /******************************************************************************/ 47*8e33eff8Schristos /* 48*8e33eff8Schristos * Function prototypes for static functions that are referenced prior to 49*8e33eff8Schristos * definition. 50*8e33eff8Schristos */ 51*8e33eff8Schristos 52*8e33eff8Schristos static void os_pages_unmap(void *addr, size_t size); 53*8e33eff8Schristos 54*8e33eff8Schristos /******************************************************************************/ 55*8e33eff8Schristos 56*8e33eff8Schristos static void * 57*8e33eff8Schristos os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { 58*8e33eff8Schristos assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 59*8e33eff8Schristos assert(ALIGNMENT_CEILING(size, os_page) == size); 60*8e33eff8Schristos assert(size != 0); 61*8e33eff8Schristos 62*8e33eff8Schristos if (os_overcommits) { 63*8e33eff8Schristos *commit = true; 64*8e33eff8Schristos } 65*8e33eff8Schristos 66*8e33eff8Schristos void *ret; 67*8e33eff8Schristos #ifdef _WIN32 68*8e33eff8Schristos /* 69*8e33eff8Schristos * If VirtualAlloc can't allocate at the given address when one is 70*8e33eff8Schristos * given, it fails and returns NULL. 71*8e33eff8Schristos */ 72*8e33eff8Schristos ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), 73*8e33eff8Schristos PAGE_READWRITE); 74*8e33eff8Schristos #else 75*8e33eff8Schristos /* 76*8e33eff8Schristos * We don't use MAP_FIXED here, because it can cause the *replacement* 77*8e33eff8Schristos * of existing mappings, and we only want to create new mappings. 78*8e33eff8Schristos */ 79*8e33eff8Schristos { 80*8e33eff8Schristos int flags = mmap_flags; 81*8e33eff8Schristos #ifdef MAP_ALIGNED 82*8e33eff8Schristos if (alignment > os_page || PAGE > os_page) { 83*8e33eff8Schristos int a = ilog2(MAX(alignment, PAGE)); 84*8e33eff8Schristos flags |= MAP_ALIGNED(a); 85*8e33eff8Schristos } 86*8e33eff8Schristos #endif 87*8e33eff8Schristos int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; 88*8e33eff8Schristos 89*8e33eff8Schristos ret = mmap(addr, size, prot, flags, -1, 0); 90*8e33eff8Schristos } 91*8e33eff8Schristos assert(ret != NULL); 92*8e33eff8Schristos 93*8e33eff8Schristos if (ret == MAP_FAILED) { 94*8e33eff8Schristos ret = NULL; 95*8e33eff8Schristos } else if (addr != NULL && ret != addr) { 96*8e33eff8Schristos /* 97*8e33eff8Schristos * We succeeded in mapping memory, but not in the right place. 98*8e33eff8Schristos */ 99*8e33eff8Schristos os_pages_unmap(ret, size); 100*8e33eff8Schristos ret = NULL; 101*8e33eff8Schristos } 102*8e33eff8Schristos #endif 103*8e33eff8Schristos assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && 104*8e33eff8Schristos ret == addr)); 105*8e33eff8Schristos return ret; 106*8e33eff8Schristos } 107*8e33eff8Schristos 108*8e33eff8Schristos static void * 109*8e33eff8Schristos os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, 110*8e33eff8Schristos bool *commit) { 111*8e33eff8Schristos void *ret = (void *)((uintptr_t)addr + leadsize); 112*8e33eff8Schristos 113*8e33eff8Schristos assert(alloc_size >= leadsize + size); 114*8e33eff8Schristos #ifdef _WIN32 115*8e33eff8Schristos os_pages_unmap(addr, alloc_size); 116*8e33eff8Schristos void *new_addr = os_pages_map(ret, size, PAGE, commit); 117*8e33eff8Schristos if (new_addr == ret) { 118*8e33eff8Schristos return ret; 119*8e33eff8Schristos } 120*8e33eff8Schristos if (new_addr != NULL) { 121*8e33eff8Schristos os_pages_unmap(new_addr, size); 122*8e33eff8Schristos } 123*8e33eff8Schristos return NULL; 124*8e33eff8Schristos #else 125*8e33eff8Schristos size_t trailsize = alloc_size - leadsize - size; 126*8e33eff8Schristos 127*8e33eff8Schristos if (leadsize != 0) { 128*8e33eff8Schristos os_pages_unmap(addr, leadsize); 129*8e33eff8Schristos } 130*8e33eff8Schristos if (trailsize != 0) { 131*8e33eff8Schristos os_pages_unmap((void *)((uintptr_t)ret + size), trailsize); 132*8e33eff8Schristos } 133*8e33eff8Schristos return ret; 134*8e33eff8Schristos #endif 135*8e33eff8Schristos } 136*8e33eff8Schristos 137*8e33eff8Schristos static void 138*8e33eff8Schristos os_pages_unmap(void *addr, size_t size) { 139*8e33eff8Schristos assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 140*8e33eff8Schristos assert(ALIGNMENT_CEILING(size, os_page) == size); 141*8e33eff8Schristos 142*8e33eff8Schristos #ifdef _WIN32 143*8e33eff8Schristos if (VirtualFree(addr, 0, MEM_RELEASE) == 0) 144*8e33eff8Schristos #else 145*8e33eff8Schristos if (munmap(addr, size) == -1) 146*8e33eff8Schristos #endif 147*8e33eff8Schristos { 148*8e33eff8Schristos char buf[BUFERROR_BUF]; 149*8e33eff8Schristos 150*8e33eff8Schristos buferror(get_errno(), buf, sizeof(buf)); 151*8e33eff8Schristos malloc_printf("<jemalloc>: Error in " 152*8e33eff8Schristos #ifdef _WIN32 153*8e33eff8Schristos "VirtualFree" 154*8e33eff8Schristos #else 155*8e33eff8Schristos "munmap" 156*8e33eff8Schristos #endif 157*8e33eff8Schristos "(): %s\n", buf); 158*8e33eff8Schristos if (opt_abort) { 159*8e33eff8Schristos abort(); 160*8e33eff8Schristos } 161*8e33eff8Schristos } 162*8e33eff8Schristos } 163*8e33eff8Schristos 164*8e33eff8Schristos static void * 165*8e33eff8Schristos pages_map_slow(size_t size, size_t alignment, bool *commit) { 166*8e33eff8Schristos size_t alloc_size = size + alignment - os_page; 167*8e33eff8Schristos /* Beware size_t wrap-around. */ 168*8e33eff8Schristos if (alloc_size < size) { 169*8e33eff8Schristos return NULL; 170*8e33eff8Schristos } 171*8e33eff8Schristos 172*8e33eff8Schristos void *ret; 173*8e33eff8Schristos do { 174*8e33eff8Schristos void *pages = os_pages_map(NULL, alloc_size, alignment, commit); 175*8e33eff8Schristos if (pages == NULL) { 176*8e33eff8Schristos return NULL; 177*8e33eff8Schristos } 178*8e33eff8Schristos size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) 179*8e33eff8Schristos - (uintptr_t)pages; 180*8e33eff8Schristos ret = os_pages_trim(pages, alloc_size, leadsize, size, commit); 181*8e33eff8Schristos } while (ret == NULL); 182*8e33eff8Schristos 183*8e33eff8Schristos assert(ret != NULL); 184*8e33eff8Schristos assert(PAGE_ADDR2BASE(ret) == ret); 185*8e33eff8Schristos return ret; 186*8e33eff8Schristos } 187*8e33eff8Schristos 188*8e33eff8Schristos void * 189*8e33eff8Schristos pages_map(void *addr, size_t size, size_t alignment, bool *commit) { 190*8e33eff8Schristos assert(alignment >= PAGE); 191*8e33eff8Schristos assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr); 192*8e33eff8Schristos 193*8e33eff8Schristos /* 194*8e33eff8Schristos * Ideally, there would be a way to specify alignment to mmap() (like 195*8e33eff8Schristos * NetBSD has), but in the absence of such a feature, we have to work 196*8e33eff8Schristos * hard to efficiently create aligned mappings. The reliable, but 197*8e33eff8Schristos * slow method is to create a mapping that is over-sized, then trim the 198*8e33eff8Schristos * excess. However, that always results in one or two calls to 199*8e33eff8Schristos * os_pages_unmap(), and it can leave holes in the process's virtual 200*8e33eff8Schristos * memory map if memory grows downward. 201*8e33eff8Schristos * 202*8e33eff8Schristos * Optimistically try mapping precisely the right amount before falling 203*8e33eff8Schristos * back to the slow method, with the expectation that the optimistic 204*8e33eff8Schristos * approach works most of the time. 205*8e33eff8Schristos */ 206*8e33eff8Schristos 207*8e33eff8Schristos void *ret = os_pages_map(addr, size, os_page, commit); 208*8e33eff8Schristos if (ret == NULL || ret == addr) { 209*8e33eff8Schristos return ret; 210*8e33eff8Schristos } 211*8e33eff8Schristos assert(addr == NULL); 212*8e33eff8Schristos if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) { 213*8e33eff8Schristos os_pages_unmap(ret, size); 214*8e33eff8Schristos return pages_map_slow(size, alignment, commit); 215*8e33eff8Schristos } 216*8e33eff8Schristos 217*8e33eff8Schristos assert(PAGE_ADDR2BASE(ret) == ret); 218*8e33eff8Schristos return ret; 219*8e33eff8Schristos } 220*8e33eff8Schristos 221*8e33eff8Schristos void 222*8e33eff8Schristos pages_unmap(void *addr, size_t size) { 223*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 224*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 225*8e33eff8Schristos 226*8e33eff8Schristos os_pages_unmap(addr, size); 227*8e33eff8Schristos } 228*8e33eff8Schristos 229*8e33eff8Schristos static bool 230*8e33eff8Schristos pages_commit_impl(void *addr, size_t size, bool commit) { 231*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 232*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 233*8e33eff8Schristos 234*8e33eff8Schristos if (os_overcommits) { 235*8e33eff8Schristos return true; 236*8e33eff8Schristos } 237*8e33eff8Schristos 238*8e33eff8Schristos #ifdef _WIN32 239*8e33eff8Schristos return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, 240*8e33eff8Schristos PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); 241*8e33eff8Schristos #else 242*8e33eff8Schristos { 243*8e33eff8Schristos int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; 244*8e33eff8Schristos void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, 245*8e33eff8Schristos -1, 0); 246*8e33eff8Schristos if (result == MAP_FAILED) { 247*8e33eff8Schristos return true; 248*8e33eff8Schristos } 249*8e33eff8Schristos if (result != addr) { 250*8e33eff8Schristos /* 251*8e33eff8Schristos * We succeeded in mapping memory, but not in the right 252*8e33eff8Schristos * place. 253*8e33eff8Schristos */ 254*8e33eff8Schristos os_pages_unmap(result, size); 255*8e33eff8Schristos return true; 256*8e33eff8Schristos } 257*8e33eff8Schristos return false; 258*8e33eff8Schristos } 259*8e33eff8Schristos #endif 260*8e33eff8Schristos } 261*8e33eff8Schristos 262*8e33eff8Schristos bool 263*8e33eff8Schristos pages_commit(void *addr, size_t size) { 264*8e33eff8Schristos return pages_commit_impl(addr, size, true); 265*8e33eff8Schristos } 266*8e33eff8Schristos 267*8e33eff8Schristos bool 268*8e33eff8Schristos pages_decommit(void *addr, size_t size) { 269*8e33eff8Schristos return pages_commit_impl(addr, size, false); 270*8e33eff8Schristos } 271*8e33eff8Schristos 272*8e33eff8Schristos bool 273*8e33eff8Schristos pages_purge_lazy(void *addr, size_t size) { 274*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 275*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 276*8e33eff8Schristos 277*8e33eff8Schristos if (!pages_can_purge_lazy) { 278*8e33eff8Schristos return true; 279*8e33eff8Schristos } 280*8e33eff8Schristos if (!pages_can_purge_lazy_runtime) { 281*8e33eff8Schristos /* 282*8e33eff8Schristos * Built with lazy purge enabled, but detected it was not 283*8e33eff8Schristos * supported on the current system. 284*8e33eff8Schristos */ 285*8e33eff8Schristos return true; 286*8e33eff8Schristos } 287*8e33eff8Schristos 288*8e33eff8Schristos #ifdef _WIN32 289*8e33eff8Schristos VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); 290*8e33eff8Schristos return false; 291*8e33eff8Schristos #elif defined(JEMALLOC_PURGE_MADVISE_FREE) 292*8e33eff8Schristos return (madvise(addr, size, 293*8e33eff8Schristos # ifdef MADV_FREE 294*8e33eff8Schristos MADV_FREE 295*8e33eff8Schristos # else 296*8e33eff8Schristos JEMALLOC_MADV_FREE 297*8e33eff8Schristos # endif 298*8e33eff8Schristos ) != 0); 299*8e33eff8Schristos #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ 300*8e33eff8Schristos !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) 301*8e33eff8Schristos return (madvise(addr, size, MADV_DONTNEED) != 0); 302*8e33eff8Schristos #else 303*8e33eff8Schristos not_reached(); 304*8e33eff8Schristos #endif 305*8e33eff8Schristos } 306*8e33eff8Schristos 307*8e33eff8Schristos bool 308*8e33eff8Schristos pages_purge_forced(void *addr, size_t size) { 309*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 310*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 311*8e33eff8Schristos 312*8e33eff8Schristos if (!pages_can_purge_forced) { 313*8e33eff8Schristos return true; 314*8e33eff8Schristos } 315*8e33eff8Schristos 316*8e33eff8Schristos #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ 317*8e33eff8Schristos defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) 318*8e33eff8Schristos return (madvise(addr, size, MADV_DONTNEED) != 0); 319*8e33eff8Schristos #elif defined(JEMALLOC_MAPS_COALESCE) 320*8e33eff8Schristos /* Try to overlay a new demand-zeroed mapping. */ 321*8e33eff8Schristos return pages_commit(addr, size); 322*8e33eff8Schristos #else 323*8e33eff8Schristos not_reached(); 324*8e33eff8Schristos #endif 325*8e33eff8Schristos } 326*8e33eff8Schristos 327*8e33eff8Schristos static bool 328*8e33eff8Schristos pages_huge_impl(void *addr, size_t size, bool aligned) { 329*8e33eff8Schristos if (aligned) { 330*8e33eff8Schristos assert(HUGEPAGE_ADDR2BASE(addr) == addr); 331*8e33eff8Schristos assert(HUGEPAGE_CEILING(size) == size); 332*8e33eff8Schristos } 333*8e33eff8Schristos #ifdef JEMALLOC_HAVE_MADVISE_HUGE 334*8e33eff8Schristos return (madvise(addr, size, MADV_HUGEPAGE) != 0); 335*8e33eff8Schristos #else 336*8e33eff8Schristos return true; 337*8e33eff8Schristos #endif 338*8e33eff8Schristos } 339*8e33eff8Schristos 340*8e33eff8Schristos bool 341*8e33eff8Schristos pages_huge(void *addr, size_t size) { 342*8e33eff8Schristos return pages_huge_impl(addr, size, true); 343*8e33eff8Schristos } 344*8e33eff8Schristos 345*8e33eff8Schristos static bool 346*8e33eff8Schristos pages_huge_unaligned(void *addr, size_t size) { 347*8e33eff8Schristos return pages_huge_impl(addr, size, false); 348*8e33eff8Schristos } 349*8e33eff8Schristos 350*8e33eff8Schristos static bool 351*8e33eff8Schristos pages_nohuge_impl(void *addr, size_t size, bool aligned) { 352*8e33eff8Schristos if (aligned) { 353*8e33eff8Schristos assert(HUGEPAGE_ADDR2BASE(addr) == addr); 354*8e33eff8Schristos assert(HUGEPAGE_CEILING(size) == size); 355*8e33eff8Schristos } 356*8e33eff8Schristos 357*8e33eff8Schristos #ifdef JEMALLOC_HAVE_MADVISE_HUGE 358*8e33eff8Schristos return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); 359*8e33eff8Schristos #else 360*8e33eff8Schristos return false; 361*8e33eff8Schristos #endif 362*8e33eff8Schristos } 363*8e33eff8Schristos 364*8e33eff8Schristos bool 365*8e33eff8Schristos pages_nohuge(void *addr, size_t size) { 366*8e33eff8Schristos return pages_nohuge_impl(addr, size, true); 367*8e33eff8Schristos } 368*8e33eff8Schristos 369*8e33eff8Schristos static bool 370*8e33eff8Schristos pages_nohuge_unaligned(void *addr, size_t size) { 371*8e33eff8Schristos return pages_nohuge_impl(addr, size, false); 372*8e33eff8Schristos } 373*8e33eff8Schristos 374*8e33eff8Schristos bool 375*8e33eff8Schristos pages_dontdump(void *addr, size_t size) { 376*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 377*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 378*8e33eff8Schristos #ifdef JEMALLOC_MADVISE_DONTDUMP 379*8e33eff8Schristos return madvise(addr, size, MADV_DONTDUMP) != 0; 380*8e33eff8Schristos #else 381*8e33eff8Schristos return false; 382*8e33eff8Schristos #endif 383*8e33eff8Schristos } 384*8e33eff8Schristos 385*8e33eff8Schristos bool 386*8e33eff8Schristos pages_dodump(void *addr, size_t size) { 387*8e33eff8Schristos assert(PAGE_ADDR2BASE(addr) == addr); 388*8e33eff8Schristos assert(PAGE_CEILING(size) == size); 389*8e33eff8Schristos #ifdef JEMALLOC_MADVISE_DONTDUMP 390*8e33eff8Schristos return madvise(addr, size, MADV_DODUMP) != 0; 391*8e33eff8Schristos #else 392*8e33eff8Schristos return false; 393*8e33eff8Schristos #endif 394*8e33eff8Schristos } 395*8e33eff8Schristos 396*8e33eff8Schristos 397*8e33eff8Schristos static size_t 398*8e33eff8Schristos os_page_detect(void) { 399*8e33eff8Schristos #ifdef _WIN32 400*8e33eff8Schristos SYSTEM_INFO si; 401*8e33eff8Schristos GetSystemInfo(&si); 402*8e33eff8Schristos return si.dwPageSize; 403*8e33eff8Schristos #elif defined(__FreeBSD__) 404*8e33eff8Schristos return getpagesize(); 405*8e33eff8Schristos #else 406*8e33eff8Schristos long result = sysconf(_SC_PAGESIZE); 407*8e33eff8Schristos if (result == -1) { 408*8e33eff8Schristos return LG_PAGE; 409*8e33eff8Schristos } 410*8e33eff8Schristos return (size_t)result; 411*8e33eff8Schristos #endif 412*8e33eff8Schristos } 413*8e33eff8Schristos 414*8e33eff8Schristos #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 415*8e33eff8Schristos static bool 416*8e33eff8Schristos os_overcommits_sysctl(void) { 417*8e33eff8Schristos int vm_overcommit; 418*8e33eff8Schristos size_t sz; 419*8e33eff8Schristos 420*8e33eff8Schristos sz = sizeof(vm_overcommit); 421*8e33eff8Schristos #if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) 422*8e33eff8Schristos int mib[2]; 423*8e33eff8Schristos 424*8e33eff8Schristos mib[0] = CTL_VM; 425*8e33eff8Schristos mib[1] = VM_OVERCOMMIT; 426*8e33eff8Schristos if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { 427*8e33eff8Schristos return false; /* Error. */ 428*8e33eff8Schristos } 429*8e33eff8Schristos #else 430*8e33eff8Schristos if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { 431*8e33eff8Schristos return false; /* Error. */ 432*8e33eff8Schristos } 433*8e33eff8Schristos #endif 434*8e33eff8Schristos 435*8e33eff8Schristos return ((vm_overcommit & 0x3) == 0); 436*8e33eff8Schristos } 437*8e33eff8Schristos #endif 438*8e33eff8Schristos 439*8e33eff8Schristos #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY 440*8e33eff8Schristos /* 441*8e33eff8Schristos * Use syscall(2) rather than {open,read,close}(2) when possible to avoid 442*8e33eff8Schristos * reentry during bootstrapping if another library has interposed system call 443*8e33eff8Schristos * wrappers. 444*8e33eff8Schristos */ 445*8e33eff8Schristos static bool 446*8e33eff8Schristos os_overcommits_proc(void) { 447*8e33eff8Schristos int fd; 448*8e33eff8Schristos char buf[1]; 449*8e33eff8Schristos 450*8e33eff8Schristos #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) 451*8e33eff8Schristos #if defined(O_CLOEXEC) 452*8e33eff8Schristos fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | 453*8e33eff8Schristos O_CLOEXEC); 454*8e33eff8Schristos #else 455*8e33eff8Schristos fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); 456*8e33eff8Schristos if (fd != -1) { 457*8e33eff8Schristos fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 458*8e33eff8Schristos } 459*8e33eff8Schristos #endif 460*8e33eff8Schristos #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) 461*8e33eff8Schristos #if defined(O_CLOEXEC) 462*8e33eff8Schristos fd = (int)syscall(SYS_openat, 463*8e33eff8Schristos AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); 464*8e33eff8Schristos #else 465*8e33eff8Schristos fd = (int)syscall(SYS_openat, 466*8e33eff8Schristos AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); 467*8e33eff8Schristos if (fd != -1) { 468*8e33eff8Schristos fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 469*8e33eff8Schristos } 470*8e33eff8Schristos #endif 471*8e33eff8Schristos #else 472*8e33eff8Schristos #if defined(O_CLOEXEC) 473*8e33eff8Schristos fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); 474*8e33eff8Schristos #else 475*8e33eff8Schristos fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); 476*8e33eff8Schristos if (fd != -1) { 477*8e33eff8Schristos fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); 478*8e33eff8Schristos } 479*8e33eff8Schristos #endif 480*8e33eff8Schristos #endif 481*8e33eff8Schristos 482*8e33eff8Schristos if (fd == -1) { 483*8e33eff8Schristos return false; /* Error. */ 484*8e33eff8Schristos } 485*8e33eff8Schristos 486*8e33eff8Schristos ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); 487*8e33eff8Schristos #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) 488*8e33eff8Schristos syscall(SYS_close, fd); 489*8e33eff8Schristos #else 490*8e33eff8Schristos close(fd); 491*8e33eff8Schristos #endif 492*8e33eff8Schristos 493*8e33eff8Schristos if (nread < 1) { 494*8e33eff8Schristos return false; /* Error. */ 495*8e33eff8Schristos } 496*8e33eff8Schristos /* 497*8e33eff8Schristos * /proc/sys/vm/overcommit_memory meanings: 498*8e33eff8Schristos * 0: Heuristic overcommit. 499*8e33eff8Schristos * 1: Always overcommit. 500*8e33eff8Schristos * 2: Never overcommit. 501*8e33eff8Schristos */ 502*8e33eff8Schristos return (buf[0] == '0' || buf[0] == '1'); 503*8e33eff8Schristos } 504*8e33eff8Schristos #endif 505*8e33eff8Schristos 506*8e33eff8Schristos void 507*8e33eff8Schristos pages_set_thp_state (void *ptr, size_t size) { 508*8e33eff8Schristos if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { 509*8e33eff8Schristos return; 510*8e33eff8Schristos } 511*8e33eff8Schristos assert(opt_thp != thp_mode_not_supported && 512*8e33eff8Schristos init_system_thp_mode != thp_mode_not_supported); 513*8e33eff8Schristos 514*8e33eff8Schristos if (opt_thp == thp_mode_always 515*8e33eff8Schristos && init_system_thp_mode != thp_mode_never) { 516*8e33eff8Schristos assert(init_system_thp_mode == thp_mode_default); 517*8e33eff8Schristos pages_huge_unaligned(ptr, size); 518*8e33eff8Schristos } else if (opt_thp == thp_mode_never) { 519*8e33eff8Schristos assert(init_system_thp_mode == thp_mode_default || 520*8e33eff8Schristos init_system_thp_mode == thp_mode_always); 521*8e33eff8Schristos pages_nohuge_unaligned(ptr, size); 522*8e33eff8Schristos } 523*8e33eff8Schristos } 524*8e33eff8Schristos 525*8e33eff8Schristos static void 526*8e33eff8Schristos init_thp_state(void) { 527*8e33eff8Schristos if (!have_madvise_huge) { 528*8e33eff8Schristos if (metadata_thp_enabled() && opt_abort) { 529*8e33eff8Schristos malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n"); 530*8e33eff8Schristos abort(); 531*8e33eff8Schristos } 532*8e33eff8Schristos goto label_error; 533*8e33eff8Schristos } 534*8e33eff8Schristos 535*8e33eff8Schristos static const char sys_state_madvise[] = "always [madvise] never\n"; 536*8e33eff8Schristos static const char sys_state_always[] = "[always] madvise never\n"; 537*8e33eff8Schristos static const char sys_state_never[] = "always madvise [never]\n"; 538*8e33eff8Schristos char buf[sizeof(sys_state_madvise)]; 539*8e33eff8Schristos 540*8e33eff8Schristos #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) 541*8e33eff8Schristos int fd = (int)syscall(SYS_open, 542*8e33eff8Schristos "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); 543*8e33eff8Schristos #else 544*8e33eff8Schristos int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); 545*8e33eff8Schristos #endif 546*8e33eff8Schristos if (fd == -1) { 547*8e33eff8Schristos goto label_error; 548*8e33eff8Schristos } 549*8e33eff8Schristos 550*8e33eff8Schristos ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); 551*8e33eff8Schristos #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) 552*8e33eff8Schristos syscall(SYS_close, fd); 553*8e33eff8Schristos #else 554*8e33eff8Schristos close(fd); 555*8e33eff8Schristos #endif 556*8e33eff8Schristos 557*8e33eff8Schristos if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { 558*8e33eff8Schristos init_system_thp_mode = thp_mode_default; 559*8e33eff8Schristos } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { 560*8e33eff8Schristos init_system_thp_mode = thp_mode_always; 561*8e33eff8Schristos } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { 562*8e33eff8Schristos init_system_thp_mode = thp_mode_never; 563*8e33eff8Schristos } else { 564*8e33eff8Schristos goto label_error; 565*8e33eff8Schristos } 566*8e33eff8Schristos return; 567*8e33eff8Schristos label_error: 568*8e33eff8Schristos opt_thp = init_system_thp_mode = thp_mode_not_supported; 569*8e33eff8Schristos } 570*8e33eff8Schristos 571*8e33eff8Schristos bool 572*8e33eff8Schristos pages_boot(void) { 573*8e33eff8Schristos os_page = os_page_detect(); 574*8e33eff8Schristos if (os_page > PAGE) { 575*8e33eff8Schristos malloc_write("<jemalloc>: Unsupported system page size\n"); 576*8e33eff8Schristos if (opt_abort) { 577*8e33eff8Schristos abort(); 578*8e33eff8Schristos } 579*8e33eff8Schristos return true; 580*8e33eff8Schristos } 581*8e33eff8Schristos 582*8e33eff8Schristos #ifndef _WIN32 583*8e33eff8Schristos mmap_flags = MAP_PRIVATE | MAP_ANON; 584*8e33eff8Schristos #endif 585*8e33eff8Schristos 586*8e33eff8Schristos #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 587*8e33eff8Schristos os_overcommits = os_overcommits_sysctl(); 588*8e33eff8Schristos #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) 589*8e33eff8Schristos os_overcommits = os_overcommits_proc(); 590*8e33eff8Schristos # ifdef MAP_NORESERVE 591*8e33eff8Schristos if (os_overcommits) { 592*8e33eff8Schristos mmap_flags |= MAP_NORESERVE; 593*8e33eff8Schristos } 594*8e33eff8Schristos # endif 595*8e33eff8Schristos #elif defined(__NetBSD__) 596*8e33eff8Schristos os_overcommits = true; 597*8e33eff8Schristos #else 598*8e33eff8Schristos os_overcommits = false; 599*8e33eff8Schristos #endif 600*8e33eff8Schristos 601*8e33eff8Schristos init_thp_state(); 602*8e33eff8Schristos 603*8e33eff8Schristos /* Detect lazy purge runtime support. */ 604*8e33eff8Schristos if (pages_can_purge_lazy) { 605*8e33eff8Schristos bool committed = false; 606*8e33eff8Schristos void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); 607*8e33eff8Schristos if (madv_free_page == NULL) { 608*8e33eff8Schristos return true; 609*8e33eff8Schristos } 610*8e33eff8Schristos assert(pages_can_purge_lazy_runtime); 611*8e33eff8Schristos if (pages_purge_lazy(madv_free_page, PAGE)) { 612*8e33eff8Schristos pages_can_purge_lazy_runtime = false; 613*8e33eff8Schristos } 614*8e33eff8Schristos os_pages_unmap(madv_free_page, PAGE); 615*8e33eff8Schristos } 616*8e33eff8Schristos 617*8e33eff8Schristos return false; 618*8e33eff8Schristos } 619