/* xref: /netbsd-src/external/bsd/jemalloc.old/dist/src/pages.c (revision 8e33eff89e26cf71871ead62f0d5063e1313c33a) */
#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif
#ifdef MAP_ALIGNED
#include <sys/bitops.h>	/* NetBSD */
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t	os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int	mmap_flags;
#endif
static bool	os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

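/*
 * Thin wrapper around the OS mapping primitive (VirtualAlloc() or mmap()).
 * addr is a hint, not a demand: if the OS returns usable memory at a
 * different address, the mapping is released and NULL is returned.  When the
 * system overcommits, *commit is forced to true, since the distinction
 * between committed and decommitted memory is meaningless in that case.
 */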
static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int flags = mmap_flags;
#ifdef MAP_ALIGNED
		if (alignment > os_page || PAGE > os_page) {
			int a = ilog2(MAX(alignment, PAGE));
			flags |= MAP_ALIGNED(a);
		}
#endif
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}

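/*
 * Carve an aligned, size-byte mapping out of an over-sized allocation by
 * unmapping the excess before and after it.  On Windows the kernel cannot
 * release part of a VirtualAlloc() region, so the whole region is unmapped
 * and the desired subrange is remapped at its (now known) address; that can
 * fail if another thread races in, so callers must be prepared to retry.
 */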
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

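/*
 * Release a mapping.  Failure here indicates a serious inconsistency (e.g. a
 * bad address or size), so it is reported and, under opt_abort, fatal.
 */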
static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

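/*
 * Reliable fallback for aligned mapping: over-allocate by (alignment -
 * os_page) bytes so that some os_page-aligned address within the allocation
 * is guaranteed to have the requested alignment, then trim the excess.
 *
 * Worked example (illustrative numbers): with os_page = 4 KiB, size = 8 KiB,
 * and alignment = 2 MiB, alloc_size = 8 KiB + 2 MiB - 4 KiB.  If mmap()
 * returns 0x7f0000001000, ALIGNMENT_CEILING rounds it up to 0x7f0000200000,
 * so leadsize = 0x1ff000 bytes are unmapped in front, and whatever remains
 * past size bytes is unmapped behind.
 */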
static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

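/*
 * Illustrative call sequence (a hedged sketch for readers, not code that
 * appears elsewhere in jemalloc; the two-page size is arbitrary):
 *
 *	bool commit = false;
 *	void *p = pages_map(NULL, 2 * PAGE, PAGE, &commit);
 *	if (p != NULL) {
 *		if (!commit) {
 *			commit = !pages_commit(p, 2 * PAGE);
 *		}
 *		...use p...
 *		pages_unmap(p, 2 * PAGE);
 *	}
 */
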
static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		/*
		 * (De)commit by overlaying a fresh anonymous mapping with the
		 * appropriate protection; MAP_FIXED replaces the existing
		 * pages in place.
		 */
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}

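/*
 * Lazy purge: tell the kernel the contents are no longer needed, but let it
 * reclaim the pages at its leisure (MADV_FREE, or MEM_RESET on Windows).
 * The mapping stays readable and writable.  Returns true if the pages could
 * not be purged.
 */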
bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

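/*
 * Forced purge: the pages must read back as zero afterwards.  This relies
 * either on MADV_DONTNEED with zeroing semantics (as on Linux) or, where
 * mappings coalesce, on overlaying a fresh demand-zeroed mapping.  Returns
 * true on failure.
 */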
bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

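/*
 * Transparent huge page hints.  pages_huge() marks a range as eligible for
 * promotion to huge pages (MADV_HUGEPAGE); pages_nohuge() revokes the hint
 * (MADV_NOHUGEPAGE).  The *_unaligned variants skip the alignment asserts
 * for ranges that are not hugepage-aligned.
 */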
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Fall back to the compile-time page size, not its log. */
		return PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	/* Overcommit iff neither swap-reserve enforcement bit is set. */
	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	#if defined(O_CLOEXEC)
		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
			O_CLOEXEC);
	#else
		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	#if defined(O_CLOEXEC)
		fd = (int)syscall(SYS_openat,
			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
		fd = (int)syscall(SYS_openat,
			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#else
	#if defined(O_CLOEXEC)
		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
		if (fd != -1) {
			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
		}
	#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

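/*
 * Reconcile the opt.thp setting with the system-wide THP mode that was in
 * effect when jemalloc bootstrapped: nothing to do when they already agree;
 * otherwise apply the per-range madvise() hint that moves this range from
 * the initial system behavior to the requested one.
 */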
void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

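/*
 * Determine the system-wide THP mode by reading
 * /sys/kernel/mm/transparent_hugepage/enabled, where the kernel brackets
 * the active mode (e.g. "always [madvise] never").  If the file is absent
 * or unparseable, THP is treated as unsupported and opt.thp is overridden.
 */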
static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 0) {
		/* Don't feed a negative length to strncmp() below. */
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#elif defined(__NetBSD__)
	os_overcommits = true;
#else
	os_overcommits = false;
#endif

	init_thp_state();

	/*
	 * Detect lazy purge runtime support: the build may enable MADV_FREE
	 * even though the running kernel predates it, so probe with a
	 * throwaway page.
	 */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}