//===-- memprof_allocator.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler, a memory profiler.
//
// Implementation of MemProf's memory allocator, which uses the allocator
// from sanitizer_common.
//
//===----------------------------------------------------------------------===//

#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_mibmap.h"
#include "memprof_rawprofile.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "profile/MemProfData.inc"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_stackdepot.h"

#include <sched.h>
#include <time.h>

#define MAX_HISTOGRAM_PRINT_SIZE 32U

extern bool __memprof_histogram;

namespace __memprof {
namespace {
using ::llvm::memprof::MemInfoBlock;

void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
  u64 p;

  if (print_terse) {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, M.AllocCount, p / 100, p % 100,
           M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, M.MinAccessCount,
           M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, M.MinLifetime,
           M.MaxLifetime);
    Printf("%u/%u/%u/%u\n", M.NumMigratedCpu, M.NumLifetimeOverlaps,
           M.NumSameAllocCpu, M.NumSameDeallocCpu);
  } else {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("Memory allocation stack id = %llu\n", id);
    Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n",
           M.AllocCount, p / 100, p % 100, M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100,
           p % 100, M.MinAccessCount, M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100,
           p % 100, M.MinLifetime, M.MaxLifetime);
    Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
           "cpu: %u, num same dealloc_cpu: %u\n",
           M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
           M.NumSameDeallocCpu);
    Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
    uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
                             ? MAX_HISTOGRAM_PRINT_SIZE
                             : M.AccessHistogramSize;
    for (size_t i = 0; i < PrintSize; ++i) {
      Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
    }
    Printf("\n");
  }
}
} // namespace

static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // will segfault because the address of __vdso_getcpu will be null.
  if (!memprof_inited)
    return -1;
  return sched_getcpu();
}

// Compute the timestamp in ms.
static int GetTimestamp(void) {
  // timespec_get will segfault if called from dl_init
  if (!memprof_timestamp_inited) {
    // By returning 0, the allocation is effectively treated as having been
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
    return 0;
  }
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
}

static MemprofAllocator &get_allocator();

// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of the ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'
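//
// Worked example (hypothetical addresses, for illustration only, assuming
// MEMPROF_ALIGNMENT is smaller than the requested alignment): for
// memalign(64, 100) the request becomes needed_size = 128 (size rounded up
// to the alignment) + 32 (header) + 64 (alignment slack) = 224 bytes. If the
// underlying allocator returns alloc_beg = 0x1008, the first 64-byte-aligned
// user address with room for a header is user_beg = 0x1040, so the
// ChunkHeader occupies [0x1020, 0x1040). Because chunk_beg (0x1020) !=
// alloc_beg (0x1008), the chunk is marked from_memalign and a
// LargeChunkHeader (M and B above) is written at alloc_beg so the chunk can
// later be found from GetBlockBegin().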

constexpr uptr kMaxAllowedMallocBits = 40;

// Should be no more than 32 bytes.
struct ChunkHeader {
  // 1-st 4 bytes.
  u32 alloc_context_id;
  // 2-nd 4 bytes
  u32 cpu_id;
  // 3-rd 4 bytes
  u32 timestamp_ms;
  // 4-th 4 bytes
  // Note only 1 bit is needed for this flag if we need space in the future for
  // more fields.
  u32 from_memalign;
  // 5-th and 6-th 4 bytes
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future for
  // more fields.
  atomic_uint64_t user_requested_size;
  // 23 bits available
  // 7-th and 8-th 4 bytes
  u64 data_type_id; // TODO: hash of type name
};

static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
COMPILER_CHECK(kChunkHeaderSize == 32);

struct MemprofChunk : ChunkHeader {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
  uptr UsedSize() {
    return atomic_load(&user_requested_size, memory_order_relaxed);
  }
  void *AllocBeg() {
    if (from_memalign)
      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
    return reinterpret_cast<void *>(this);
  }
};

class LargeChunkHeader {
  static constexpr uptr kAllocBegMagic =
      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
  atomic_uintptr_t magic;
  MemprofChunk *chunk_header;

public:
  MemprofChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
               : nullptr;
  }

  void Set(MemprofChunk *p) {
    if (p) {
      chunk_header = p;
      atomic_store(&magic, kAllocBegMagic, memory_order_release);
      return;
    }

    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);
    }
  }
};
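
// When a chunk has left padding (the M/B layout described above), the
// LargeChunkHeader is written at alloc_beg: Set(p) stores the chunk pointer
// and then release-stores the magic value, while Get() acquire-loads the
// magic before trusting chunk_header. Set(nullptr) retires the header on
// deallocation so stale heap contents are not later misread as a live chunk.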

void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may not be
  // page-aligned, so we only flush the page-aligned portion.
  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}

void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.mmaps++;
  thread_stats.mmaped += size;
}

void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // We are about to unmap a chunk of user memory.
  // Mark the corresponding shadow memory as not needed.
  FlushUnneededMemProfShadowMemory(p, size);
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.munmaps++;
  thread_stats.munmaped += size;
}

AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  return &ms->allocator_cache;
}

// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}
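// Each shadow cell accumulates the number of accesses made to the
// corresponding SHADOW_GRANULARITY-sized granule of user memory, so the sum
// above approximates (at granule resolution) the total access count for the
// range [p, p + size].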

// Accumulates the access count from the shadow for the given pointer and size.
// See memprof_mapping.h for an overview on histogram counters.
u64 GetShadowCountHistogram(uptr p, u32 size) {
  u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
  u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Clears the shadow counters (when memory is allocated).
void ClearShadow(uptr addr, uptr size) {
  CHECK(AddrIsAlignedByGranularity(addr));
  CHECK(AddrIsInMem(addr));
  CHECK(AddrIsAlignedByGranularity(addr + size));
  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
  CHECK(REAL(memset));
  uptr shadow_beg;
  uptr shadow_end;
  if (__memprof_histogram) {
    shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
    shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
  } else {
    shadow_beg = MEM_TO_SHADOW(addr);
    shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
  }

  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
  } else {
    uptr page_size = GetPageSizeCached();
    uptr page_beg = RoundUpTo(shadow_beg, page_size);
    uptr page_end = RoundDownTo(shadow_end, page_size);

    if (page_beg >= page_end) {
      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
    } else {
      if (page_beg != shadow_beg) {
        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
      }
      if (page_end != shadow_end) {
        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
      }
      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
    }
  }
}
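// Note on the two paths above: shadow ranges smaller than
// clear_shadow_mmap_threshold are simply memset to zero; for larger ranges
// only the partial pages at either end are memset, and the page-aligned
// middle is handed to ReserveShadowMemoryRange, which remaps it so the pages
// read as zero without each one being dirtied.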

struct Allocator {
  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;

  MemprofAllocator allocator;
  StaticSpinMutex fallback_mutex;
  AllocatorCache fallback_allocator_cache;

  uptr max_user_defined_malloc_size;

  // Holds the mapping of stack ids to MemInfoBlocks.
  MIBMapTy MIBMap;

  atomic_uint8_t destructing;
  atomic_uint8_t constructed;
  bool print_text;

  // ------------------- Initialization ------------------------
  explicit Allocator(LinkerInitialized) : print_text(flags()->print_text) {
    atomic_store_relaxed(&destructing, 0);
    atomic_store_relaxed(&constructed, 1);
  }

  ~Allocator() {
    atomic_store_relaxed(&destructing, 1);
    if (flags()->dump_at_exit)
      FinishAndWrite();
  }

  static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
                            void *Arg) {
    SpinMutexLock l(&Value->mutex);
    Print(Value->mib, Key, bool(Arg));
  }

  // See memprof_mapping.h for an overview on histogram counters.
  static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
    if (__memprof_histogram) {
      return CreateNewMIBWithHistogram(p, m, user_size);
    } else {
      return CreateNewMIBWithoutHistogram(p, m, user_size);
    }
  }

  static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
                                                u64 user_size) {

    u64 c = GetShadowCountHistogram(p, user_size);
    long curtime = GetTimestamp();
    uint32_t HistogramSize =
        RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
    uintptr_t Histogram =
        (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
    memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
    for (size_t i = 0; i < HistogramSize; ++i) {
      u8 Counter =
          *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
      ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
    }
    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                        GetCpuId(), Histogram, HistogramSize);
    return newMIB;
  }

  static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
                                                   u64 user_size) {
    u64 c = GetShadowCount(p, user_size);
    long curtime = GetTimestamp();
    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                        GetCpuId(), 0, 0);
    return newMIB;
  }

  void FinishAndWrite() {
    if (print_text && common_flags()->print_module_map)
      DumpProcessMap();

    allocator.ForceLock();

    InsertLiveBlocks();
    if (print_text) {
      if (!flags()->print_terse)
        Printf("Recorded MIBs (incl. live on exit):\n");
      MIBMap.ForEach(PrintCallback,
                     reinterpret_cast<void *>(flags()->print_terse));
      StackDepotPrintAll();
    } else {
      // Serialize the contents to a raw profile. Format documented in
      // memprof_rawprofile.h.
      char *Buffer = nullptr;

      __sanitizer::ListOfModules List;
      List.init();
      ArrayRef<LoadedModule> Modules(List.begin(), List.end());
      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
      CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
      report_file.Write(Buffer, BytesSerialized);
    }

    allocator.ForceUnlock();
  }

  // Inserts any blocks which have been allocated but not yet deallocated.
  void InsertLiveBlocks() {
    allocator.ForEachChunk(
        [](uptr chunk, void *alloc) {
          u64 user_requested_size;
          Allocator *A = (Allocator *)alloc;
          MemprofChunk *m =
              A->GetMemprofChunk((void *)chunk, user_requested_size);
          if (!m)
            return;
          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
          MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
          InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
        },
        this);
  }

  void InitLinkerInitialized() {
    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
    allocator.InitLinkerInitialized(
        common_flags()->allocator_release_to_os_interval_ms);
    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
                                       ? common_flags()->max_allocation_size_mb
                                             << 20
                                       : kMaxAllowedMallocSize;
  }

  // -------------------- Allocation/Deallocation routines ---------------
  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
                 AllocType alloc_type) {
    if (UNLIKELY(!memprof_inited))
      MemprofInitFromRtl();
    if (UNLIKELY(IsRssLimitExceeded())) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportRssLimitExceeded(stack);
    }
    CHECK(stack);
    const uptr min_alignment = MEMPROF_ALIGNMENT;
    if (alignment < min_alignment)
      alignment = min_alignment;
    if (size == 0) {
      // We'd be happy to avoid allocating memory for zero-size requests, but
      // some programs/tests depend on this behavior and assume that malloc
      // would not return NULL even for zero-size allocations. Moreover, it
      // looks like operator new should never return NULL, and results of
      // consecutive "new" calls must be different even if the allocated size
      // is zero.
      size = 1;
    }
    CHECK(IsPowerOfTwo(alignment));
    uptr rounded_size = RoundUpTo(size, alignment);
    uptr needed_size = rounded_size + kChunkHeaderSize;
    if (alignment > min_alignment)
      needed_size += alignment;
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
      if (AllocatorMayReturnNull()) {
        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
        return nullptr;
      }
      uptr malloc_limit =
          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
      ReportAllocationSizeTooBig(size, malloc_limit, stack);
    }

    MemprofThread *t = GetCurrentThread();
    void *allocated;
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocated = allocator.Allocate(cache, needed_size, 8);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocated = allocator.Allocate(cache, needed_size, 8);
    }
    if (UNLIKELY(!allocated)) {
      SetAllocatorOutOfMemory();
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportOutOfMemory(size, stack);
    }

    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
    uptr alloc_end = alloc_beg + needed_size;
    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
    uptr user_beg = beg_plus_header;
    if (!IsAligned(user_beg, alignment))
      user_beg = RoundUpTo(user_beg, alignment);
    uptr user_end = user_beg + size;
    CHECK_LE(user_end, alloc_end);
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
    m->from_memalign = alloc_beg != chunk_beg;
    CHECK(size);

    m->cpu_id = GetCpuId();
    m->timestamp_ms = GetTimestamp();
    m->alloc_context_id = StackDepotPut(*stack);

    uptr size_rounded_down_to_granularity =
        RoundDownTo(size, SHADOW_GRANULARITY);
    if (size_rounded_down_to_granularity)
      ClearShadow(user_beg, size_rounded_down_to_granularity);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.mallocs++;
    thread_stats.malloced += size;
    thread_stats.malloced_overhead += needed_size - size;
    if (needed_size > SizeClassMap::kMaxSize)
      thread_stats.malloc_large++;
    else
      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;

    void *res = reinterpret_cast<void *>(user_beg);
    atomic_store(&m->user_requested_size, size, memory_order_release);
    if (alloc_beg != chunk_beg) {
      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
    }
    RunMallocHooks(res, size);
    return res;
  }

  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
                  BufferedStackTrace *stack, AllocType alloc_type) {
    uptr p = reinterpret_cast<uptr>(ptr);
    if (p == 0)
      return;

    RunFreeHooks(ptr);

    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    u64 user_requested_size =
        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
    if (memprof_inited && atomic_load_relaxed(&constructed) &&
        !atomic_load_relaxed(&destructing)) {
      MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
      InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
    }

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.frees++;
    thread_stats.freed += user_requested_size;

    void *alloc_beg = m->AllocBeg();
    if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of the deallocated chunk, confusing GetMemprofChunk lookup.
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
    }

    MemprofThread *t = GetCurrentThread();
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocator.Deallocate(cache, alloc_beg);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocator.Deallocate(cache, alloc_beg);
    }
  }

  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
    CHECK(old_ptr && new_size);
    uptr p = reinterpret_cast<uptr>(old_ptr);
    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.reallocs++;
    thread_stats.realloced += new_size;

    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
    if (new_ptr) {
      CHECK_NE(REAL(memcpy), nullptr);
      uptr memcpy_size = Min(new_size, m->UsedSize());
      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
    }
    return new_ptr;
  }

  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportCallocOverflow(nmemb, size, stack);
    }
    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator, there is no need to
    // clear it as it comes directly from mmap.
    if (ptr && allocator.FromPrimary(ptr))
      REAL(memset)(ptr, 0, nmemb * size);
    return ptr;
  }

  void CommitBack(MemprofThreadLocalMallocStorage *ms) {
    AllocatorCache *ac = GetAllocatorCache(ms);
    allocator.SwallowCache(ac);
  }

  // -------------------------- Chunk lookup ----------------------

  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
    if (!alloc_beg)
      return nullptr;
    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    if (!p) {
      if (!allocator.FromPrimary(alloc_beg))
        return nullptr;
      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
    }
    // The size is reset to 0 on deallocation (and a min of 1 on
    // allocation).
    user_requested_size =
        atomic_load(&p->user_requested_size, memory_order_acquire);
    if (user_requested_size)
      return p;
    return nullptr;
  }

  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
    return GetMemprofChunk(alloc_beg, user_requested_size);
  }

  uptr AllocationSize(uptr p) {
    u64 user_requested_size;
    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
    if (!m)
      return 0;
    if (m->Beg() != p)
      return 0;
    return user_requested_size;
  }

  uptr AllocationSizeFast(uptr p) {
    return reinterpret_cast<MemprofChunk *>(p - kChunkHeaderSize)->UsedSize();
  }

  void Purge() { allocator.ForceReleaseToOS(); }

  void PrintStats() { allocator.PrintStats(); }

  void ForceLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    allocator.ForceLock();
    fallback_mutex.Lock();
  }

  void ForceUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    fallback_mutex.Unlock();
    allocator.ForceUnlock();
  }
};

static Allocator instance(LINKER_INITIALIZED);

static MemprofAllocator &get_allocator() { return instance.allocator; }

void InitializeAllocator() { instance.InitLinkerInitialized(); }

void MemprofThreadLocalMallocStorage::CommitBack() {
  instance.CommitBack(this);
}

void PrintInternalAllocatorStats() { instance.PrintStats(); }

void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
}

void memprof_delete(void *ptr, uptr size, uptr alignment,
                    BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
}

void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
}

void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}

void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportReallocArrayOverflow(nmemb, size, stack);
  }
  return memprof_realloc(p, nmemb * size, stack);
}

void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
  if (!p)
    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
  if (size == 0) {
    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
      return nullptr;
    }
    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
    size = 1;
  }
  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}

void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(
      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
}

void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  size = size ? RoundUpTo(size, PageSize) : PageSize;
  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
}

void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
}

void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
}

int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
    if (AllocatorMayReturnNull())
      return errno_EINVAL;
    ReportInvalidPosixMemalignAlignment(alignment, stack);
  }
  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  if (UNLIKELY(!ptr))
    // OOM error is already taken care of by Allocate.
    return errno_ENOMEM;
  CHECK(IsAligned((uptr)ptr, alignment));
  *memptr = ptr;
  return 0;
}

static const void *memprof_malloc_begin(const void *p) {
  u64 user_requested_size;
  MemprofChunk *m =
      instance.GetMemprofChunkByAddr((uptr)p, user_requested_size);
  if (!m)
    return nullptr;
  if (user_requested_size == 0)
    return nullptr;

  return (const void *)m->Beg();
}

uptr memprof_malloc_usable_size(const void *ptr) {
  if (!ptr)
    return 0;
  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
  return usable_size;
}

} // namespace __memprof

// ---------------------- Interface ---------------- {{{1
using namespace __memprof;

uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }

int __sanitizer_get_ownership(const void *p) {
  return memprof_malloc_usable_size(p) != 0;
}

const void *__sanitizer_get_allocated_begin(const void *p) {
  return memprof_malloc_begin(p);
}

uptr __sanitizer_get_allocated_size(const void *p) {
  return memprof_malloc_usable_size(p);
}

uptr __sanitizer_get_allocated_size_fast(const void *p) {
  DCHECK_EQ(p, __sanitizer_get_allocated_begin(p));
  uptr ret = instance.AllocationSizeFast(reinterpret_cast<uptr>(p));
  DCHECK_EQ(ret, __sanitizer_get_allocated_size(p));
  return ret;
}

void __sanitizer_purge_allocator() { instance.Purge(); }

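// The two entry points below let a program flush the profile mid-run rather
// than waiting for process exit. An illustrative (hypothetical) use from
// application code, assuming it declares the interface functions itself:
//
//   extern "C" int __memprof_profile_dump();
//   extern "C" void __memprof_profile_reset();
//   ...
//   __memprof_profile_dump();  // serialize the current MIBs to report_file
//   __memprof_profile_reset(); // close it so the next dump starts fresh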
int __memprof_profile_dump() {
  instance.FinishAndWrite();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  return 0;
}

void __memprof_profile_reset() {
  if (report_file.fd != kInvalidFd && report_file.fd != kStdoutFd &&
      report_file.fd != kStderrFd) {
    CloseFile(report_file.fd);
    // Setting the file descriptor to kInvalidFd ensures that we will reopen the
    // file when invoking Write again.
    report_file.fd = kInvalidFd;
  }
}
814