//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
// Next 8 classes: 256 + i * 32 (i = 1 to 8).
// Next 8 classes: 512 + i * 64 (i = 1 to 8).
// ...
// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
// - Efficient table-free class-to-size and size-to-class functions.
// - Difference between two consecutive size classes is between 6% and 12%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per-thread:
// - kMaxNumCached is the maximal number of chunks per size class.
// - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
//
// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
//
// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
// c17 => s: 288 diff: +32 12% l 8 cached: 227 65376; id 17
// c18 => s: 320 diff: +32 11% l 8 cached: 204 65280; id 18
// c19 => s: 352 diff: +32 10% l 8 cached: 186 65472; id 19
// c20 => s: 384 diff: +32 09% l 8 cached: 170 65280; id 20
// c21 => s: 416 diff: +32 08% l 8 cached: 157 65312; id 21
// c22 => s: 448 diff: +32 07% l 8 cached: 146 65408; id 22
// c23 => s: 480 diff: +32 07% l 8 cached: 136 65280; id 23
//
// c24 => s: 512 diff: +32 06% l 9 cached: 128 65536; id 24
// c25 => s: 576 diff: +64 12% l 9 cached: 113 65088; id 25
// c26 => s: 640 diff: +64 11% l 9 cached: 102 65280; id 26
// c27 => s: 704 diff: +64 10% l 9 cached: 93 65472; id 27
// c28 => s: 768 diff: +64 09% l 9 cached: 85 65280; id 28
// c29 => s: 832 diff: +64 08% l 9 cached: 78 64896; id 29
// c30 => s: 896 diff: +64 07% l 9 cached: 73 65408; id 30
// c31 => s: 960 diff: +64 07% l 9 cached: 68 65280; id 31
//
// c32 => s: 1024 diff: +64 06% l 10 cached: 64 65536; id 32

template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
          uptr kMinBatchClassT>
class SizeClassMap {
  static const uptr kMinSizeLog = 4;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 3;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };

  static const uptr kMinBatchClass = kMinBatchClassT;
  static const uptr kMaxSize = 1 << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32  ? 32 :
      kNumClasses <= 64  ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = MostSignificantSetBitIndex(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max<uptr>(1, Min(kMaxNumCached, n));
  }

  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = MostSignificantSetBitIndex(s);
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c-1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c-1), s);
    }

    // TransferBatch for kMinBatchClass must fit into the block itself.
    const uptr batch_size = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass));
    CHECK_LE(batch_size, Size(kMinBatchClass));
    // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
    const uptr batch_size1 = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
    CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
  }
};

typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(25, 28)>
    DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(17, 20)>
    CompactSizeClassMap;
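
// Usage sketch (illustrative only; it relies solely on the static interface
// above): ClassID() rounds a requested size up to the nearest class, Size()
// reports that class's block size, so Size(ClassID(s)) >= s and
// ClassID(Size(c)) == c.
//
//   typedef DefaultSizeClassMap SCMap;
//   uptr c = SCMap::ClassID(100);   // First class whose size is >= 100.
//   uptr s = SCMap::Size(c);        // 112 for the default map above.
//   CHECK_GE(s, 100);
//   CHECK_EQ(SCMap::ClassID(s), c);
//   uptr n = SCMap::MaxCached(c);   // Per-thread caching hint for class c.
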
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;

// Memory allocator statistics
enum AllocatorStat {
  AllocatorStatMalloced,
  AllocatorStatFreed,
  AllocatorStatMmapped,
  AllocatorStatUnmapped,
  AllocatorStatCount
};

typedef u64 AllocatorStatCounters[AllocatorStatCount];

// Per-thread stats, live in per-thread cache.
class AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }

  void Add(AllocatorStat i, u64 v) {
    v += atomic_load(&stats_[i], memory_order_relaxed);
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  void Set(AllocatorStat i, u64 v) {
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  u64 Get(AllocatorStat i) const {
    return atomic_load(&stats_[i], memory_order_relaxed);
  }

 private:
  friend class AllocatorGlobalStats;
  AllocatorStats *next_;
  AllocatorStats *prev_;
  atomic_uint64_t stats_[AllocatorStatCount];
};

// Global stats, used for aggregation and querying.
class AllocatorGlobalStats : public AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    next_ = this;
    prev_ = this;
  }

  void Register(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->next_ = next_;
    s->prev_ = this;
    next_->prev_ = s;
    next_ = s;
  }

  void Unregister(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->prev_->next_ = s->next_;
    s->next_->prev_ = s->prev_;
    for (int i = 0; i < AllocatorStatCount; i++)
      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
  }

  void Get(AllocatorStatCounters s) const {
    internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
    SpinMutexLock l(&mu_);
    const AllocatorStats *stats = this;
    for (;;) {
      for (int i = 0; i < AllocatorStatCount; i++)
        s[i] += stats->Get(AllocatorStat(i));
      stats = stats->next_;
      if (stats == this)
        break;
    }
  }

 private:
  mutable SpinMutex mu_;
};
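
// Usage sketch (illustrative only): per-thread AllocatorStats objects form a
// doubly-linked ring anchored at a single AllocatorGlobalStats instance.
// Get() walks the ring and sums the live per-thread counters; Unregister()
// folds a dying thread's counters into the global node so nothing is lost.
//
//   AllocatorGlobalStats global_stats;
//   global_stats.Init();
//   AllocatorStats thread_stats;              // Typically lives in TLS.
//   thread_stats.Init();
//   global_stats.Register(&thread_stats);
//   thread_stats.Add(AllocatorStatMalloced, 4096);
//   AllocatorStatCounters totals;
//   global_stats.Get(totals);                 // Includes the 4096 above.
//   global_stats.Unregister(&thread_stats);   // On thread exit.
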
// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }

  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(stat, c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }

  static bool PointerIsMine(void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }

  static uptr GetSizeClass(void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }

  void *GetBlockBegin(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }

  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  void *GetMetaData(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }
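
  // Worked example for GetMetaData() above (kMetadataSize == 16 is assumed
  // only for the arithmetic): the chunk with index i in the Region of
  // class_id starts at RegionBeg + i * Size(class_id), while its MetaChunk
  // is laid out downwards from the top of the same Region:
  //   Meta(i) = RegionBeg + kRegionSize - (i + 1) * 16
  // so chunk 0 owns the last 16 bytes of the Region, chunk 1 the 16 bytes
  // before that, and so on, matching the
  //   UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
  // picture above.
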
  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }

  // Test-only.
  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }

  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetRegionInfo(i)->mutex.Lock();
    }
  }

  void ForceUnlock() {
    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
      GetRegionInfo(i)->mutex.Unlock();
    }
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    u32 offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
    // We save 2x by using 32-bit div, but may need to use a 256-way switch.
    return offset / (u32)size;
  }

  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                                   uptr class_id, RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->allocated_user + region->allocated_meta > kRegionSize) {
      Printf("Out of memory. Dying.\n");
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (class_id < SizeClassMap::kMinBatchClass)
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      region->free_list.Push(b);
    }
    return b;
  }
};
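
// Instantiation sketch (illustrative parameters only, not a shipped
// configuration): the first two arguments pin the Space to a fixed,
// kSpaceSize-aligned range, the third reserves per-chunk metadata, and chunks
// are normally obtained through a per-thread cache rather than directly.
//
//   typedef SizeClassAllocator64<0x400000000000ULL /*kSpaceBeg*/,
//                                0x010000000000ULL /*kSpaceSize, 1T*/,
//                                16 /*kMetadataSize*/,
//                                DefaultSizeClassMap> Primary64;
//   Primary64 a;
//   a.Init();  // Reserves the Space and maps the RegionInfo array after it.
//   // Allocation then goes through SizeClassAllocatorLocalCache<Primary64>
//   // (defined below), which calls AllocateBatch()/DeallocateBatch().
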
// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region:
// a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
  }

  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }

  void *GetMetaData(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(stat, c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    sci->free_list.push_front(b);
  }

  bool PointerIsMine(void *p) {
    return GetSizeClass(p) != 0;
  }

  uptr GetSizeClass(void *p) {
    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }

  void *GetBlockBegin(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        res += kRegionSize;
    return res;
  }

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetSizeClassInfo(i)->mutex.Lock();
    }
  }

  void ForceUnlock() {
    for (int i = kNumClasses - 1; i >= 0; i--) {
      GetSizeClassInfo(i)->mutex.Unlock();
    }
  }

  void PrintStats() {
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }

  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    stat->Add(AllocatorStatMmapped, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
    state_->possible_regions[ComputeRegionId(res)] = class_id;
    return res;
  }

  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &state_->size_class_info_array[class_id];
  }

  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                        SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (class_id < SizeClassMap::kMinBatchClass)
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b)
      sci->free_list.push_back(b);
  }

  struct State {
    u8 possible_regions[kNumPossibleRegions];
    SizeClassInfo size_class_info_array[kNumClasses];
  };
  State *state_;
};
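
// Classification sketch (illustrative only): unlike SizeClassAllocator64,
// this allocator has no reserved Space, so ownership of a pointer is a single
// byte lookup indexed by the region id, which is what GetSizeClass() and
// PointerIsMine() above compute:
//
//   uptr region_id = reinterpret_cast<uptr>(p) >> kRegionSizeLog;
//   uptr class_id = state_->possible_regions[region_id];  // 0 == not ours.
//
// E.g. with kRegionSizeLog == 20 and kSpaceSize covering a full 4G address
// space, possible_regions is a 4096-entry byte array.
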
// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  void Init(AllocatorGlobalStats *s) {
    stats_.Init();
    if (s)
      s->Register(&stats_);
  }

  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
    Drain(allocator);
    if (s)
      s->Unregister(&stats_);
  }

  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }

  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }

  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }

  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];
  AllocatorStats stats_;

  void InitCache() {
    if (per_class_[0].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }

  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
    CHECK_GT(b->count, 0);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (class_id < SizeClassMap::kMinBatchClass)
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }

  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (class_id < SizeClassMap::kMinBatchClass)
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    b->count = cnt;
    c->count -= cnt;
    allocator->DeallocateBatch(&stats_, class_id, b);
  }
};
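
// Usage sketch (illustrative only; Primary64 refers to the earlier sketch):
// one cache per thread, created and destroyed against the shared allocator
// and global stats. Sizes are expressed as class ids; class 0 is never passed
// to Allocate() or Deallocate().
//
//   static Primary64 allocator;                     // Shared, process-wide.
//   static AllocatorGlobalStats global_stats;
//
//   SizeClassAllocatorLocalCache<Primary64> cache;  // Per-thread, e.g. TLS.
//   cache.Init(&global_stats);
//   void *p = cache.Allocate(&allocator, allocator.ClassID(100));
//   cache.Deallocate(&allocator, allocator.GetSizeClass(p), p);
//   cache.Destroy(&allocator, &global_stats);       // Drains and unregisters.
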
// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }

  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return 0;  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK_EQ(0, res & (alignment - 1));
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = MostSignificantSetBitIndex(map_size);
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
      stat->Add(AllocatorStatMalloced, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
    }
    return reinterpret_cast<void*>(res);
  }
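
  // Layout sketch for Allocate() above (page_size_ == 4096 is assumed only
  // for the numbers): a request of size 10000 with alignment <= page_size_
  // maps RoundUpMapSize(10000) == 16384 bytes; the Header sits at the start
  // of the first page and the user pointer is map_beg + page_size_:
  //
  //   map_beg                         res = map_beg + page_size_
  //   | Header + metadata (one page)  | user data (10000 bytes) | tail |
  //
  // GetHeader() simply steps back one page from the user pointer, which is
  // why pointers returned here are always page-aligned.
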
  void Deallocate(AllocatorStats *stat, void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
      stat->Add(AllocatorStatFreed, h->map_size);
      stat->Add(AllocatorStatUnmapped, h->map_size);
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }

  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }

  bool PointerIsMine(void *p) {
    return GetBlockBegin(p) != 0;
  }

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }

  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    return GetHeader(p) + 1;
  }

  void *GetBlockBegin(void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk, skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size < p)
      return 0;
    return GetUser(h);
  }

  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    mutex_.Lock();
  }

  void ForceUnlock() {
    mutex_.Unlock();
  }

 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };

  Header *GetHeader(uptr p) {
    CHECK_EQ(p % page_size_, 0);
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }

  void *GetUser(Header *h) {
    CHECK_EQ((uptr)h % page_size_, 0);
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }

  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};
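
// Composition sketch (illustrative only; Primary64 comes from the earlier
// sketch, each tool picks its own parameters): a complete allocator is built
// from a primary allocator, its per-thread cache type and the secondary
// allocator via CombinedAllocator (defined below), and is driven through the
// per-thread cache.
//
//   typedef SizeClassAllocatorLocalCache<Primary64> Cache;
//   typedef LargeMmapAllocator<> Secondary;
//   typedef CombinedAllocator<Primary64, Cache, Secondary> Allocator;
//
//   static Allocator allocator;
//   allocator.Init();
//   Cache cache;                     // One per thread.
//   allocator.InitCache(&cache);
//   void *p = allocator.Allocate(&cache, 100, 8);
//   allocator.Deallocate(&cache, p);
//   allocator.DestroyCache(&cache);  // On thread exit.
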
// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return a 2^x-aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
    stats_.Init();
  }

  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return 0;
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    if (primary_.CanAllocate(size, alignment))
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(&stats_, size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res)
      internal_memset(res, 0, size);
    return res;
  }

  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(&stats_, p);
  }

  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }

  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }

  void *GetMetaData(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }

  void *GetBlockBegin(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }

  void InitCache(AllocatorCache *cache) {
    cache->Init(&stats_);
  }

  void DestroyCache(AllocatorCache *cache) {
    cache->Destroy(&primary_, &stats_);
  }

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }

  void GetStats(AllocatorStatCounters s) const {
    stats_.Get(s);
  }

  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    primary_.ForceLock();
    secondary_.ForceLock();
  }

  void ForceUnlock() {
    secondary_.ForceUnlock();
    primary_.ForceUnlock();
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
  AllocatorGlobalStats stats_;
};

// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);

}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H