1*3cab2bb3Spatrick #include <stdio.h>
2*3cab2bb3Spatrick #include <vector>
3*3cab2bb3Spatrick #include <pthread.h>
4*3cab2bb3Spatrick #include <malloc.h>
5*3cab2bb3Spatrick #include <algorithm>
6*3cab2bb3Spatrick
7*3cab2bb3Spatrick using namespace std;
8*3cab2bb3Spatrick
// Number of threads created by standalone_malloc_test().
const size_t kNumThreds = 16;
// Exclusive upper bound of the per-thread loop counter in MallocThread()
// (2^23).
const size_t kNumIters = static_cast<size_t>(1) << 23;
11*3cab2bb3Spatrick
// Optimization barrier.
//
// Emits an empty, volatile inline-asm statement that consumes `arg` in a
// register and declares a "memory" clobber, so the compiler can neither drop
// the statement nor assume memory is unchanged across it. No instructions are
// generated and `arg` is not dereferenced.
inline void break_optimization(void *arg) {
  __asm__ __volatile__(
      ""            // no instructions
      :             // no outputs
      : "r"(arg)    // input: the pointer value, held in a register
      : "memory");  // clobber: treat all memory as potentially modified
}
15*3cab2bb3Spatrick
16*3cab2bb3Spatrick __attribute__((noinline))
MallocThread(void * t)17*3cab2bb3Spatrick static void *MallocThread(void *t) {
18*3cab2bb3Spatrick size_t total_malloced = 0, total_freed = 0;
19*3cab2bb3Spatrick size_t max_in_use = 0;
20*3cab2bb3Spatrick size_t tid = reinterpret_cast<size_t>(t);
21*3cab2bb3Spatrick vector<pair<char *, size_t> > allocated;
22*3cab2bb3Spatrick allocated.reserve(kNumIters);
23*3cab2bb3Spatrick for (size_t i = 1; i < kNumIters; i++) {
24*3cab2bb3Spatrick if ((i % (kNumIters / 4)) == 0 && tid == 0)
25*3cab2bb3Spatrick fprintf(stderr, " T[%ld] iter %ld\n", tid, i);
26*3cab2bb3Spatrick bool allocate = (i % 5) <= 2; // 60% malloc, 40% free
27*3cab2bb3Spatrick if (i > kNumIters / 4)
28*3cab2bb3Spatrick allocate = i % 2; // then switch to 50% malloc, 50% free
29*3cab2bb3Spatrick if (allocate) {
30*3cab2bb3Spatrick size_t size = 1 + (i % 200);
31*3cab2bb3Spatrick if ((i % 10001) == 0)
32*3cab2bb3Spatrick size *= 4096;
33*3cab2bb3Spatrick total_malloced += size;
34*3cab2bb3Spatrick char *x = new char[size];
35*3cab2bb3Spatrick x[0] = x[size - 1] = x[size / 2] = 0;
36*3cab2bb3Spatrick allocated.push_back(make_pair(x, size));
37*3cab2bb3Spatrick max_in_use = max(max_in_use, total_malloced - total_freed);
38*3cab2bb3Spatrick } else {
39*3cab2bb3Spatrick if (allocated.empty()) continue;
40*3cab2bb3Spatrick size_t slot = i % allocated.size();
41*3cab2bb3Spatrick char *p = allocated[slot].first;
42*3cab2bb3Spatrick p[0] = 0; // emulate last user touch of the block
43*3cab2bb3Spatrick size_t size = allocated[slot].second;
44*3cab2bb3Spatrick total_freed += size;
45*3cab2bb3Spatrick swap(allocated[slot], allocated.back());
46*3cab2bb3Spatrick allocated.pop_back();
47*3cab2bb3Spatrick delete [] p;
48*3cab2bb3Spatrick }
49*3cab2bb3Spatrick }
50*3cab2bb3Spatrick if (tid == 0)
51*3cab2bb3Spatrick fprintf(stderr, " T[%ld] total_malloced: %ldM in use %ldM max %ldM\n",
52*3cab2bb3Spatrick tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
53*3cab2bb3Spatrick max_in_use >> 20);
54*3cab2bb3Spatrick for (size_t i = 0; i < allocated.size(); i++)
55*3cab2bb3Spatrick delete [] allocated[i].first;
56*3cab2bb3Spatrick return 0;
57*3cab2bb3Spatrick }
58*3cab2bb3Spatrick
59*3cab2bb3Spatrick template <int depth>
60*3cab2bb3Spatrick struct DeepStack {
61*3cab2bb3Spatrick __attribute__((noinline))
runDeepStack62*3cab2bb3Spatrick static void *run(void *t) {
63*3cab2bb3Spatrick break_optimization(0);
64*3cab2bb3Spatrick DeepStack<depth - 1>::run(t);
65*3cab2bb3Spatrick break_optimization(0);
66*3cab2bb3Spatrick return 0;
67*3cab2bb3Spatrick }
68*3cab2bb3Spatrick };
69*3cab2bb3Spatrick
70*3cab2bb3Spatrick template<>
71*3cab2bb3Spatrick struct DeepStack<0> {
runDeepStack72*3cab2bb3Spatrick static void *run(void *t) {
73*3cab2bb3Spatrick MallocThread(t);
74*3cab2bb3Spatrick return 0;
75*3cab2bb3Spatrick }
76*3cab2bb3Spatrick };
77*3cab2bb3Spatrick
78*3cab2bb3Spatrick // Build with -Dstandalone_malloc_test=main to make it a separate program.
standalone_malloc_test()79*3cab2bb3Spatrick int standalone_malloc_test() {
80*3cab2bb3Spatrick pthread_t t[kNumThreds];
81*3cab2bb3Spatrick for (size_t i = 0; i < kNumThreds; i++)
82*3cab2bb3Spatrick pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i));
83*3cab2bb3Spatrick for (size_t i = 0; i < kNumThreds; i++)
84*3cab2bb3Spatrick pthread_join(t[i], 0);
85*3cab2bb3Spatrick malloc_stats();
86*3cab2bb3Spatrick return 0;
87*3cab2bb3Spatrick }
88