xref: /openbsd-src/gnu/llvm/compiler-rt/lib/tsan/benchmarks/vts_many_threads_bench.cpp (revision 3cab2bb3f667058bece8e38b12449a63a9d73c4b)
1*3cab2bb3Spatrick // Mini-benchmark for tsan VTS worst case performance
2*3cab2bb3Spatrick // Idea:
3*3cab2bb3Spatrick // 1) Spawn M + N threads (M >> N)
4*3cab2bb3Spatrick //    We'll call the 'M' threads 'garbage threads'.
5*3cab2bb3Spatrick // 2) Make sure all threads have been created, so that no TIDs are reused
6*3cab2bb3Spatrick // 3) Join the garbage threads
7*3cab2bb3Spatrick // 4) Do many sync operations on the remaining N threads
8*3cab2bb3Spatrick //
9*3cab2bb3Spatrick // It turns out that, due to the O(M+N) VTS complexity, step (4) is much
10*3cab2bb3Spatrick // slower when M is large.
11*3cab2bb3Spatrick //
12*3cab2bb3Spatrick // Some numbers:
13*3cab2bb3Spatrick // a) clang++ native O1 with n_iterations=200kk takes
14*3cab2bb3Spatrick //      5s regardless of M
15*3cab2bb3Spatrick //    clang++ tsanv2 O1 with n_iterations=20kk takes
16*3cab2bb3Spatrick //      23.5s with M=200
17*3cab2bb3Spatrick //      11.5s with M=1
18*3cab2bb3Spatrick //    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
19*3cab2bb3Spatrick // b) g++ native O1 with n_iterations=200kk takes
20*3cab2bb3Spatrick //      5.5s regardless of M
21*3cab2bb3Spatrick //    g++ tsanv1 O1 with n_iterations=2kk takes
22*3cab2bb3Spatrick //      39.5s with M=200
23*3cab2bb3Spatrick //      20.5s with M=1
24*3cab2bb3Spatrick //    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
25*3cab2bb3Spatrick 
26*3cab2bb3Spatrick #include <assert.h>
27*3cab2bb3Spatrick #include <pthread.h>
28*3cab2bb3Spatrick #include <stdio.h>
29*3cab2bb3Spatrick #include <stdlib.h>
30*3cab2bb3Spatrick 
// Thin wrapper around a pthread mutex.
//
// The mutex is initialized with default attributes on construction and
// destroyed on destruction. The class is aligned to 64 bytes, so consecutive
// elements of a Mutex array start 64 bytes apart.
class __attribute__((aligned(64))) Mutex {
 public:
  Mutex() {
    pthread_mutex_init(&handle_, NULL);
  }

  ~Mutex() {
    pthread_mutex_destroy(&handle_);
  }

  // Acquires the underlying pthread mutex.
  void Lock() {
    pthread_mutex_lock(&handle_);
  }

  // Releases the underlying pthread mutex.
  void Unlock() {
    pthread_mutex_unlock(&handle_);
  }

 private:
  pthread_mutex_t handle_;
};
41*3cab2bb3Spatrick 
42*3cab2bb3Spatrick const int kNumMutexes = 1024;
43*3cab2bb3Spatrick Mutex mutexes[kNumMutexes];
44*3cab2bb3Spatrick 
45*3cab2bb3Spatrick int n_threads, n_iterations;
46*3cab2bb3Spatrick 
47*3cab2bb3Spatrick pthread_barrier_t all_threads_ready, main_threads_ready;
48*3cab2bb3Spatrick 
GarbageThread(void * unused)49*3cab2bb3Spatrick void* GarbageThread(void *unused) {
50*3cab2bb3Spatrick   pthread_barrier_wait(&all_threads_ready);
51*3cab2bb3Spatrick   return 0;
52*3cab2bb3Spatrick }
53*3cab2bb3Spatrick 
Thread(void * arg)54*3cab2bb3Spatrick void *Thread(void *arg) {
55*3cab2bb3Spatrick   long idx = (long)arg;
56*3cab2bb3Spatrick   pthread_barrier_wait(&all_threads_ready);
57*3cab2bb3Spatrick 
58*3cab2bb3Spatrick   // Wait for the main thread to join the garbage threads.
59*3cab2bb3Spatrick   pthread_barrier_wait(&main_threads_ready);
60*3cab2bb3Spatrick 
61*3cab2bb3Spatrick   printf("Thread %ld go!\n", idx);
62*3cab2bb3Spatrick   int offset = idx * kNumMutexes / n_threads;
63*3cab2bb3Spatrick   for (int i = 0; i < n_iterations; i++) {
64*3cab2bb3Spatrick     mutexes[(offset + i) % kNumMutexes].Lock();
65*3cab2bb3Spatrick     mutexes[(offset + i) % kNumMutexes].Unlock();
66*3cab2bb3Spatrick   }
67*3cab2bb3Spatrick   printf("Thread %ld done\n", idx);
68*3cab2bb3Spatrick   return 0;
69*3cab2bb3Spatrick }
70*3cab2bb3Spatrick 
main(int argc,char ** argv)71*3cab2bb3Spatrick int main(int argc, char **argv) {
72*3cab2bb3Spatrick   int n_garbage_threads;
73*3cab2bb3Spatrick   if (argc == 1) {
74*3cab2bb3Spatrick     n_threads = 2;
75*3cab2bb3Spatrick     n_garbage_threads = 200;
76*3cab2bb3Spatrick     n_iterations = 20000000;
77*3cab2bb3Spatrick   } else if (argc == 4) {
78*3cab2bb3Spatrick     n_threads = atoi(argv[1]);
79*3cab2bb3Spatrick     assert(n_threads > 0 && n_threads <= 32);
80*3cab2bb3Spatrick     n_garbage_threads = atoi(argv[2]);
81*3cab2bb3Spatrick     assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
82*3cab2bb3Spatrick     n_iterations = atoi(argv[3]);
83*3cab2bb3Spatrick   } else {
84*3cab2bb3Spatrick     printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
85*3cab2bb3Spatrick     return 1;
86*3cab2bb3Spatrick   }
87*3cab2bb3Spatrick   printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
88*3cab2bb3Spatrick          __FILE__, n_threads, n_garbage_threads, n_iterations);
89*3cab2bb3Spatrick 
90*3cab2bb3Spatrick   pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
91*3cab2bb3Spatrick   pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
92*3cab2bb3Spatrick 
93*3cab2bb3Spatrick   pthread_t *t = new pthread_t[n_threads];
94*3cab2bb3Spatrick   {
95*3cab2bb3Spatrick     pthread_t *g_t = new pthread_t[n_garbage_threads];
96*3cab2bb3Spatrick     for (int i = 0; i < n_garbage_threads; i++) {
97*3cab2bb3Spatrick       int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
98*3cab2bb3Spatrick       assert(status == 0);
99*3cab2bb3Spatrick     }
100*3cab2bb3Spatrick     for (int i = 0; i < n_threads; i++) {
101*3cab2bb3Spatrick       int status = pthread_create(&t[i], 0, Thread, (void*)i);
102*3cab2bb3Spatrick       assert(status == 0);
103*3cab2bb3Spatrick     }
104*3cab2bb3Spatrick     pthread_barrier_wait(&all_threads_ready);
105*3cab2bb3Spatrick     printf("All threads started! Killing the garbage threads.\n");
106*3cab2bb3Spatrick     for (int i = 0; i < n_garbage_threads; i++) {
107*3cab2bb3Spatrick       pthread_join(g_t[i], 0);
108*3cab2bb3Spatrick     }
109*3cab2bb3Spatrick     delete [] g_t;
110*3cab2bb3Spatrick   }
111*3cab2bb3Spatrick   printf("Resuming the main threads.\n");
112*3cab2bb3Spatrick   pthread_barrier_wait(&main_threads_ready);
113*3cab2bb3Spatrick 
114*3cab2bb3Spatrick 
115*3cab2bb3Spatrick   for (int i = 0; i < n_threads; i++) {
116*3cab2bb3Spatrick     pthread_join(t[i], 0);
117*3cab2bb3Spatrick   }
118*3cab2bb3Spatrick   delete [] t;
119*3cab2bb3Spatrick   return 0;
120*3cab2bb3Spatrick }
121