xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/tsan/benchmarks/vts_many_threads_bench.cpp (revision 5b27928474e6a4103d65b347544705c40c9618fd)
1*68d75effSDimitry Andric // Mini-benchmark for tsan VTS worst case performance
2*68d75effSDimitry Andric // Idea:
3*68d75effSDimitry Andric // 1) Spawn M + N threads (M >> N)
4*68d75effSDimitry Andric //    We'll call the 'M' threads as 'garbage threads'.
// 2) Make sure all threads have been created, so that no TIDs are reused
6*68d75effSDimitry Andric // 3) Join the garbage threads
7*68d75effSDimitry Andric // 4) Do many sync operations on the remaining N threads
8*68d75effSDimitry Andric //
// It turns out that, due to the O(M+N) VTS complexity, step (4) is much slower
// when M is large.
11*68d75effSDimitry Andric //
12*68d75effSDimitry Andric // Some numbers:
13*68d75effSDimitry Andric // a) clang++ native O1 with n_iterations=200kk takes
14*68d75effSDimitry Andric //      5s regardless of M
15*68d75effSDimitry Andric //    clang++ tsanv2 O1 with n_iterations=20kk takes
16*68d75effSDimitry Andric //      23.5s with M=200
17*68d75effSDimitry Andric //      11.5s with M=1
18*68d75effSDimitry Andric //    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
19*68d75effSDimitry Andric // b) g++ native O1 with n_iterations=200kk takes
20*68d75effSDimitry Andric //      5.5s regardless of M
21*68d75effSDimitry Andric //    g++ tsanv1 O1 with n_iterations=2kk takes
22*68d75effSDimitry Andric //      39.5s with M=200
23*68d75effSDimitry Andric //      20.5s with M=1
24*68d75effSDimitry Andric //    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
25*68d75effSDimitry Andric 
26*68d75effSDimitry Andric #include <assert.h>
27*68d75effSDimitry Andric #include <pthread.h>
28*68d75effSDimitry Andric #include <stdio.h>
29*68d75effSDimitry Andric #include <stdlib.h>
30*68d75effSDimitry Andric 
// Minimal wrapper around a pthread mutex. Each object is aligned to 64 bytes
// (presumably so that neighbouring mutexes in an array do not share a cache
// line -- the source does not state the reason).
class __attribute__((aligned(64))) Mutex {
 public:
  // Initializes the underlying pthread mutex with default attributes.
  Mutex() {
    pthread_mutex_init(&mu_, NULL);
  }

  // Destroys the underlying pthread mutex.
  ~Mutex() {
    pthread_mutex_destroy(&mu_);
  }

  // Acquires the mutex.
  void Lock() {
    pthread_mutex_lock(&mu_);
  }

  // Releases the mutex.
  void Unlock() {
    pthread_mutex_unlock(&mu_);
  }

 private:
  pthread_mutex_t mu_;
};
41*68d75effSDimitry Andric 
42*68d75effSDimitry Andric const int kNumMutexes = 1024;
43*68d75effSDimitry Andric Mutex mutexes[kNumMutexes];
44*68d75effSDimitry Andric 
45*68d75effSDimitry Andric int n_threads, n_iterations;
46*68d75effSDimitry Andric 
47*68d75effSDimitry Andric pthread_barrier_t all_threads_ready, main_threads_ready;
48*68d75effSDimitry Andric 
GarbageThread(void * unused)49*68d75effSDimitry Andric void* GarbageThread(void *unused) {
50*68d75effSDimitry Andric   pthread_barrier_wait(&all_threads_ready);
51*68d75effSDimitry Andric   return 0;
52*68d75effSDimitry Andric }
53*68d75effSDimitry Andric 
Thread(void * arg)54*68d75effSDimitry Andric void *Thread(void *arg) {
55*68d75effSDimitry Andric   long idx = (long)arg;
56*68d75effSDimitry Andric   pthread_barrier_wait(&all_threads_ready);
57*68d75effSDimitry Andric 
58*68d75effSDimitry Andric   // Wait for the main thread to join the garbage threads.
59*68d75effSDimitry Andric   pthread_barrier_wait(&main_threads_ready);
60*68d75effSDimitry Andric 
61*68d75effSDimitry Andric   printf("Thread %ld go!\n", idx);
62*68d75effSDimitry Andric   int offset = idx * kNumMutexes / n_threads;
63*68d75effSDimitry Andric   for (int i = 0; i < n_iterations; i++) {
64*68d75effSDimitry Andric     mutexes[(offset + i) % kNumMutexes].Lock();
65*68d75effSDimitry Andric     mutexes[(offset + i) % kNumMutexes].Unlock();
66*68d75effSDimitry Andric   }
67*68d75effSDimitry Andric   printf("Thread %ld done\n", idx);
68*68d75effSDimitry Andric   return 0;
69*68d75effSDimitry Andric }
70*68d75effSDimitry Andric 
main(int argc,char ** argv)71*68d75effSDimitry Andric int main(int argc, char **argv) {
72*68d75effSDimitry Andric   int n_garbage_threads;
73*68d75effSDimitry Andric   if (argc == 1) {
74*68d75effSDimitry Andric     n_threads = 2;
75*68d75effSDimitry Andric     n_garbage_threads = 200;
76*68d75effSDimitry Andric     n_iterations = 20000000;
77*68d75effSDimitry Andric   } else if (argc == 4) {
78*68d75effSDimitry Andric     n_threads = atoi(argv[1]);
79*68d75effSDimitry Andric     assert(n_threads > 0 && n_threads <= 32);
80*68d75effSDimitry Andric     n_garbage_threads = atoi(argv[2]);
81*68d75effSDimitry Andric     assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
82*68d75effSDimitry Andric     n_iterations = atoi(argv[3]);
83*68d75effSDimitry Andric   } else {
84*68d75effSDimitry Andric     printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
85*68d75effSDimitry Andric     return 1;
86*68d75effSDimitry Andric   }
87*68d75effSDimitry Andric   printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
88*68d75effSDimitry Andric          __FILE__, n_threads, n_garbage_threads, n_iterations);
89*68d75effSDimitry Andric 
90*68d75effSDimitry Andric   pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
91*68d75effSDimitry Andric   pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
92*68d75effSDimitry Andric 
93*68d75effSDimitry Andric   pthread_t *t = new pthread_t[n_threads];
94*68d75effSDimitry Andric   {
95*68d75effSDimitry Andric     pthread_t *g_t = new pthread_t[n_garbage_threads];
96*68d75effSDimitry Andric     for (int i = 0; i < n_garbage_threads; i++) {
97*68d75effSDimitry Andric       int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
98*68d75effSDimitry Andric       assert(status == 0);
99*68d75effSDimitry Andric     }
100*68d75effSDimitry Andric     for (int i = 0; i < n_threads; i++) {
101*68d75effSDimitry Andric       int status = pthread_create(&t[i], 0, Thread, (void*)i);
102*68d75effSDimitry Andric       assert(status == 0);
103*68d75effSDimitry Andric     }
104*68d75effSDimitry Andric     pthread_barrier_wait(&all_threads_ready);
105*68d75effSDimitry Andric     printf("All threads started! Killing the garbage threads.\n");
106*68d75effSDimitry Andric     for (int i = 0; i < n_garbage_threads; i++) {
107*68d75effSDimitry Andric       pthread_join(g_t[i], 0);
108*68d75effSDimitry Andric     }
109*68d75effSDimitry Andric     delete [] g_t;
110*68d75effSDimitry Andric   }
111*68d75effSDimitry Andric   printf("Resuming the main threads.\n");
112*68d75effSDimitry Andric   pthread_barrier_wait(&main_threads_ready);
113*68d75effSDimitry Andric 
114*68d75effSDimitry Andric 
115*68d75effSDimitry Andric   for (int i = 0; i < n_threads; i++) {
116*68d75effSDimitry Andric     pthread_join(t[i], 0);
117*68d75effSDimitry Andric   }
118*68d75effSDimitry Andric   delete [] t;
119*68d75effSDimitry Andric   return 0;
120*68d75effSDimitry Andric }
121