1*3cab2bb3Spatrick // Mini-benchmark for tsan VTS worst case performance
2*3cab2bb3Spatrick // Idea:
3*3cab2bb3Spatrick // 1) Spawn M + N threads (M >> N)
//    We'll call the M threads 'garbage threads'.
// 2) Make sure all threads have been created, so that no TIDs are reused
6*3cab2bb3Spatrick // 3) Join the garbage threads
7*3cab2bb3Spatrick // 4) Do many sync operations on the remaining N threads
8*3cab2bb3Spatrick //
// It turns out that, due to the O(M+N) VTS complexity, step (4) is much
// slower when M is large.
11*3cab2bb3Spatrick //
12*3cab2bb3Spatrick // Some numbers:
13*3cab2bb3Spatrick // a) clang++ native O1 with n_iterations=200kk takes
14*3cab2bb3Spatrick // 5s regardless of M
15*3cab2bb3Spatrick // clang++ tsanv2 O1 with n_iterations=20kk takes
16*3cab2bb3Spatrick // 23.5s with M=200
17*3cab2bb3Spatrick // 11.5s with M=1
18*3cab2bb3Spatrick // i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
19*3cab2bb3Spatrick // b) g++ native O1 with n_iterations=200kk takes
20*3cab2bb3Spatrick // 5.5s regardless of M
21*3cab2bb3Spatrick // g++ tsanv1 O1 with n_iterations=2kk takes
22*3cab2bb3Spatrick // 39.5s with M=200
23*3cab2bb3Spatrick // 20.5s with M=1
24*3cab2bb3Spatrick // i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
25*3cab2bb3Spatrick
26*3cab2bb3Spatrick #include <assert.h>
27*3cab2bb3Spatrick #include <pthread.h>
28*3cab2bb3Spatrick #include <stdio.h>
29*3cab2bb3Spatrick #include <stdlib.h>
30*3cab2bb3Spatrick
// Thin wrapper around a pthread mutex with default attributes.
//
// The type is aligned to 64 bytes, so each element of an array of Mutex
// objects starts on its own 64-byte boundary (presumably to keep neighbouring
// mutexes on separate cache lines -- the code itself does not say).
class __attribute__((aligned(64))) Mutex {
 public:
  // Initializes the underlying pthread mutex with default attributes.
  Mutex() { pthread_mutex_init(&m_, NULL); }
  // Destroys the underlying pthread mutex.
  ~Mutex() { pthread_mutex_destroy(&m_); }
  // Acquires the mutex, blocking until it is available.
  void Lock() { pthread_mutex_lock(&m_); }
  // Releases the mutex.
  void Unlock() { pthread_mutex_unlock(&m_); }

 private:
  // Copying is disabled (declared but intentionally never defined): a bitwise
  // copy of an initialized pthread_mutex_t is not a usable mutex, and both
  // copies would run pthread_mutex_destroy() in their destructors.
  Mutex(const Mutex &);
  Mutex &operator=(const Mutex &);

  pthread_mutex_t m_;
};
41*3cab2bb3Spatrick
42*3cab2bb3Spatrick const int kNumMutexes = 1024;
43*3cab2bb3Spatrick Mutex mutexes[kNumMutexes];
44*3cab2bb3Spatrick
45*3cab2bb3Spatrick int n_threads, n_iterations;
46*3cab2bb3Spatrick
47*3cab2bb3Spatrick pthread_barrier_t all_threads_ready, main_threads_ready;
48*3cab2bb3Spatrick
GarbageThread(void * unused)49*3cab2bb3Spatrick void* GarbageThread(void *unused) {
50*3cab2bb3Spatrick pthread_barrier_wait(&all_threads_ready);
51*3cab2bb3Spatrick return 0;
52*3cab2bb3Spatrick }
53*3cab2bb3Spatrick
Thread(void * arg)54*3cab2bb3Spatrick void *Thread(void *arg) {
55*3cab2bb3Spatrick long idx = (long)arg;
56*3cab2bb3Spatrick pthread_barrier_wait(&all_threads_ready);
57*3cab2bb3Spatrick
58*3cab2bb3Spatrick // Wait for the main thread to join the garbage threads.
59*3cab2bb3Spatrick pthread_barrier_wait(&main_threads_ready);
60*3cab2bb3Spatrick
61*3cab2bb3Spatrick printf("Thread %ld go!\n", idx);
62*3cab2bb3Spatrick int offset = idx * kNumMutexes / n_threads;
63*3cab2bb3Spatrick for (int i = 0; i < n_iterations; i++) {
64*3cab2bb3Spatrick mutexes[(offset + i) % kNumMutexes].Lock();
65*3cab2bb3Spatrick mutexes[(offset + i) % kNumMutexes].Unlock();
66*3cab2bb3Spatrick }
67*3cab2bb3Spatrick printf("Thread %ld done\n", idx);
68*3cab2bb3Spatrick return 0;
69*3cab2bb3Spatrick }
70*3cab2bb3Spatrick
main(int argc,char ** argv)71*3cab2bb3Spatrick int main(int argc, char **argv) {
72*3cab2bb3Spatrick int n_garbage_threads;
73*3cab2bb3Spatrick if (argc == 1) {
74*3cab2bb3Spatrick n_threads = 2;
75*3cab2bb3Spatrick n_garbage_threads = 200;
76*3cab2bb3Spatrick n_iterations = 20000000;
77*3cab2bb3Spatrick } else if (argc == 4) {
78*3cab2bb3Spatrick n_threads = atoi(argv[1]);
79*3cab2bb3Spatrick assert(n_threads > 0 && n_threads <= 32);
80*3cab2bb3Spatrick n_garbage_threads = atoi(argv[2]);
81*3cab2bb3Spatrick assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
82*3cab2bb3Spatrick n_iterations = atoi(argv[3]);
83*3cab2bb3Spatrick } else {
84*3cab2bb3Spatrick printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
85*3cab2bb3Spatrick return 1;
86*3cab2bb3Spatrick }
87*3cab2bb3Spatrick printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
88*3cab2bb3Spatrick __FILE__, n_threads, n_garbage_threads, n_iterations);
89*3cab2bb3Spatrick
90*3cab2bb3Spatrick pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
91*3cab2bb3Spatrick pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
92*3cab2bb3Spatrick
93*3cab2bb3Spatrick pthread_t *t = new pthread_t[n_threads];
94*3cab2bb3Spatrick {
95*3cab2bb3Spatrick pthread_t *g_t = new pthread_t[n_garbage_threads];
96*3cab2bb3Spatrick for (int i = 0; i < n_garbage_threads; i++) {
97*3cab2bb3Spatrick int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
98*3cab2bb3Spatrick assert(status == 0);
99*3cab2bb3Spatrick }
100*3cab2bb3Spatrick for (int i = 0; i < n_threads; i++) {
101*3cab2bb3Spatrick int status = pthread_create(&t[i], 0, Thread, (void*)i);
102*3cab2bb3Spatrick assert(status == 0);
103*3cab2bb3Spatrick }
104*3cab2bb3Spatrick pthread_barrier_wait(&all_threads_ready);
105*3cab2bb3Spatrick printf("All threads started! Killing the garbage threads.\n");
106*3cab2bb3Spatrick for (int i = 0; i < n_garbage_threads; i++) {
107*3cab2bb3Spatrick pthread_join(g_t[i], 0);
108*3cab2bb3Spatrick }
109*3cab2bb3Spatrick delete [] g_t;
110*3cab2bb3Spatrick }
111*3cab2bb3Spatrick printf("Resuming the main threads.\n");
112*3cab2bb3Spatrick pthread_barrier_wait(&main_threads_ready);
113*3cab2bb3Spatrick
114*3cab2bb3Spatrick
115*3cab2bb3Spatrick for (int i = 0; i < n_threads; i++) {
116*3cab2bb3Spatrick pthread_join(t[i], 0);
117*3cab2bb3Spatrick }
118*3cab2bb3Spatrick delete [] t;
119*3cab2bb3Spatrick return 0;
120*3cab2bb3Spatrick }
121