xref: /llvm-project/openmp/runtime/test/tasking/task_teams_stress_test.cpp (revision 41ca9104ac1e0bf248d4082f45c5ad03ddd55727)
1 // RUN: %libomp-cxx-compile
2 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
3 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
4 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
5 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
6 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
7 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
8 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
9 //
10 // RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
11 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
12 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
13 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
14 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
15 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
16 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
17 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
18 
19 // This test stresses the task team mechanism by running a simple
20 // increment task over and over with varying number of threads and nesting.
21 // The test covers nested serial teams and mixing serial teams with
22 // normal active teams.
23 
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <omp.h>
28 
29 // The number of times to run each test
30 #define NTIMES 5
31 
// Creates one explicit task that atomically adds 1 to the counter.
// a = counter to bump; the update is atomic, so several tasks may target the
//     same counter
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic
    *a += 1;
  }
}
40 
// Creates a task that adds (high - low + 1) to the counter by recursively
// halving the inclusive range [low, high]. Every single-element range ends in
// one atomic increment; an empty range (low > high) adds nothing.
// a    = counter to bump
// low  = first index of the range
// high = last index of the range
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low < high) {
      // Two child tasks, one per half of the range.
      const int mid = low + (high - low) / 2;
      task_inc_split_a(a, low, mid);
      task_inc_split_a(a, mid + 1, high);
    } else if (low == high) {
#pragma omp atomic
      *a += 1;
    }
  }
}
55 
56 #ifdef USE_HIDDEN_HELPERS
// Increment issued through a "target ... nowait" region that maps the counter
// to and from the target. Hidden helper tasks force serial regions to create
// task teams.
// a = counter to bump
void task_inc_a_hidden_helper(int *a) {
#pragma omp target map(tofrom : a[0]) nowait
  {
#pragma omp atomic
    *a += 1;
  }
}
65 #else
66 // Detached tasks force serial regions to create task teams
task_inc_a_detached(int * a,omp_event_handle_t handle)67 void task_inc_a_detached(int *a, omp_event_handle_t handle) {
68 #pragma omp task detach(handle)
69   {
70 #pragma omp atomic
71     (*a)++;
72     omp_fulfill_event(handle);
73   }
74 }
75 #endif
76 
// Verifies that the counter holds the expected value. On a mismatch, reports
// both values on stderr and terminates the process with EXIT_FAILURE.
// a        = counter to inspect
// expected = value the counter must currently hold
void check_a(int *a, int expected) {
  if (*a == expected)
    return;

  fprintf(stderr,
          "FAIL: a = %d instead of expected = %d. "
          "Compile with -DVERBOSE for more verbose output.\n",
          *a, expected);
  exit(EXIT_FAILURE);
}
86 
87 // Every thread creates a single "increment" task
test_tasks(omp_event_handle_t * handles,int expected,int * a)88 void test_tasks(omp_event_handle_t *handles, int expected, int *a) {
89   int tid = omp_get_thread_num();
90 
91   task_inc_a(a);
92 
93 #pragma omp barrier
94   check_a(a, expected);
95 #pragma omp barrier
96   check_a(a, expected);
97 #pragma omp barrier
98 
99 #ifdef USE_HIDDEN_HELPERS
100   task_inc_a_hidden_helper(a);
101 #else
102   task_inc_a_detached(a, handles[tid]);
103 #endif
104 
105 #pragma omp barrier
106   check_a(a, 2 * expected);
107 #pragma omp barrier
108   task_inc_a(a);
109 #pragma omp barrier
110   check_a(a, 3 * expected);
111 }
112 
113 // Testing single level of parallelism with increment tasks
test_base(int nthreads)114 void test_base(int nthreads) {
115 #ifdef VERBOSE
116 #pragma omp master
117   printf("    test_base(%d)\n", nthreads);
118 #endif
119   int a = 0;
120   omp_event_handle_t *handles;
121   handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
122 #pragma omp parallel num_threads(nthreads) shared(a)
123   { test_tasks(handles, nthreads, &a); }
124   free(handles);
125 }
126 
127 // Testing nested parallel with increment tasks
128 // first = nthreads of outer parallel
129 // second = nthreads of nested parallel
test_nest(int first,int second)130 void test_nest(int first, int second) {
131 #ifdef VERBOSE
132 #pragma omp master
133   printf("   test_nest(%d, %d)\n", first, second);
134 #endif
135 #pragma omp parallel num_threads(first)
136   { test_base(second); }
137 }
138 
139 // Testing 2-level nested parallels with increment tasks
140 // first = nthreads of outer parallel
141 // second = nthreads of nested parallel
142 // third = nthreads of second nested parallel
test_nest2(int first,int second,int third)143 void test_nest2(int first, int second, int third) {
144 #ifdef VERBOSE
145 #pragma omp master
146   printf("  test_nest2(%d, %d, %d)\n", first, second, third);
147 #endif
148 #pragma omp parallel num_threads(first)
149   { test_nest(second, third); }
150 }
151 
152 // Testing 3-level nested parallels with increment tasks
153 // first = nthreads of outer parallel
154 // second = nthreads of nested parallel
155 // third = nthreads of second nested parallel
156 // fourth = nthreads of third nested parallel
test_nest3(int first,int second,int third,int fourth)157 void test_nest3(int first, int second, int third, int fourth) {
158 #ifdef VERBOSE
159 #pragma omp master
160   printf(" test_nest3(%d, %d, %d, %d)\n", first, second, third, fourth);
161 #endif
162 #pragma omp parallel num_threads(first)
163   { test_nest2(second, third, fourth); }
164 }
165 
166 // Testing 4-level nested parallels with increment tasks
167 // first = nthreads of outer parallel
168 // second = nthreads of nested parallel
169 // third = nthreads of second nested parallel
170 // fourth = nthreads of third nested parallel
171 // fifth = nthreads of fourth nested parallel
test_nest4(int first,int second,int third,int fourth,int fifth)172 void test_nest4(int first, int second, int third, int fourth, int fifth) {
173 #ifdef VERBOSE
174 #pragma omp master
175   printf("test_nest4(%d, %d, %d, %d, %d)\n", first, second, third, fourth,
176          fifth);
177 #endif
178 #pragma omp parallel num_threads(first)
179   { test_nest3(second, third, fourth, fifth); }
180 }
181 
182 // Single thread starts a binary splitting "increment" task
183 // Detached tasks are still single "increment" task
test_tasks_split(omp_event_handle_t * handles,int expected,int * a)184 void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) {
185   int tid = omp_get_thread_num();
186 
187 #pragma omp single
188   task_inc_split_a(a, 1, expected); // task team A
189 
190 #pragma omp barrier
191   check_a(a, expected);
192 #pragma omp barrier
193   check_a(a, expected);
194 #pragma omp barrier
195 
196 #ifdef USE_HIDDEN_HELPERS
197   task_inc_a_hidden_helper(a);
198 #else
199   task_inc_a_detached(a, handles[tid]);
200 #endif
201 
202 #pragma omp barrier
203   check_a(a, 2 * expected);
204 #pragma omp barrier
205 #pragma omp single
206   task_inc_split_a(a, 1, expected); // task team B
207 #pragma omp barrier
208   check_a(a, 3 * expected);
209 }
210 
211 // Testing single level of parallelism with splitting incrementing tasks
test_base_split(int nthreads)212 void test_base_split(int nthreads) {
213 #ifdef VERBOSE
214 #pragma omp master
215   printf("  test_base_split(%d)\n", nthreads);
216 #endif
217   int a = 0;
218   omp_event_handle_t *handles;
219   handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
220 #pragma omp parallel num_threads(nthreads) shared(a)
221   { test_tasks_split(handles, nthreads, &a); }
222   free(handles);
223 }
224 
225 // Testing nested parallels with splitting tasks
226 // first = nthreads of outer parallel
227 // second = nthreads of nested parallel
test_nest_split(int first,int second)228 void test_nest_split(int first, int second) {
229 #ifdef VERBOSE
230 #pragma omp master
231   printf(" test_nest_split(%d, %d)\n", first, second);
232 #endif
233 #pragma omp parallel num_threads(first)
234   { test_base_split(second); }
235 }
236 
237 // Testing doubly nested parallels with splitting tasks
238 // first = nthreads of outer parallel
239 // second = nthreads of nested parallel
240 // third = nthreads of second nested parallel
test_nest2_split(int first,int second,int third)241 void test_nest2_split(int first, int second, int third) {
242 #ifdef VERBOSE
243 #pragma omp master
244   printf("test_nest2_split(%d, %d, %d)\n", first, second, third);
245 #endif
246 #pragma omp parallel num_threads(first)
247   { test_nest_split(second, third); }
248 }
249 
// Calls func(args...) n times in a row; does nothing when n <= 0.
// n    = number of invocations
// func = function to invoke
// args = arguments handed to every invocation
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  for (int remaining = n; remaining > 0; --remaining)
    func(args...);
}
256 
main()257 int main() {
258   omp_set_max_active_levels(5);
259 
260   run_ntimes(NTIMES, test_base, 4);
261   run_ntimes(NTIMES, test_base, 1);
262   run_ntimes(NTIMES, test_base, 8);
263   run_ntimes(NTIMES, test_base, 2);
264   run_ntimes(NTIMES, test_base, 6);
265   run_ntimes(NTIMES, test_nest, 1, 1);
266   run_ntimes(NTIMES, test_nest, 1, 5);
267   run_ntimes(NTIMES, test_nest, 2, 6);
268   run_ntimes(NTIMES, test_nest, 1, 1);
269   run_ntimes(NTIMES, test_nest, 4, 3);
270   run_ntimes(NTIMES, test_nest, 3, 2);
271   run_ntimes(NTIMES, test_nest, 1, 1);
272   run_ntimes(NTIMES, test_nest2, 1, 1, 2);
273   run_ntimes(NTIMES, test_nest2, 1, 2, 1);
274   run_ntimes(NTIMES, test_nest2, 2, 2, 1);
275   run_ntimes(NTIMES, test_nest2, 2, 1, 1);
276   run_ntimes(NTIMES, test_nest2, 4, 2, 1);
277   run_ntimes(NTIMES, test_nest2, 4, 2, 2);
278   run_ntimes(NTIMES, test_nest2, 1, 1, 1);
279   run_ntimes(NTIMES, test_nest2, 4, 2, 2);
280   run_ntimes(NTIMES, test_nest3, 1, 1, 1, 1);
281   run_ntimes(NTIMES, test_nest3, 1, 2, 1, 1);
282   run_ntimes(NTIMES, test_nest3, 1, 1, 2, 1);
283   run_ntimes(NTIMES, test_nest3, 1, 1, 1, 2);
284   run_ntimes(NTIMES, test_nest3, 2, 1, 1, 1);
285   run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
286   run_ntimes(NTIMES, test_nest4, 2, 1, 1, 1, 1);
287   run_ntimes(NTIMES, test_nest4, 1, 2, 1, 1, 1);
288   run_ntimes(NTIMES, test_nest4, 1, 1, 2, 1, 1);
289   run_ntimes(NTIMES, test_nest4, 1, 1, 1, 2, 1);
290   run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 2);
291   run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
292   run_ntimes(NTIMES, test_nest4, 1, 2, 1, 2, 1);
293 
294   run_ntimes(NTIMES, test_base_split, 4);
295   run_ntimes(NTIMES, test_base_split, 2);
296 
297   run_ntimes(NTIMES, test_base_split, 7);
298 
299   run_ntimes(NTIMES, test_base_split, 1);
300   run_ntimes(NTIMES, test_nest_split, 4, 2);
301   run_ntimes(NTIMES, test_nest_split, 2, 1);
302 
303   run_ntimes(NTIMES, test_nest_split, 7, 2);
304   run_ntimes(NTIMES, test_nest_split, 1, 1);
305   run_ntimes(NTIMES, test_nest_split, 1, 4);
306 
307   run_ntimes(NTIMES, test_nest2_split, 1, 1, 2);
308   run_ntimes(NTIMES, test_nest2_split, 1, 2, 1);
309   run_ntimes(NTIMES, test_nest2_split, 2, 2, 1);
310   run_ntimes(NTIMES, test_nest2_split, 2, 1, 1);
311   run_ntimes(NTIMES, test_nest2_split, 4, 2, 1);
312   run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
313   run_ntimes(NTIMES, test_nest2_split, 1, 1, 1);
314   run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
315 
316   printf("PASS\n");
317   return EXIT_SUCCESS;
318 }
319