1 // RUN: %libomp-cxx-compile
2 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
3 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
4 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
5 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
6 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
7 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
8 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
9 //
10 // RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
11 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
12 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
13 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
14 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
15 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
16 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
17 // RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
18
19 // This test stresses the task team mechanism by running a simple
20 // increment task over and over with varying number of threads and nesting.
21 // The test covers nested serial teams and mixing serial teams with
22 // normal active teams.
23
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <omp.h>
28
29 // The number of times to run each test
30 #define NTIMES 5
31
// Plain increment task: spawns one explicit task whose only job is to
// atomically add one to the counter that `a` points to.
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic update
    ++(*a);
  }
}
40
// Binary-splitting increment task. The inclusive range [low, high] is halved
// inside a task until only single-element ranges remain; each of those adds
// one to *a, so a non-empty range contributes (high - low + 1) increments.
// A range with low > high contributes nothing.
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low < high) {
      // More than one element left: give each half its own child task.
      const int middle = low + (high - low) / 2;
      task_inc_split_a(a, low, middle);
      task_inc_split_a(a, middle + 1, high);
    } else if (low == high) {
#pragma omp atomic
      (*a)++;
    }
  }
}
55
56 #ifdef USE_HIDDEN_HELPERS
// Hidden-helper flavour of the increment: the atomic bump is wrapped in a
// `target ... nowait` region. Hidden helper tasks force serial regions to
// create task teams, which is the mechanism this test wants to stress.
void task_inc_a_hidden_helper(int *a) {
#pragma omp target map(tofrom : a[0]) nowait
  {
#pragma omp atomic update
    ++(*a);
  }
}
65 #else
66 // Detached tasks force serial regions to create task teams
task_inc_a_detached(int * a,omp_event_handle_t handle)67 void task_inc_a_detached(int *a, omp_event_handle_t handle) {
68 #pragma omp task detach(handle)
69 {
70 #pragma omp atomic
71 (*a)++;
72 omp_fulfill_event(handle);
73 }
74 }
75 #endif
76
// Verify that the counter behind `a` holds `expected`. On a mismatch, print a
// diagnostic to stderr and terminate the process with EXIT_FAILURE; on a match
// simply return.
void check_a(int *a, int expected) {
  const int observed = *a;
  if (observed == expected) {
    return;
  }
  fprintf(stderr,
          "FAIL: a = %d instead of expected = %d. Compile with "
          "-DVERBOSE for more verbose output.\n",
          observed, expected);
  exit(EXIT_FAILURE);
}
86
87 // Every thread creates a single "increment" task
test_tasks(omp_event_handle_t * handles,int expected,int * a)88 void test_tasks(omp_event_handle_t *handles, int expected, int *a) {
89 int tid = omp_get_thread_num();
90
91 task_inc_a(a);
92
93 #pragma omp barrier
94 check_a(a, expected);
95 #pragma omp barrier
96 check_a(a, expected);
97 #pragma omp barrier
98
99 #ifdef USE_HIDDEN_HELPERS
100 task_inc_a_hidden_helper(a);
101 #else
102 task_inc_a_detached(a, handles[tid]);
103 #endif
104
105 #pragma omp barrier
106 check_a(a, 2 * expected);
107 #pragma omp barrier
108 task_inc_a(a);
109 #pragma omp barrier
110 check_a(a, 3 * expected);
111 }
112
113 // Testing single level of parallelism with increment tasks
test_base(int nthreads)114 void test_base(int nthreads) {
115 #ifdef VERBOSE
116 #pragma omp master
117 printf(" test_base(%d)\n", nthreads);
118 #endif
119 int a = 0;
120 omp_event_handle_t *handles;
121 handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
122 #pragma omp parallel num_threads(nthreads) shared(a)
123 { test_tasks(handles, nthreads, &a); }
124 free(handles);
125 }
126
127 // Testing nested parallel with increment tasks
128 // first = nthreads of outer parallel
129 // second = nthreads of nested parallel
test_nest(int first,int second)130 void test_nest(int first, int second) {
131 #ifdef VERBOSE
132 #pragma omp master
133 printf(" test_nest(%d, %d)\n", first, second);
134 #endif
135 #pragma omp parallel num_threads(first)
136 { test_base(second); }
137 }
138
139 // Testing 2-level nested parallels with increment tasks
140 // first = nthreads of outer parallel
141 // second = nthreads of nested parallel
142 // third = nthreads of second nested parallel
test_nest2(int first,int second,int third)143 void test_nest2(int first, int second, int third) {
144 #ifdef VERBOSE
145 #pragma omp master
146 printf(" test_nest2(%d, %d, %d)\n", first, second, third);
147 #endif
148 #pragma omp parallel num_threads(first)
149 { test_nest(second, third); }
150 }
151
152 // Testing 3-level nested parallels with increment tasks
153 // first = nthreads of outer parallel
154 // second = nthreads of nested parallel
155 // third = nthreads of second nested parallel
156 // fourth = nthreads of third nested parallel
test_nest3(int first,int second,int third,int fourth)157 void test_nest3(int first, int second, int third, int fourth) {
158 #ifdef VERBOSE
159 #pragma omp master
160 printf(" test_nest3(%d, %d, %d, %d)\n", first, second, third, fourth);
161 #endif
162 #pragma omp parallel num_threads(first)
163 { test_nest2(second, third, fourth); }
164 }
165
166 // Testing 4-level nested parallels with increment tasks
167 // first = nthreads of outer parallel
168 // second = nthreads of nested parallel
169 // third = nthreads of second nested parallel
170 // fourth = nthreads of third nested parallel
171 // fifth = nthreads of fourth nested parallel
test_nest4(int first,int second,int third,int fourth,int fifth)172 void test_nest4(int first, int second, int third, int fourth, int fifth) {
173 #ifdef VERBOSE
174 #pragma omp master
175 printf("test_nest4(%d, %d, %d, %d, %d)\n", first, second, third, fourth,
176 fifth);
177 #endif
178 #pragma omp parallel num_threads(first)
179 { test_nest3(second, third, fourth, fifth); }
180 }
181
182 // Single thread starts a binary splitting "increment" task
183 // Detached tasks are still single "increment" task
test_tasks_split(omp_event_handle_t * handles,int expected,int * a)184 void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) {
185 int tid = omp_get_thread_num();
186
187 #pragma omp single
188 task_inc_split_a(a, 1, expected); // task team A
189
190 #pragma omp barrier
191 check_a(a, expected);
192 #pragma omp barrier
193 check_a(a, expected);
194 #pragma omp barrier
195
196 #ifdef USE_HIDDEN_HELPERS
197 task_inc_a_hidden_helper(a);
198 #else
199 task_inc_a_detached(a, handles[tid]);
200 #endif
201
202 #pragma omp barrier
203 check_a(a, 2 * expected);
204 #pragma omp barrier
205 #pragma omp single
206 task_inc_split_a(a, 1, expected); // task team B
207 #pragma omp barrier
208 check_a(a, 3 * expected);
209 }
210
211 // Testing single level of parallelism with splitting incrementing tasks
test_base_split(int nthreads)212 void test_base_split(int nthreads) {
213 #ifdef VERBOSE
214 #pragma omp master
215 printf(" test_base_split(%d)\n", nthreads);
216 #endif
217 int a = 0;
218 omp_event_handle_t *handles;
219 handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
220 #pragma omp parallel num_threads(nthreads) shared(a)
221 { test_tasks_split(handles, nthreads, &a); }
222 free(handles);
223 }
224
225 // Testing nested parallels with splitting tasks
226 // first = nthreads of outer parallel
227 // second = nthreads of nested parallel
test_nest_split(int first,int second)228 void test_nest_split(int first, int second) {
229 #ifdef VERBOSE
230 #pragma omp master
231 printf(" test_nest_split(%d, %d)\n", first, second);
232 #endif
233 #pragma omp parallel num_threads(first)
234 { test_base_split(second); }
235 }
236
237 // Testing doubly nested parallels with splitting tasks
238 // first = nthreads of outer parallel
239 // second = nthreads of nested parallel
240 // third = nthreads of second nested parallel
test_nest2_split(int first,int second,int third)241 void test_nest2_split(int first, int second, int third) {
242 #ifdef VERBOSE
243 #pragma omp master
244 printf("test_nest2_split(%d, %d, %d)\n", first, second, third);
245 #endif
246 #pragma omp parallel num_threads(first)
247 { test_nest_split(second, third); }
248 }
249
// Call func(args...) n times back to back, forwarding the same argument
// values on every call. Nothing is called when n is zero or negative.
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  for (int remaining = n; remaining > 0; --remaining) {
    func(args...);
  }
}
256
main()257 int main() {
258 omp_set_max_active_levels(5);
259
260 run_ntimes(NTIMES, test_base, 4);
261 run_ntimes(NTIMES, test_base, 1);
262 run_ntimes(NTIMES, test_base, 8);
263 run_ntimes(NTIMES, test_base, 2);
264 run_ntimes(NTIMES, test_base, 6);
265 run_ntimes(NTIMES, test_nest, 1, 1);
266 run_ntimes(NTIMES, test_nest, 1, 5);
267 run_ntimes(NTIMES, test_nest, 2, 6);
268 run_ntimes(NTIMES, test_nest, 1, 1);
269 run_ntimes(NTIMES, test_nest, 4, 3);
270 run_ntimes(NTIMES, test_nest, 3, 2);
271 run_ntimes(NTIMES, test_nest, 1, 1);
272 run_ntimes(NTIMES, test_nest2, 1, 1, 2);
273 run_ntimes(NTIMES, test_nest2, 1, 2, 1);
274 run_ntimes(NTIMES, test_nest2, 2, 2, 1);
275 run_ntimes(NTIMES, test_nest2, 2, 1, 1);
276 run_ntimes(NTIMES, test_nest2, 4, 2, 1);
277 run_ntimes(NTIMES, test_nest2, 4, 2, 2);
278 run_ntimes(NTIMES, test_nest2, 1, 1, 1);
279 run_ntimes(NTIMES, test_nest2, 4, 2, 2);
280 run_ntimes(NTIMES, test_nest3, 1, 1, 1, 1);
281 run_ntimes(NTIMES, test_nest3, 1, 2, 1, 1);
282 run_ntimes(NTIMES, test_nest3, 1, 1, 2, 1);
283 run_ntimes(NTIMES, test_nest3, 1, 1, 1, 2);
284 run_ntimes(NTIMES, test_nest3, 2, 1, 1, 1);
285 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
286 run_ntimes(NTIMES, test_nest4, 2, 1, 1, 1, 1);
287 run_ntimes(NTIMES, test_nest4, 1, 2, 1, 1, 1);
288 run_ntimes(NTIMES, test_nest4, 1, 1, 2, 1, 1);
289 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 2, 1);
290 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 2);
291 run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
292 run_ntimes(NTIMES, test_nest4, 1, 2, 1, 2, 1);
293
294 run_ntimes(NTIMES, test_base_split, 4);
295 run_ntimes(NTIMES, test_base_split, 2);
296
297 run_ntimes(NTIMES, test_base_split, 7);
298
299 run_ntimes(NTIMES, test_base_split, 1);
300 run_ntimes(NTIMES, test_nest_split, 4, 2);
301 run_ntimes(NTIMES, test_nest_split, 2, 1);
302
303 run_ntimes(NTIMES, test_nest_split, 7, 2);
304 run_ntimes(NTIMES, test_nest_split, 1, 1);
305 run_ntimes(NTIMES, test_nest_split, 1, 4);
306
307 run_ntimes(NTIMES, test_nest2_split, 1, 1, 2);
308 run_ntimes(NTIMES, test_nest2_split, 1, 2, 1);
309 run_ntimes(NTIMES, test_nest2_split, 2, 2, 1);
310 run_ntimes(NTIMES, test_nest2_split, 2, 1, 1);
311 run_ntimes(NTIMES, test_nest2_split, 4, 2, 1);
312 run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
313 run_ntimes(NTIMES, test_nest2_split, 1, 1, 1);
314 run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
315
316 printf("PASS\n");
317 return EXIT_SUCCESS;
318 }
319