1*d40108e0SAndreyChurbanov // RUN: %libomp-compile-and-run
2*d40108e0SAndreyChurbanov // The runtime currently does not get dependency information from GCC.
3*d40108e0SAndreyChurbanov // UNSUPPORTED: gcc
4*d40108e0SAndreyChurbanov
5*d40108e0SAndreyChurbanov // Tests OMP 5.x task dependence "omp_all_memory",
6*d40108e0SAndreyChurbanov // emulates compiler codegen versions for new dep kind
7*d40108e0SAndreyChurbanov //
// Task tree created:
//      task0 - task1 (in: i1, i2)
//             \
//        task2 (inoutset: i2), (in: i1)
//             /
//        task3 (omp_all_memory) via flag=0x80
//             /
//      task4 - task5 (in: i1, i2)
//           /
//       task6 (omp_all_memory) via addr=-1
//           /
//       task7 (omp_all_memory) via flag=0x80
//           /
//       task8 (in: i3)
22*d40108e0SAndreyChurbanov //
23*d40108e0SAndreyChurbanov #include <stdio.h>
24*d40108e0SAndreyChurbanov #include <omp.h>
25*d40108e0SAndreyChurbanov
26*d40108e0SAndreyChurbanov #ifdef _WIN32
27*d40108e0SAndreyChurbanov #include <windows.h>
28*d40108e0SAndreyChurbanov #define mysleep(n) Sleep(n)
29*d40108e0SAndreyChurbanov #else
30*d40108e0SAndreyChurbanov #include <unistd.h>
31*d40108e0SAndreyChurbanov #define mysleep(n) usleep((n)*1000)
32*d40108e0SAndreyChurbanov #endif
33*d40108e0SAndreyChurbanov
// Number of task bodies currently executing. Each task increments it on
// entry and decrements it on exit (must be 1 for MTX, <3 for other kinds).
static int checker = 0;
// Number of violations detected by the tasks; non-zero makes main() fail.
static int err = 0;
// Time in milliseconds each task sleeps between its two checks of `checker`;
// may be overridden on the compiler command line.
#ifndef DELAY
#define DELAY 100
#endif
40*d40108e0SAndreyChurbanov
41*d40108e0SAndreyChurbanov // ---------------------------------------------------------------------------
42*d40108e0SAndreyChurbanov // internal data to emulate compiler codegen
// One entry of the dependence list handed to __kmpc_omp_task_with_deps().
// NOTE(review): the layout presumably has to match the runtime's own
// dependence-info record; do not reorder or resize fields without checking.
struct DEP {
  size_t addr;         // address of the dependence object, stored as an integer
  size_t len;          // not used by this test (always set to 0)
  unsigned char flags; // dependence kind; this test uses 1 (IN), 2 (OUT),
                       // 4 (MUTEXINOUTSET), 8 (INOUTSET) and DEP_ALL_MEM
};
typedef struct DEP dep;
// Flag value this test uses to mark an entry as omp_all_memory.
#define DEP_ALL_MEM 0x80
// Hand-written view of the task object returned by __kmpc_omp_task_alloc().
// NOTE(review): the field order presumably mirrors the runtime's task
// descriptor followed by the task's private data; confirm against the
// runtime headers before changing anything here.
struct task {
  void **shareds;
  void *entry;
  int part_id;
  void *destr_thunk;
  int priority;
  long long device_id;
  int f_priv; // the task's single first-private variable, filled in by main()
};
typedef struct task task_t;
// Flags argument passed to __kmpc_omp_task_alloc() for every task created
// by hand in this test.
#define TIED 1
// Signature of a task entry routine: (global thread id, task object).
typedef int (*entry_t)(int, task_t *);
// Source-location record passed as the first argument of the __kmpc_* calls.
// NOTE(review): presumably must stay layout-compatible with the runtime's
// own location structure; confirm before editing.
struct ID {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource; // location string; this test uses ";file;func;0;0;;"
};
typedef struct ID id;
67*d40108e0SAndreyChurbanov // thunk routine for tasks with ALL dependency
thunk_m(int gtid,task_t * ptask)68*d40108e0SAndreyChurbanov int thunk_m(int gtid, task_t* ptask) {
69*d40108e0SAndreyChurbanov int lcheck, th;
70*d40108e0SAndreyChurbanov #pragma omp atomic capture
71*d40108e0SAndreyChurbanov lcheck = ++checker;
72*d40108e0SAndreyChurbanov th = omp_get_thread_num();
73*d40108e0SAndreyChurbanov printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
74*d40108e0SAndreyChurbanov if (lcheck != 1) { // no more than 1 task at a time
75*d40108e0SAndreyChurbanov err++;
76*d40108e0SAndreyChurbanov printf("Error m1, checker %d != 1\n", lcheck);
77*d40108e0SAndreyChurbanov }
78*d40108e0SAndreyChurbanov mysleep(DELAY);
79*d40108e0SAndreyChurbanov #pragma omp atomic read
80*d40108e0SAndreyChurbanov lcheck = checker; // must still be equal to 1
81*d40108e0SAndreyChurbanov if (lcheck != 1) {
82*d40108e0SAndreyChurbanov err++;
83*d40108e0SAndreyChurbanov printf("Error m2, checker %d != 1\n", lcheck);
84*d40108e0SAndreyChurbanov }
85*d40108e0SAndreyChurbanov #pragma omp atomic
86*d40108e0SAndreyChurbanov --checker;
87*d40108e0SAndreyChurbanov return 0;
88*d40108e0SAndreyChurbanov }
89*d40108e0SAndreyChurbanov // thunk routine for tasks with inoutset dependency
thunk_s(int gtid,task_t * ptask)90*d40108e0SAndreyChurbanov int thunk_s(int gtid, task_t* ptask) {
91*d40108e0SAndreyChurbanov int lcheck, th;
92*d40108e0SAndreyChurbanov #pragma omp atomic capture
93*d40108e0SAndreyChurbanov lcheck = ++checker; // 1
94*d40108e0SAndreyChurbanov th = omp_get_thread_num();
95*d40108e0SAndreyChurbanov printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
96*d40108e0SAndreyChurbanov if (lcheck != 1) { // no more than 1 task at a time
97*d40108e0SAndreyChurbanov err++;
98*d40108e0SAndreyChurbanov printf("Error s1, checker %d != 1\n", lcheck);
99*d40108e0SAndreyChurbanov }
100*d40108e0SAndreyChurbanov mysleep(DELAY);
101*d40108e0SAndreyChurbanov #pragma omp atomic read
102*d40108e0SAndreyChurbanov lcheck = checker; // must still be equal to 1
103*d40108e0SAndreyChurbanov if (lcheck != 1) {
104*d40108e0SAndreyChurbanov err++;
105*d40108e0SAndreyChurbanov printf("Error s2, checker %d != 1\n", lcheck);
106*d40108e0SAndreyChurbanov }
107*d40108e0SAndreyChurbanov #pragma omp atomic
108*d40108e0SAndreyChurbanov --checker;
109*d40108e0SAndreyChurbanov return 0;
110*d40108e0SAndreyChurbanov }
111*d40108e0SAndreyChurbanov
112*d40108e0SAndreyChurbanov #ifdef __cplusplus
113*d40108e0SAndreyChurbanov extern "C" {
114*d40108e0SAndreyChurbanov #endif
115*d40108e0SAndreyChurbanov int __kmpc_global_thread_num(id*);
116*d40108e0SAndreyChurbanov task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
117*d40108e0SAndreyChurbanov size_t sz, size_t shar, entry_t rtn);
118*d40108e0SAndreyChurbanov int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
119*d40108e0SAndreyChurbanov dep *dep_lst, int nd_noalias, dep *noalias_lst);
120*d40108e0SAndreyChurbanov static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
121*d40108e0SAndreyChurbanov #ifdef __cplusplus
122*d40108e0SAndreyChurbanov } // extern "C"
123*d40108e0SAndreyChurbanov #endif
124*d40108e0SAndreyChurbanov // End of internal data
125*d40108e0SAndreyChurbanov // ---------------------------------------------------------------------------
126*d40108e0SAndreyChurbanov
main()127*d40108e0SAndreyChurbanov int main()
128*d40108e0SAndreyChurbanov {
129*d40108e0SAndreyChurbanov int i1,i2,i3;
130*d40108e0SAndreyChurbanov omp_set_num_threads(8);
131*d40108e0SAndreyChurbanov omp_set_dynamic(0);
132*d40108e0SAndreyChurbanov #pragma omp parallel
133*d40108e0SAndreyChurbanov {
134*d40108e0SAndreyChurbanov #pragma omp single nowait
135*d40108e0SAndreyChurbanov {
136*d40108e0SAndreyChurbanov dep sdep[2];
137*d40108e0SAndreyChurbanov task_t *ptr;
138*d40108e0SAndreyChurbanov int gtid = __kmpc_global_thread_num(&loc);
139*d40108e0SAndreyChurbanov int t = omp_get_thread_num();
140*d40108e0SAndreyChurbanov #pragma omp task depend(in: i1, i2)
141*d40108e0SAndreyChurbanov { // task 0
142*d40108e0SAndreyChurbanov int lcheck, th;
143*d40108e0SAndreyChurbanov #pragma omp atomic capture
144*d40108e0SAndreyChurbanov lcheck = ++checker; // 1 or 2
145*d40108e0SAndreyChurbanov th = omp_get_thread_num();
146*d40108e0SAndreyChurbanov printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
147*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
148*d40108e0SAndreyChurbanov err++; // no more than 2 tasks concurrently
149*d40108e0SAndreyChurbanov printf("Error1, checker %d, not 1 or 2\n", lcheck);
150*d40108e0SAndreyChurbanov }
151*d40108e0SAndreyChurbanov mysleep(DELAY);
152*d40108e0SAndreyChurbanov #pragma omp atomic read
153*d40108e0SAndreyChurbanov lcheck = checker; // 1 or 2
154*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
155*d40108e0SAndreyChurbanov #pragma omp atomic
156*d40108e0SAndreyChurbanov err++;
157*d40108e0SAndreyChurbanov printf("Error2, checker %d, not 1 or 2\n", lcheck);
158*d40108e0SAndreyChurbanov }
159*d40108e0SAndreyChurbanov #pragma omp atomic
160*d40108e0SAndreyChurbanov --checker;
161*d40108e0SAndreyChurbanov }
162*d40108e0SAndreyChurbanov #pragma omp task depend(in: i1, i2)
163*d40108e0SAndreyChurbanov { // task 1
164*d40108e0SAndreyChurbanov int lcheck, th;
165*d40108e0SAndreyChurbanov #pragma omp atomic capture
166*d40108e0SAndreyChurbanov lcheck = ++checker; // 1 or 2
167*d40108e0SAndreyChurbanov th = omp_get_thread_num();
168*d40108e0SAndreyChurbanov printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
169*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
170*d40108e0SAndreyChurbanov err++; // no more than 2 tasks concurrently
171*d40108e0SAndreyChurbanov printf("Error3, checker %d, not 1 or 2\n", lcheck);
172*d40108e0SAndreyChurbanov }
173*d40108e0SAndreyChurbanov mysleep(DELAY);
174*d40108e0SAndreyChurbanov #pragma omp atomic read
175*d40108e0SAndreyChurbanov lcheck = checker; // 1 or 2
176*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
177*d40108e0SAndreyChurbanov err++;
178*d40108e0SAndreyChurbanov printf("Error4, checker %d, not 1 or 2\n", lcheck);
179*d40108e0SAndreyChurbanov }
180*d40108e0SAndreyChurbanov #pragma omp atomic
181*d40108e0SAndreyChurbanov --checker;
182*d40108e0SAndreyChurbanov }
183*d40108e0SAndreyChurbanov // compiler codegen start
184*d40108e0SAndreyChurbanov // task2
185*d40108e0SAndreyChurbanov ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
186*d40108e0SAndreyChurbanov sdep[0].addr = (size_t)&i1;
187*d40108e0SAndreyChurbanov sdep[0].len = 0; // not used
188*d40108e0SAndreyChurbanov sdep[0].flags = 1; // IN
189*d40108e0SAndreyChurbanov sdep[1].addr = (size_t)&i2;
190*d40108e0SAndreyChurbanov sdep[1].len = 0; // not used
191*d40108e0SAndreyChurbanov sdep[1].flags = 8; // INOUTSET
192*d40108e0SAndreyChurbanov ptr->f_priv = t + 10; // init single first-private variable
193*d40108e0SAndreyChurbanov __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
194*d40108e0SAndreyChurbanov
195*d40108e0SAndreyChurbanov // task3
196*d40108e0SAndreyChurbanov ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
197*d40108e0SAndreyChurbanov sdep[0].addr = (size_t)&i1; // to be ignored
198*d40108e0SAndreyChurbanov sdep[0].len = 0; // not used
199*d40108e0SAndreyChurbanov sdep[0].flags = 1; // IN
200*d40108e0SAndreyChurbanov sdep[1].addr = 0;
201*d40108e0SAndreyChurbanov sdep[1].len = 0; // not used
202*d40108e0SAndreyChurbanov sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
203*d40108e0SAndreyChurbanov ptr->f_priv = t + 20; // init single first-private variable
204*d40108e0SAndreyChurbanov __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
205*d40108e0SAndreyChurbanov // compiler codegen end
206*d40108e0SAndreyChurbanov #pragma omp task depend(in: i1, i2)
207*d40108e0SAndreyChurbanov { // task 4
208*d40108e0SAndreyChurbanov int lcheck, th;
209*d40108e0SAndreyChurbanov #pragma omp atomic capture
210*d40108e0SAndreyChurbanov lcheck = ++checker; // 1 or 2
211*d40108e0SAndreyChurbanov th = omp_get_thread_num();
212*d40108e0SAndreyChurbanov printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
213*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
214*d40108e0SAndreyChurbanov err++; // no more than 2 tasks concurrently
215*d40108e0SAndreyChurbanov printf("Error5, checker %d, not 1 or 2\n", lcheck);
216*d40108e0SAndreyChurbanov }
217*d40108e0SAndreyChurbanov mysleep(DELAY);
218*d40108e0SAndreyChurbanov #pragma omp atomic read
219*d40108e0SAndreyChurbanov lcheck = checker; // 1 or 2
220*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
221*d40108e0SAndreyChurbanov err++;
222*d40108e0SAndreyChurbanov printf("Error6, checker %d, not 1 or 2\n", lcheck);
223*d40108e0SAndreyChurbanov }
224*d40108e0SAndreyChurbanov #pragma omp atomic
225*d40108e0SAndreyChurbanov --checker;
226*d40108e0SAndreyChurbanov }
227*d40108e0SAndreyChurbanov #pragma omp task depend(in: i1, i2)
228*d40108e0SAndreyChurbanov { // task 5
229*d40108e0SAndreyChurbanov int lcheck, th;
230*d40108e0SAndreyChurbanov #pragma omp atomic capture
231*d40108e0SAndreyChurbanov lcheck = ++checker; // 1 or 2
232*d40108e0SAndreyChurbanov th = omp_get_thread_num();
233*d40108e0SAndreyChurbanov printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
234*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
235*d40108e0SAndreyChurbanov err++; // no more than 2 tasks concurrently
236*d40108e0SAndreyChurbanov printf("Error7, checker %d, not 1 or 2\n", lcheck);
237*d40108e0SAndreyChurbanov }
238*d40108e0SAndreyChurbanov mysleep(DELAY);
239*d40108e0SAndreyChurbanov #pragma omp atomic read
240*d40108e0SAndreyChurbanov lcheck = checker; // 1 or 2
241*d40108e0SAndreyChurbanov if (lcheck > 2 || lcheck < 1) {
242*d40108e0SAndreyChurbanov err++;
243*d40108e0SAndreyChurbanov printf("Error8, checker %d, not 1 or 2\n", lcheck);
244*d40108e0SAndreyChurbanov }
245*d40108e0SAndreyChurbanov #pragma omp atomic
246*d40108e0SAndreyChurbanov --checker;
247*d40108e0SAndreyChurbanov }
248*d40108e0SAndreyChurbanov // compiler codegen start
249*d40108e0SAndreyChurbanov // task6
250*d40108e0SAndreyChurbanov ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
251*d40108e0SAndreyChurbanov sdep[0].addr = (size_t)(-1); // omp_all_memory
252*d40108e0SAndreyChurbanov sdep[0].len = 0; // not used
253*d40108e0SAndreyChurbanov sdep[0].flags = 2; // OUT
254*d40108e0SAndreyChurbanov ptr->f_priv = t + 30; // init single first-private variable
255*d40108e0SAndreyChurbanov __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
256*d40108e0SAndreyChurbanov
257*d40108e0SAndreyChurbanov // task7
258*d40108e0SAndreyChurbanov ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
259*d40108e0SAndreyChurbanov sdep[0].addr = 0;
260*d40108e0SAndreyChurbanov sdep[0].len = 0; // not used
261*d40108e0SAndreyChurbanov sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
262*d40108e0SAndreyChurbanov sdep[1].addr = (size_t)&i3; // to be ignored
263*d40108e0SAndreyChurbanov sdep[1].len = 0; // not used
264*d40108e0SAndreyChurbanov sdep[1].flags = 4; // MUTEXINOUTSET
265*d40108e0SAndreyChurbanov ptr->f_priv = t + 40; // init single first-private variable
266*d40108e0SAndreyChurbanov __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
267*d40108e0SAndreyChurbanov // compiler codegen end
268*d40108e0SAndreyChurbanov #pragma omp task depend(in: i3)
269*d40108e0SAndreyChurbanov { // task 8
270*d40108e0SAndreyChurbanov int lcheck, th;
271*d40108e0SAndreyChurbanov #pragma omp atomic capture
272*d40108e0SAndreyChurbanov lcheck = ++checker; // 1
273*d40108e0SAndreyChurbanov th = omp_get_thread_num();
274*d40108e0SAndreyChurbanov printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
275*d40108e0SAndreyChurbanov if (lcheck != 1) {
276*d40108e0SAndreyChurbanov err++;
277*d40108e0SAndreyChurbanov printf("Error9, checker %d, != 1\n", lcheck);
278*d40108e0SAndreyChurbanov }
279*d40108e0SAndreyChurbanov mysleep(DELAY);
280*d40108e0SAndreyChurbanov #pragma omp atomic read
281*d40108e0SAndreyChurbanov lcheck = checker;
282*d40108e0SAndreyChurbanov if (lcheck != 1) {
283*d40108e0SAndreyChurbanov err++;
284*d40108e0SAndreyChurbanov printf("Error10, checker %d, != 1\n", lcheck);
285*d40108e0SAndreyChurbanov }
286*d40108e0SAndreyChurbanov #pragma omp atomic
287*d40108e0SAndreyChurbanov --checker;
288*d40108e0SAndreyChurbanov }
289*d40108e0SAndreyChurbanov } // single
290*d40108e0SAndreyChurbanov } // parallel
291*d40108e0SAndreyChurbanov if (err == 0 && checker == 0) {
292*d40108e0SAndreyChurbanov printf("passed\n");
293*d40108e0SAndreyChurbanov return 0;
294*d40108e0SAndreyChurbanov } else {
295*d40108e0SAndreyChurbanov printf("failed, err = %d, checker = %d\n", err, checker);
296*d40108e0SAndreyChurbanov return 1;
297*d40108e0SAndreyChurbanov }
298*d40108e0SAndreyChurbanov }
299