xref: /llvm-project/openmp/runtime/test/tasking/kmp_task_depend_all.c (revision d40108e0af08389a791c7b6783f416486068be96)
// RUN: %libomp-compile-and-run
// The runtime currently does not get dependency information from GCC.
// UNSUPPORTED: gcc

// Tests OMP 5.x task dependence "omp_all_memory",
// emulates compiler codegen versions for new dep kind
//
// Task tree created:
//      task0 - task1 (in: i1, i2)
//             \
//        task2 (inoutset: i2), (in: i1)
//             /
//        task3 (omp_all_memory) via flag=0x80
//             /
//      task4 - task5 (in: i1, i2)
//           /
//       task6 (omp_all_memory) via addr=-1
//           /
//       task7 (omp_all_memory) via flag=0x80
//           /
//       task8 (in: i3)
//
#include <stddef.h>
#include <stdio.h>
#include <omp.h>
25*d40108e0SAndreyChurbanov 
// Portable sleep helper: mysleep(n) suspends the calling thread for n
// milliseconds on both Windows and POSIX systems.
#ifdef _WIN32
#include <windows.h>
#define mysleep(n) Sleep(n) // Sleep() takes milliseconds
#else
#include <unistd.h>
#define mysleep(n) usleep((n)*1000) // usleep() takes microseconds
#endif

// Number of test tasks currently executing. Every task increments it on entry
// and decrements it on exit. Tasks with an omp_all_memory or inoutset
// dependence (and task 8) expect to see exactly 1; the paired "in" tasks
// (0/1 and 4/5) accept 1 or 2.
static int checker = 0;
// Number of violations detected; the test passes only when it stays 0.
static int err = 0;
// Time each task stays "inside" its body, in milliseconds (argument of
// mysleep). May be overridden on the compiler command line.
#ifndef DELAY
#define DELAY 100
#endif

// ---------------------------------------------------------------------------
// internal data to emulate compiler codegen

// One entry of the dependence list handed to __kmpc_omp_task_with_deps.
typedef struct DEP {
  size_t addr;         // address of the dependence object; (size_t)-1 is one
                       // of the two omp_all_memory encodings used in main()
  size_t len;          // not used by this test, always set to 0
  unsigned char flags; // dependence kind as used in main(): 1 = IN, 2 = OUT,
                       // 4 = MUTEXINOUTSET, 8 = INOUTSET, or DEP_ALL_MEM
} dep;
// Flag value that marks a dependence entry as omp_all_memory.
#define DEP_ALL_MEM 0x80
// Task descriptor as returned by __kmpc_omp_task_alloc.
// NOTE(review): the field order presumably has to mirror the runtime's own
// task structure - confirm against the runtime headers before changing it.
typedef struct task {
  void **shareds;
  void *entry;
  int part_id;
  void *destr_thunk;
  int priority;
  long long device_id;
  int f_priv; // the single first-private variable of the emulated tasks
} task_t;
// Value passed as the "flags" argument of __kmpc_omp_task_alloc.
#define TIED 1
// Signature of a task entry (thunk) routine: (gtid, task descriptor).
typedef int (*entry_t)(int, task_t *);
// Source-location descriptor passed as the first argument of every
// __kmpc_* call made by this test.
typedef struct ID {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource; // location string; this test uses ";file;func;0;0;;"
} id;
67*d40108e0SAndreyChurbanov // thunk routine for tasks with ALL dependency
thunk_m(int gtid,task_t * ptask)68*d40108e0SAndreyChurbanov int thunk_m(int gtid, task_t* ptask) {
69*d40108e0SAndreyChurbanov   int lcheck, th;
70*d40108e0SAndreyChurbanov   #pragma omp atomic capture
71*d40108e0SAndreyChurbanov     lcheck = ++checker;
72*d40108e0SAndreyChurbanov   th = omp_get_thread_num();
73*d40108e0SAndreyChurbanov   printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
74*d40108e0SAndreyChurbanov   if (lcheck != 1) { // no more than 1 task at a time
75*d40108e0SAndreyChurbanov     err++;
76*d40108e0SAndreyChurbanov     printf("Error m1, checker %d != 1\n", lcheck);
77*d40108e0SAndreyChurbanov   }
78*d40108e0SAndreyChurbanov   mysleep(DELAY);
79*d40108e0SAndreyChurbanov   #pragma omp atomic read
80*d40108e0SAndreyChurbanov     lcheck = checker; // must still be equal to 1
81*d40108e0SAndreyChurbanov   if (lcheck != 1) {
82*d40108e0SAndreyChurbanov     err++;
83*d40108e0SAndreyChurbanov     printf("Error m2, checker %d != 1\n", lcheck);
84*d40108e0SAndreyChurbanov   }
85*d40108e0SAndreyChurbanov   #pragma omp atomic
86*d40108e0SAndreyChurbanov     --checker;
87*d40108e0SAndreyChurbanov   return 0;
88*d40108e0SAndreyChurbanov }
89*d40108e0SAndreyChurbanov // thunk routine for tasks with inoutset dependency
thunk_s(int gtid,task_t * ptask)90*d40108e0SAndreyChurbanov int thunk_s(int gtid, task_t* ptask) {
91*d40108e0SAndreyChurbanov   int lcheck, th;
92*d40108e0SAndreyChurbanov   #pragma omp atomic capture
93*d40108e0SAndreyChurbanov     lcheck = ++checker; // 1
94*d40108e0SAndreyChurbanov   th = omp_get_thread_num();
95*d40108e0SAndreyChurbanov   printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
96*d40108e0SAndreyChurbanov   if (lcheck != 1) { // no more than 1 task at a time
97*d40108e0SAndreyChurbanov     err++;
98*d40108e0SAndreyChurbanov     printf("Error s1, checker %d != 1\n", lcheck);
99*d40108e0SAndreyChurbanov   }
100*d40108e0SAndreyChurbanov   mysleep(DELAY);
101*d40108e0SAndreyChurbanov   #pragma omp atomic read
102*d40108e0SAndreyChurbanov     lcheck = checker; // must still be equal to 1
103*d40108e0SAndreyChurbanov   if (lcheck != 1) {
104*d40108e0SAndreyChurbanov     err++;
105*d40108e0SAndreyChurbanov     printf("Error s2, checker %d != 1\n", lcheck);
106*d40108e0SAndreyChurbanov   }
107*d40108e0SAndreyChurbanov   #pragma omp atomic
108*d40108e0SAndreyChurbanov     --checker;
109*d40108e0SAndreyChurbanov   return 0;
110*d40108e0SAndreyChurbanov }
111*d40108e0SAndreyChurbanov 
112*d40108e0SAndreyChurbanov #ifdef __cplusplus
113*d40108e0SAndreyChurbanov extern "C" {
114*d40108e0SAndreyChurbanov #endif
115*d40108e0SAndreyChurbanov int __kmpc_global_thread_num(id*);
116*d40108e0SAndreyChurbanov task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
117*d40108e0SAndreyChurbanov                               size_t sz, size_t shar, entry_t rtn);
118*d40108e0SAndreyChurbanov int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
119*d40108e0SAndreyChurbanov                               dep *dep_lst, int nd_noalias, dep *noalias_lst);
120*d40108e0SAndreyChurbanov static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
121*d40108e0SAndreyChurbanov #ifdef __cplusplus
122*d40108e0SAndreyChurbanov } // extern "C"
123*d40108e0SAndreyChurbanov #endif
124*d40108e0SAndreyChurbanov // End of internal data
125*d40108e0SAndreyChurbanov // ---------------------------------------------------------------------------
126*d40108e0SAndreyChurbanov 
main()127*d40108e0SAndreyChurbanov int main()
128*d40108e0SAndreyChurbanov {
129*d40108e0SAndreyChurbanov   int i1,i2,i3;
130*d40108e0SAndreyChurbanov   omp_set_num_threads(8);
131*d40108e0SAndreyChurbanov   omp_set_dynamic(0);
132*d40108e0SAndreyChurbanov   #pragma omp parallel
133*d40108e0SAndreyChurbanov   {
134*d40108e0SAndreyChurbanov     #pragma omp single nowait
135*d40108e0SAndreyChurbanov     {
136*d40108e0SAndreyChurbanov       dep sdep[2];
137*d40108e0SAndreyChurbanov       task_t *ptr;
138*d40108e0SAndreyChurbanov       int gtid = __kmpc_global_thread_num(&loc);
139*d40108e0SAndreyChurbanov       int t = omp_get_thread_num();
140*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
141*d40108e0SAndreyChurbanov       { // task 0
142*d40108e0SAndreyChurbanov         int lcheck, th;
143*d40108e0SAndreyChurbanov         #pragma omp atomic capture
144*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
145*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
146*d40108e0SAndreyChurbanov         printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
147*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
148*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
149*d40108e0SAndreyChurbanov           printf("Error1, checker %d, not 1 or 2\n", lcheck);
150*d40108e0SAndreyChurbanov         }
151*d40108e0SAndreyChurbanov         mysleep(DELAY);
152*d40108e0SAndreyChurbanov         #pragma omp atomic read
153*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
154*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
155*d40108e0SAndreyChurbanov           #pragma omp atomic
156*d40108e0SAndreyChurbanov             err++;
157*d40108e0SAndreyChurbanov           printf("Error2, checker %d, not 1 or 2\n", lcheck);
158*d40108e0SAndreyChurbanov         }
159*d40108e0SAndreyChurbanov         #pragma omp atomic
160*d40108e0SAndreyChurbanov           --checker;
161*d40108e0SAndreyChurbanov       }
162*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
163*d40108e0SAndreyChurbanov       { // task 1
164*d40108e0SAndreyChurbanov         int lcheck, th;
165*d40108e0SAndreyChurbanov         #pragma omp atomic capture
166*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
167*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
168*d40108e0SAndreyChurbanov         printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
169*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
170*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
171*d40108e0SAndreyChurbanov           printf("Error3, checker %d, not 1 or 2\n", lcheck);
172*d40108e0SAndreyChurbanov         }
173*d40108e0SAndreyChurbanov         mysleep(DELAY);
174*d40108e0SAndreyChurbanov         #pragma omp atomic read
175*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
176*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
177*d40108e0SAndreyChurbanov           err++;
178*d40108e0SAndreyChurbanov           printf("Error4, checker %d, not 1 or 2\n", lcheck);
179*d40108e0SAndreyChurbanov         }
180*d40108e0SAndreyChurbanov         #pragma omp atomic
181*d40108e0SAndreyChurbanov           --checker;
182*d40108e0SAndreyChurbanov       }
183*d40108e0SAndreyChurbanov // compiler codegen start
184*d40108e0SAndreyChurbanov       // task2
185*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
186*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)&i1;
187*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
188*d40108e0SAndreyChurbanov       sdep[0].flags = 1; // IN
189*d40108e0SAndreyChurbanov       sdep[1].addr = (size_t)&i2;
190*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
191*d40108e0SAndreyChurbanov       sdep[1].flags = 8; // INOUTSET
192*d40108e0SAndreyChurbanov       ptr->f_priv = t + 10; // init single first-private variable
193*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
194*d40108e0SAndreyChurbanov 
195*d40108e0SAndreyChurbanov       // task3
196*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
197*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)&i1; // to be ignored
198*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
199*d40108e0SAndreyChurbanov       sdep[0].flags = 1; // IN
200*d40108e0SAndreyChurbanov       sdep[1].addr = 0;
201*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
202*d40108e0SAndreyChurbanov       sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
203*d40108e0SAndreyChurbanov       ptr->f_priv = t + 20; // init single first-private variable
204*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
205*d40108e0SAndreyChurbanov // compiler codegen end
206*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
207*d40108e0SAndreyChurbanov       { // task 4
208*d40108e0SAndreyChurbanov         int lcheck, th;
209*d40108e0SAndreyChurbanov         #pragma omp atomic capture
210*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
211*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
212*d40108e0SAndreyChurbanov         printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
213*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
214*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
215*d40108e0SAndreyChurbanov           printf("Error5, checker %d, not 1 or 2\n", lcheck);
216*d40108e0SAndreyChurbanov         }
217*d40108e0SAndreyChurbanov         mysleep(DELAY);
218*d40108e0SAndreyChurbanov         #pragma omp atomic read
219*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
220*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
221*d40108e0SAndreyChurbanov           err++;
222*d40108e0SAndreyChurbanov           printf("Error6, checker %d, not 1 or 2\n", lcheck);
223*d40108e0SAndreyChurbanov         }
224*d40108e0SAndreyChurbanov         #pragma omp atomic
225*d40108e0SAndreyChurbanov           --checker;
226*d40108e0SAndreyChurbanov       }
227*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
228*d40108e0SAndreyChurbanov       { // task 5
229*d40108e0SAndreyChurbanov         int lcheck, th;
230*d40108e0SAndreyChurbanov         #pragma omp atomic capture
231*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
232*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
233*d40108e0SAndreyChurbanov         printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
234*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
235*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
236*d40108e0SAndreyChurbanov           printf("Error7, checker %d, not 1 or 2\n", lcheck);
237*d40108e0SAndreyChurbanov         }
238*d40108e0SAndreyChurbanov         mysleep(DELAY);
239*d40108e0SAndreyChurbanov         #pragma omp atomic read
240*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
241*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
242*d40108e0SAndreyChurbanov           err++;
243*d40108e0SAndreyChurbanov           printf("Error8, checker %d, not 1 or 2\n", lcheck);
244*d40108e0SAndreyChurbanov         }
245*d40108e0SAndreyChurbanov         #pragma omp atomic
246*d40108e0SAndreyChurbanov           --checker;
247*d40108e0SAndreyChurbanov       }
248*d40108e0SAndreyChurbanov // compiler codegen start
249*d40108e0SAndreyChurbanov       // task6
250*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
251*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)(-1); // omp_all_memory
252*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
253*d40108e0SAndreyChurbanov       sdep[0].flags = 2; // OUT
254*d40108e0SAndreyChurbanov       ptr->f_priv = t + 30; // init single first-private variable
255*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
256*d40108e0SAndreyChurbanov 
257*d40108e0SAndreyChurbanov       // task7
258*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
259*d40108e0SAndreyChurbanov       sdep[0].addr = 0;
260*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
261*d40108e0SAndreyChurbanov       sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
262*d40108e0SAndreyChurbanov       sdep[1].addr = (size_t)&i3; // to be ignored
263*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
264*d40108e0SAndreyChurbanov       sdep[1].flags = 4; // MUTEXINOUTSET
265*d40108e0SAndreyChurbanov       ptr->f_priv = t + 40; // init single first-private variable
266*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
267*d40108e0SAndreyChurbanov // compiler codegen end
268*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i3)
269*d40108e0SAndreyChurbanov       { // task 8
270*d40108e0SAndreyChurbanov         int lcheck, th;
271*d40108e0SAndreyChurbanov         #pragma omp atomic capture
272*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1
273*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
274*d40108e0SAndreyChurbanov         printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
275*d40108e0SAndreyChurbanov         if (lcheck != 1) {
276*d40108e0SAndreyChurbanov           err++;
277*d40108e0SAndreyChurbanov           printf("Error9, checker %d, != 1\n", lcheck);
278*d40108e0SAndreyChurbanov         }
279*d40108e0SAndreyChurbanov         mysleep(DELAY);
280*d40108e0SAndreyChurbanov         #pragma omp atomic read
281*d40108e0SAndreyChurbanov           lcheck = checker;
282*d40108e0SAndreyChurbanov         if (lcheck != 1) {
283*d40108e0SAndreyChurbanov           err++;
284*d40108e0SAndreyChurbanov           printf("Error10, checker %d, != 1\n", lcheck);
285*d40108e0SAndreyChurbanov         }
286*d40108e0SAndreyChurbanov         #pragma omp atomic
287*d40108e0SAndreyChurbanov           --checker;
288*d40108e0SAndreyChurbanov       }
289*d40108e0SAndreyChurbanov     } // single
290*d40108e0SAndreyChurbanov   } // parallel
291*d40108e0SAndreyChurbanov   if (err == 0 && checker == 0) {
292*d40108e0SAndreyChurbanov     printf("passed\n");
293*d40108e0SAndreyChurbanov     return 0;
294*d40108e0SAndreyChurbanov   } else {
295*d40108e0SAndreyChurbanov     printf("failed, err = %d, checker = %d\n", err, checker);
296*d40108e0SAndreyChurbanov     return 1;
297*d40108e0SAndreyChurbanov   }
298*d40108e0SAndreyChurbanov }
299