xref: /llvm-project/openmp/runtime/test/tasking/kmp_taskwait_depend_all.c (revision d40108e0af08389a791c7b6783f416486068be96)
1*d40108e0SAndreyChurbanov // RUN: %libomp-compile-and-run
2*d40108e0SAndreyChurbanov // The runtime currently does not get dependency information from GCC.
3*d40108e0SAndreyChurbanov // UNSUPPORTED: gcc
4*d40108e0SAndreyChurbanov 
5*d40108e0SAndreyChurbanov // Tests OMP 5.x task dependence "omp_all_memory",
6*d40108e0SAndreyChurbanov // emulates compiler codegen versions for new dep kind
7*d40108e0SAndreyChurbanov //
8*d40108e0SAndreyChurbanov // Task tree created:
9*d40108e0SAndreyChurbanov //      task0 - task1 (in: i1, i2)
10*d40108e0SAndreyChurbanov //             \
11*d40108e0SAndreyChurbanov //        task2 (inoutset: i2), (in: i1)
12*d40108e0SAndreyChurbanov //             /
13*d40108e0SAndreyChurbanov //        task3 (omp_all_memory) via flag=0x80
14*d40108e0SAndreyChurbanov //             /
15*d40108e0SAndreyChurbanov //      task4 - task5 (in: i1, i2)
16*d40108e0SAndreyChurbanov //           /
17*d40108e0SAndreyChurbanov //       task6 (omp_all_memory) via addr=-1
18*d40108e0SAndreyChurbanov //           /
19*d40108e0SAndreyChurbanov //       task7 (omp_all_memory) via flag=0x80
20*d40108e0SAndreyChurbanov //           /
21*d40108e0SAndreyChurbanov //       task8 (in: i3)
22*d40108e0SAndreyChurbanov //           /
23*d40108e0SAndreyChurbanov //       task9 - no dependences
24*d40108e0SAndreyChurbanov //           /
25*d40108e0SAndreyChurbanov //       taskwait (omp_all_memory) (should not wait for task9, see prints)
26*d40108e0SAndreyChurbanov //
27*d40108e0SAndreyChurbanov #include <stdio.h>
28*d40108e0SAndreyChurbanov #include <omp.h>
29*d40108e0SAndreyChurbanov 
30*d40108e0SAndreyChurbanov #ifdef _WIN32
31*d40108e0SAndreyChurbanov #include <windows.h>
32*d40108e0SAndreyChurbanov #define mysleep(n) Sleep(n)
33*d40108e0SAndreyChurbanov #else
34*d40108e0SAndreyChurbanov #include <unistd.h>
35*d40108e0SAndreyChurbanov #define mysleep(n) usleep((n)*1000)
36*d40108e0SAndreyChurbanov #endif
37*d40108e0SAndreyChurbanov 
// Shared test state, updated from concurrently running tasks.

// Number of checked tasks running right now. The tasks that must run alone
// (thunk_m, thunk_s, task 8) expect it to be exactly 1 while they execute;
// the plain "in" tasks (0, 1, 4, 5) accept 1 or 2.
static int checker = 0;
// Number of violations detected; main() reports failure if it is non-zero.
static int err = 0;
// Set to 1 once the emulated taskwait has returned; releases task 9.
static int taskwait_flag = 0;

// Sleep interval, in milliseconds, used inside each checked task.
// May be overridden on the compiler command line.
#ifndef DELAY
#define DELAY 100
#endif
46*d40108e0SAndreyChurbanov 
47*d40108e0SAndreyChurbanov // ---------------------------------------------------------------------------
48*d40108e0SAndreyChurbanov // internal data to emulate compiler codegen
// One entry of a task dependence list, as handed to
// __kmpc_omp_task_with_deps() / __kmpc_omp_wait_deps().
// NOTE(review): presumably mirrors the runtime's own dependence descriptor,
// so field order and types must not be changed.
typedef struct DEP {
  size_t addr;         // address of the dependence object;
                       // (size_t)-1 is used below to mean omp_all_memory
  size_t len;          // always set to 0 in this test ("not used")
  unsigned char flags; // dependence kind as used in this test:
                       //   1 = IN, 2 = OUT, 4 = MUTEXINOUTSET, 8 = INOUTSET,
                       //   DEP_ALL_MEM = omp_all_memory
} dep;
// Flag value that marks a dependence entry as omp_all_memory.
#define DEP_ALL_MEM 0x80
// Task descriptor returned by __kmpc_omp_task_alloc() and passed to the task
// entry routine. Only f_priv is touched by this test.
// NOTE(review): the leading fields presumably mirror the runtime's task
// structure; keep their order and types as they are.
typedef struct task {
  void **shareds;      // not used by this test
  void *entry;         // not used by this test
  int part_id;         // not used by this test
  void *destr_thunk;   // not used by this test
  int priority;        // not used by this test
  long long device_id; // not used by this test
  int f_priv;          // the single first-private variable of the task
} task_t;
// Value passed as the "flags" argument of __kmpc_omp_task_alloc().
#define TIED 1
// Signature of a task entry (thunk) routine: (gtid, task descriptor).
typedef int (*entry_t)(int, task_t *);
// Source-location descriptor passed as the first argument of every
// __kmpc_* call in this test.
// NOTE(review): presumably mirrors the runtime's location structure; keep
// the field order and types as they are.
typedef struct ID {
  int reserved_1;
  int flags;     // the only instance in this file ("loc") sets this to 2
  int reserved_2;
  int reserved_3;
  char *psource; // location string, e.g. ";file;func;0;0;;"
} id;
73*d40108e0SAndreyChurbanov // thunk routine for tasks with ALL dependency
thunk_m(int gtid,task_t * ptask)74*d40108e0SAndreyChurbanov int thunk_m(int gtid, task_t* ptask) {
75*d40108e0SAndreyChurbanov   int lcheck, th;
76*d40108e0SAndreyChurbanov   #pragma omp atomic capture
77*d40108e0SAndreyChurbanov     lcheck = ++checker;
78*d40108e0SAndreyChurbanov   th = omp_get_thread_num();
79*d40108e0SAndreyChurbanov   printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
80*d40108e0SAndreyChurbanov   if (lcheck != 1) { // no more than 1 task at a time
81*d40108e0SAndreyChurbanov     err++;
82*d40108e0SAndreyChurbanov     printf("Error m1, checker %d != 1\n", lcheck);
83*d40108e0SAndreyChurbanov   }
84*d40108e0SAndreyChurbanov   mysleep(DELAY);
85*d40108e0SAndreyChurbanov   #pragma omp atomic read
86*d40108e0SAndreyChurbanov     lcheck = checker; // must still be equal to 1
87*d40108e0SAndreyChurbanov   if (lcheck != 1) {
88*d40108e0SAndreyChurbanov     err++;
89*d40108e0SAndreyChurbanov     printf("Error m2, checker %d != 1\n", lcheck);
90*d40108e0SAndreyChurbanov   }
91*d40108e0SAndreyChurbanov   #pragma omp atomic
92*d40108e0SAndreyChurbanov     --checker;
93*d40108e0SAndreyChurbanov   return 0;
94*d40108e0SAndreyChurbanov }
95*d40108e0SAndreyChurbanov // thunk routine for tasks with inoutset dependency
thunk_s(int gtid,task_t * ptask)96*d40108e0SAndreyChurbanov int thunk_s(int gtid, task_t* ptask) {
97*d40108e0SAndreyChurbanov   int lcheck, th;
98*d40108e0SAndreyChurbanov   #pragma omp atomic capture
99*d40108e0SAndreyChurbanov     lcheck = ++checker; // 1
100*d40108e0SAndreyChurbanov   th = omp_get_thread_num();
101*d40108e0SAndreyChurbanov   printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
102*d40108e0SAndreyChurbanov   if (lcheck != 1) { // no more than 1 task at a time
103*d40108e0SAndreyChurbanov     err++;
104*d40108e0SAndreyChurbanov     printf("Error s1, checker %d != 1\n", lcheck);
105*d40108e0SAndreyChurbanov   }
106*d40108e0SAndreyChurbanov   mysleep(DELAY);
107*d40108e0SAndreyChurbanov   #pragma omp atomic read
108*d40108e0SAndreyChurbanov     lcheck = checker; // must still be equal to 1
109*d40108e0SAndreyChurbanov   if (lcheck != 1) {
110*d40108e0SAndreyChurbanov     err++;
111*d40108e0SAndreyChurbanov     printf("Error s2, checker %d != 1\n", lcheck);
112*d40108e0SAndreyChurbanov   }
113*d40108e0SAndreyChurbanov   #pragma omp atomic
114*d40108e0SAndreyChurbanov     --checker;
115*d40108e0SAndreyChurbanov   return 0;
116*d40108e0SAndreyChurbanov }
117*d40108e0SAndreyChurbanov 
118*d40108e0SAndreyChurbanov #ifdef __cplusplus
119*d40108e0SAndreyChurbanov extern "C" {
120*d40108e0SAndreyChurbanov #endif
121*d40108e0SAndreyChurbanov int __kmpc_global_thread_num(id*);
122*d40108e0SAndreyChurbanov task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
123*d40108e0SAndreyChurbanov                               size_t sz, size_t shar, entry_t rtn);
124*d40108e0SAndreyChurbanov int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
125*d40108e0SAndreyChurbanov                               dep *dep_lst, int nd_noalias, dep *noalias_lst);
126*d40108e0SAndreyChurbanov void __kmpc_omp_wait_deps(id *loc, int gtid, int ndeps, dep *dep_lst,
127*d40108e0SAndreyChurbanov                           int ndeps_noalias, dep *noalias_dep_lst);
128*d40108e0SAndreyChurbanov static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
129*d40108e0SAndreyChurbanov #ifdef __cplusplus
130*d40108e0SAndreyChurbanov } // extern "C"
131*d40108e0SAndreyChurbanov #endif
132*d40108e0SAndreyChurbanov // End of internal data
133*d40108e0SAndreyChurbanov // ---------------------------------------------------------------------------
134*d40108e0SAndreyChurbanov 
main()135*d40108e0SAndreyChurbanov int main()
136*d40108e0SAndreyChurbanov {
137*d40108e0SAndreyChurbanov   int i1,i2,i3;
138*d40108e0SAndreyChurbanov   omp_set_num_threads(8);
139*d40108e0SAndreyChurbanov   omp_set_dynamic(0);
140*d40108e0SAndreyChurbanov   #pragma omp parallel
141*d40108e0SAndreyChurbanov   {
142*d40108e0SAndreyChurbanov     #pragma omp single nowait
143*d40108e0SAndreyChurbanov     {
144*d40108e0SAndreyChurbanov       dep sdep[2];
145*d40108e0SAndreyChurbanov       task_t *ptr;
146*d40108e0SAndreyChurbanov       int gtid = __kmpc_global_thread_num(&loc);
147*d40108e0SAndreyChurbanov       int t = omp_get_thread_num();
148*d40108e0SAndreyChurbanov       // Create longest task first to ensure it is stolen.
149*d40108e0SAndreyChurbanov       // The test may hang if the task created last and
150*d40108e0SAndreyChurbanov       // executed by a thread which executes taskwait.
151*d40108e0SAndreyChurbanov       #pragma omp task
152*d40108e0SAndreyChurbanov       { // task 9 - long running task
153*d40108e0SAndreyChurbanov         int flag;
154*d40108e0SAndreyChurbanov         int th = omp_get_thread_num();
155*d40108e0SAndreyChurbanov         printf("signalled independent task 9_%d, th %d started....\n", t, th);
156*d40108e0SAndreyChurbanov         // Wait for taskwait depend() to finish
157*d40108e0SAndreyChurbanov         // If the taskwait depend() improperly depends on this task
158*d40108e0SAndreyChurbanov         // to finish, then the test will hang and a timeout should trigger
159*d40108e0SAndreyChurbanov         while (1) {
160*d40108e0SAndreyChurbanov           #pragma omp atomic read
161*d40108e0SAndreyChurbanov           flag = taskwait_flag;
162*d40108e0SAndreyChurbanov           if (flag == 1)
163*d40108e0SAndreyChurbanov             break;
164*d40108e0SAndreyChurbanov         }
165*d40108e0SAndreyChurbanov         printf("signalled independent task 9_%d, th %d finished....\n", t, th);
166*d40108e0SAndreyChurbanov       }
167*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
168*d40108e0SAndreyChurbanov       { // task 0
169*d40108e0SAndreyChurbanov         int lcheck, th;
170*d40108e0SAndreyChurbanov         #pragma omp atomic capture
171*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
172*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
173*d40108e0SAndreyChurbanov         printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
174*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
175*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
176*d40108e0SAndreyChurbanov           printf("Error1, checker %d, not 1 or 2\n", lcheck);
177*d40108e0SAndreyChurbanov         }
178*d40108e0SAndreyChurbanov         mysleep(DELAY);
179*d40108e0SAndreyChurbanov         #pragma omp atomic read
180*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
181*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
182*d40108e0SAndreyChurbanov           #pragma omp atomic
183*d40108e0SAndreyChurbanov             err++;
184*d40108e0SAndreyChurbanov           printf("Error2, checker %d, not 1 or 2\n", lcheck);
185*d40108e0SAndreyChurbanov         }
186*d40108e0SAndreyChurbanov         #pragma omp atomic
187*d40108e0SAndreyChurbanov           --checker;
188*d40108e0SAndreyChurbanov       }
189*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
190*d40108e0SAndreyChurbanov       { // task 1
191*d40108e0SAndreyChurbanov         int lcheck, th;
192*d40108e0SAndreyChurbanov         #pragma omp atomic capture
193*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
194*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
195*d40108e0SAndreyChurbanov         printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
196*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
197*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
198*d40108e0SAndreyChurbanov           printf("Error3, checker %d, not 1 or 2\n", lcheck);
199*d40108e0SAndreyChurbanov         }
200*d40108e0SAndreyChurbanov         mysleep(DELAY);
201*d40108e0SAndreyChurbanov         #pragma omp atomic read
202*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
203*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
204*d40108e0SAndreyChurbanov           err++;
205*d40108e0SAndreyChurbanov           printf("Error4, checker %d, not 1 or 2\n", lcheck);
206*d40108e0SAndreyChurbanov         }
207*d40108e0SAndreyChurbanov         #pragma omp atomic
208*d40108e0SAndreyChurbanov           --checker;
209*d40108e0SAndreyChurbanov       }
210*d40108e0SAndreyChurbanov // compiler codegen start
211*d40108e0SAndreyChurbanov       // task2
212*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
213*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)&i1;
214*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
215*d40108e0SAndreyChurbanov       sdep[0].flags = 1; // IN
216*d40108e0SAndreyChurbanov       sdep[1].addr = (size_t)&i2;
217*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
218*d40108e0SAndreyChurbanov       sdep[1].flags = 8; // INOUTSET
219*d40108e0SAndreyChurbanov       ptr->f_priv = t + 10; // init single first-private variable
220*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
221*d40108e0SAndreyChurbanov 
222*d40108e0SAndreyChurbanov       // task3
223*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
224*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)&i1; // to be ignored
225*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
226*d40108e0SAndreyChurbanov       sdep[0].flags = 1; // IN
227*d40108e0SAndreyChurbanov       sdep[1].addr = 0;
228*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
229*d40108e0SAndreyChurbanov       sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
230*d40108e0SAndreyChurbanov       ptr->f_priv = t + 20; // init single first-private variable
231*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
232*d40108e0SAndreyChurbanov // compiler codegen end
233*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
234*d40108e0SAndreyChurbanov       { // task 4
235*d40108e0SAndreyChurbanov         int lcheck, th;
236*d40108e0SAndreyChurbanov         #pragma omp atomic capture
237*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
238*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
239*d40108e0SAndreyChurbanov         printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
240*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
241*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
242*d40108e0SAndreyChurbanov           printf("Error5, checker %d, not 1 or 2\n", lcheck);
243*d40108e0SAndreyChurbanov         }
244*d40108e0SAndreyChurbanov         mysleep(DELAY);
245*d40108e0SAndreyChurbanov         #pragma omp atomic read
246*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
247*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
248*d40108e0SAndreyChurbanov           err++;
249*d40108e0SAndreyChurbanov           printf("Error6, checker %d, not 1 or 2\n", lcheck);
250*d40108e0SAndreyChurbanov         }
251*d40108e0SAndreyChurbanov         #pragma omp atomic
252*d40108e0SAndreyChurbanov           --checker;
253*d40108e0SAndreyChurbanov       }
254*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i1, i2)
255*d40108e0SAndreyChurbanov       { // task 5
256*d40108e0SAndreyChurbanov         int lcheck, th;
257*d40108e0SAndreyChurbanov         #pragma omp atomic capture
258*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1 or 2
259*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
260*d40108e0SAndreyChurbanov         printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
261*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
262*d40108e0SAndreyChurbanov           err++; // no more than 2 tasks concurrently
263*d40108e0SAndreyChurbanov           printf("Error7, checker %d, not 1 or 2\n", lcheck);
264*d40108e0SAndreyChurbanov         }
265*d40108e0SAndreyChurbanov         mysleep(DELAY);
266*d40108e0SAndreyChurbanov         #pragma omp atomic read
267*d40108e0SAndreyChurbanov           lcheck = checker; // 1 or 2
268*d40108e0SAndreyChurbanov         if (lcheck > 2 || lcheck < 1) {
269*d40108e0SAndreyChurbanov           err++;
270*d40108e0SAndreyChurbanov           printf("Error8, checker %d, not 1 or 2\n", lcheck);
271*d40108e0SAndreyChurbanov         }
272*d40108e0SAndreyChurbanov         #pragma omp atomic
273*d40108e0SAndreyChurbanov           --checker;
274*d40108e0SAndreyChurbanov       }
275*d40108e0SAndreyChurbanov // compiler codegen start
276*d40108e0SAndreyChurbanov       // task6
277*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
278*d40108e0SAndreyChurbanov       sdep[0].addr = (size_t)(-1); // omp_all_memory
279*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
280*d40108e0SAndreyChurbanov       sdep[0].flags = 2; // OUT
281*d40108e0SAndreyChurbanov       ptr->f_priv = t + 30; // init single first-private variable
282*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
283*d40108e0SAndreyChurbanov 
284*d40108e0SAndreyChurbanov       // task7
285*d40108e0SAndreyChurbanov       ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
286*d40108e0SAndreyChurbanov       sdep[0].addr = 0;
287*d40108e0SAndreyChurbanov       sdep[0].len = 0;   // not used
288*d40108e0SAndreyChurbanov       sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
289*d40108e0SAndreyChurbanov       sdep[1].addr = (size_t)&i3; // to be ignored
290*d40108e0SAndreyChurbanov       sdep[1].len = 0;   // not used
291*d40108e0SAndreyChurbanov       sdep[1].flags = 4; // MUTEXINOUTSET
292*d40108e0SAndreyChurbanov       ptr->f_priv = t + 40; // init single first-private variable
293*d40108e0SAndreyChurbanov       __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
294*d40108e0SAndreyChurbanov // compiler codegen end
295*d40108e0SAndreyChurbanov       #pragma omp task depend(in: i3)
296*d40108e0SAndreyChurbanov       { // task 8
297*d40108e0SAndreyChurbanov         int lcheck, th;
298*d40108e0SAndreyChurbanov         #pragma omp atomic capture
299*d40108e0SAndreyChurbanov           lcheck = ++checker; // 1
300*d40108e0SAndreyChurbanov         th = omp_get_thread_num();
301*d40108e0SAndreyChurbanov         printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
302*d40108e0SAndreyChurbanov         if (lcheck != 1) {
303*d40108e0SAndreyChurbanov           err++;
304*d40108e0SAndreyChurbanov           printf("Error9, checker %d, != 1\n", lcheck);
305*d40108e0SAndreyChurbanov         }
306*d40108e0SAndreyChurbanov         mysleep(DELAY);
307*d40108e0SAndreyChurbanov         #pragma omp atomic read
308*d40108e0SAndreyChurbanov           lcheck = checker;
309*d40108e0SAndreyChurbanov         if (lcheck != 1) {
310*d40108e0SAndreyChurbanov           err++;
311*d40108e0SAndreyChurbanov           printf("Error10, checker %d, != 1\n", lcheck);
312*d40108e0SAndreyChurbanov         }
313*d40108e0SAndreyChurbanov         #pragma omp atomic
314*d40108e0SAndreyChurbanov           --checker;
315*d40108e0SAndreyChurbanov       }
316*d40108e0SAndreyChurbanov       mysleep(1); // wait a bit to ensure at least first task is stolen
317*d40108e0SAndreyChurbanov //  #pragma omp taskwait depend(omp_all_memory: out)
318*d40108e0SAndreyChurbanov       printf("all 10 tasks generated;\n"
319*d40108e0SAndreyChurbanov              "taskwait depend(omp_all_memory: out)  started, th %d\n", t);
320*d40108e0SAndreyChurbanov       __kmpc_omp_wait_deps(&loc, gtid, 1, sdep, 0, 0);
321*d40108e0SAndreyChurbanov       #pragma omp atomic write
322*d40108e0SAndreyChurbanov         taskwait_flag = 1;
323*d40108e0SAndreyChurbanov       printf("taskwait depend(omp_all_memory: out)  passed, th %d\n", t);
324*d40108e0SAndreyChurbanov       fflush(0);
325*d40108e0SAndreyChurbanov     } // single
326*d40108e0SAndreyChurbanov   } // parallel
327*d40108e0SAndreyChurbanov   if (err == 0 && checker == 0) {
328*d40108e0SAndreyChurbanov     printf("passed\n");
329*d40108e0SAndreyChurbanov     return 0;
330*d40108e0SAndreyChurbanov   } else {
331*d40108e0SAndreyChurbanov     printf("failed, err = %d, checker = %d\n", err, checker);
332*d40108e0SAndreyChurbanov     return 1;
333*d40108e0SAndreyChurbanov   }
334*d40108e0SAndreyChurbanov }
335