// xref: /llvm-project/offload/test/offloading/bug51781.c (revision dcc27ea41ebc7244a10f15114f60f5e8d6b93b34)
// Use the generic state machine.  On some architectures, other threads in the
// main thread's warp must avoid barrier instructions.
//
// RUN: %libomptarget-compile-run-and-check-generic

// SPMDize.  There is no main thread, so there's no issue.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// SPMD: Transformed generic-mode kernel to SPMD-mode.

// Use the custom state machine, which must avoid the same barrier problem as
// the generic state machine.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// Repeat with reduction clause, which has managed to break the custom state
// machine in the past.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// CUSTOM: Rewriting generic-mode kernel with a customized state machine.

// REDUCTION(...) makes the reduction clause on the target construct in main()
// optional.  When ADD_REDUCTION evaluates to nonzero (one of the test
// configurations compiles with -DADD_REDUCTION) the arguments are forwarded
// into a reduction(...) clause; otherwise the macro expands to nothing and the
// construct is compiled without a reduction.
#if ADD_REDUCTION
#define REDUCTION(...) reduction(__VA_ARGS__)
#else
#define REDUCTION(...)
#endif

#include <stdio.h>
// Offloads one target region that adds 5 to x and, from a nested parallel
// region, stores 6 into y; both variables are mapped back and printed on the
// host so the output can be compared with the expected "5, 6".
int main() {
  int x = 0, y = 1;
  // num_teams(1) requests a single team, so the update of x below is performed
  // once.  REDUCTION(+ : x) contributes a reduction clause only in the
  // ADD_REDUCTION configuration and expands to nothing otherwise.
#pragma omp target teams num_teams(1) map(tofrom : x, y) REDUCTION(+ : x)
  {
    x += 5;
    // Each thread of the parallel region writes the same value, so the final
    // y does not depend on how many threads execute it.
#pragma omp parallel
    y = 6;
  }
  // CHECK: 5, 6
  printf("%d, %d\n", x, y);
  return 0;
}