// Use the generic state machine. On some architectures, other threads in the
// main thread's warp must avoid barrier instructions.
//
// RUN: %libomptarget-compile-run-and-check-generic

// SPMDize. There is no main thread, so there's no issue.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// SPMD: Transformed generic-mode kernel to SPMD-mode.

// Use the custom state machine, which must avoid the same barrier problem as
// the generic state machine.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// Repeat with reduction clause, which has managed to break the custom state
// machine in the past.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// CUSTOM: Rewriting generic-mode kernel with a customized state machine.

// REDUCTION(...) expands to an OpenMP reduction clause only when the test is
// built with -DADD_REDUCTION; otherwise it expands to nothing, so the same
// pragma below serves both configurations.
#if ADD_REDUCTION
#define REDUCTION(...) reduction(__VA_ARGS__)
#else
#define REDUCTION(...)
#endif

#include <stdio.h>

// Runs one single-team target region that updates x directly in the teams
// region and writes y from a nested parallel region, then prints both values
// on the host so the run output can be verified.
int main() {
  int x = 0, y = 1;
#pragma omp target teams num_teams(1) map(tofrom : x, y) REDUCTION(+ : x)
  {
    // Executed in the teams region itself, outside the nested parallel region.
    x += 5;
    // The nested parallel region is what the state-machine variants exercised
    // by the command lines above have to handle.
#pragma omp parallel
    y = 6;
  }
  // CHECK: 5, 6
  printf("%d, %d\n", x, y);
  return 0;
}