// test/offloading/bug51781.c

// Use the generic state machine.  On some architectures, other threads in the
// main thread's warp must avoid barrier instructions.
//
// RUN: %libomptarget-compile-run-and-check-generic

// SPMDize.  There is no main thread, so there's no issue.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// SPMD: Transformed generic-mode kernel to SPMD-mode.

// Use the custom state machine, which must avoid the same barrier problem as
// the generic state machine.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// Repeat with reduction clause, which has managed to break the custom state
// machine in the past.
//
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \
// RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// CUSTOM: Rewriting generic-mode kernel with a customized state machine.

// REDUCTION(...) expands to an OpenMP reduction clause when the test is built
// with ADD_REDUCTION set to a nonzero value, and to nothing otherwise, so one
// pragma line can serve both variants of the test.
#if ADD_REDUCTION
#define REDUCTION(...) reduction(__VA_ARGS__)
#else
#define REDUCTION(...)
#endif

#include <stdio.h>
// Runs a single-team target region that adds 5 to x and then stores 6 to y
// from a nested parallel region; both variables are mapped tofrom, and the
// host prints them afterwards.  REDUCTION(+ : x) optionally adds a reduction
// clause on x (see the macro definition earlier in this file).
int main() {
  int x = 0, y = 1;
#pragma omp target teams num_teams(1) map(tofrom : x, y) REDUCTION(+ : x)
  {
    x += 5;
#pragma omp parallel
    y = 6;
  }
  // CHECK: 5, 6
  printf("%d, %d\n", x, y);
  return 0;
}