1 // RUN: %libomptarget-compile-run-and-check-generic 2 // RUN: %libomptarget-compileopt-run-and-check-generic 3 4 // TODO: This requires malloc support for the threads states. 5 // FIXME: Flaky on all GPU targets. 6 // UNSUPPORTED: amdgcn-amd-amdhsa 7 // UNSUPPORTED: nvptx64-nvidia-cuda 8 // UNSUPPORTED: nvptx64-nvidia-cuda-LTO 9 10 #include <omp.h> 11 #include <stdio.h> 12 #define N 10 13 isCPU()14int isCPU() { return 1; } 15 16 #pragma omp begin declare variant match(device = {kind(gpu)}) isCPU()17int isCPU() { return 0; } 18 #pragma omp end declare variant 19 main(void)20int main(void) { 21 long int aa = 0; 22 int res = 0; 23 24 int ng = 12; 25 int cmom = 14; 26 int nxyz; 27 28 #pragma omp target map(from : nxyz, ng, cmom) 29 { 30 nxyz = isCPU() ? 2 : 5000; 31 ng = isCPU() ? 2 : 12; 32 cmom = isCPU() ? 2 : 14; 33 } 34 35 #pragma omp target teams distribute num_teams(nxyz) \ 36 thread_limit(ng *(cmom - 1)) map(tofrom : aa) 37 for (int gid = 0; gid < nxyz; gid++) { 38 #pragma omp parallel for collapse(2) 39 for (unsigned int g = 0; g < ng; g++) { 40 for (unsigned int l = 0; l < cmom - 1; l++) { 41 int a = 0; 42 #pragma omp parallel for reduction(+ : a) 43 for (int i = 0; i < N; i++) { 44 a += i; 45 } 46 #pragma omp atomic 47 aa += a; 48 } 49 } 50 } 51 long exp = (long)ng * (cmom - 1) * nxyz * (N * (N - 1) / 2); 52 printf("The result is = %ld exp:%ld!\n", aa, exp); 53 if (aa != exp) { 54 printf("Failed %ld\n", aa); 55 return 1; 56 } 57 // CHECK: Success 58 printf("Success\n"); 59 return 0; 60 } 61