1! Offloading test checking the use of the depend clause on the target construct 2! REQUIRES: flang, amdgcn-amd-amdhsa 3! UNSUPPORTED: nvptx64-nvidia-cuda 4! UNSUPPORTED: nvptx64-nvidia-cuda-LTO 5! UNSUPPORTED: aarch64-unknown-linux-gnu 6! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO 7! UNSUPPORTED: x86_64-unknown-linux-gnu 8! UNSUPPORTED: x86_64-unknown-linux-gnu-LTO 9 10! RUN: %libomptarget-compile-fortran-run-and-check-generic 11program main 12 implicit none 13 integer :: a = 0 14 INTERFACE 15 FUNCTION omp_get_device_num() BIND(C) 16 USE, INTRINSIC :: iso_c_binding, ONLY: C_INT 17 integer :: omp_get_device_num 18 END FUNCTION omp_get_device_num 19 END INTERFACE 20 21 call foo(5, a) 22 print*, "======= FORTRAN Test passed! =======" 23 print*, "foo(5) returned ", a, ", expected 6\n" 24 25 ! stop 0 26 contains 27 subroutine foo(N, r) 28 integer, intent(in) :: N 29 integer, intent(out) :: r 30 integer :: z, i, accumulator 31 z = 1 32 accumulator = 0 33 ! Spawn 3 threads 34 !$omp parallel num_threads(3) 35 36 ! A single thread will then create two tasks - one is the 'producer' and 37 ! potentially slower task that updates 'z' to 'N'. The second is an 38 ! offloaded target task that increments 'z'. If the depend clauses work 39 ! properly, the target task should wait for the 'producer' task to 40 ! complete before incrementing 'z'. We use 'omp single' here because the 41 ! depend clause establishes dependencies between sibling tasks only. 42 ! This is the easiest way of creating two sibling tasks. 43 !$omp single 44 !$omp task depend(out: z) shared(z) 45 do i=1, 32766 46 ! dumb loop nest to slow down the update of 'z'. 47 ! Adding a function call slows down the producer to the point 48 ! that removing the depend clause from the target construct below 49 ! frequently results in the wrong answer. 50 accumulator = accumulator + omp_get_device_num() 51 end do 52 z = N 53 !$omp end task 54 55 ! z is 5 now. Increment z to 6. 56 !$omp target map(tofrom: z) depend(in:z) 57 z = z + 1 58 !$omp end target 59 !$omp end single 60 !$omp end parallel 61 ! Use 'accumulator' so it is not optimized away by the compiler. 62 print *, accumulator 63 r = z 64 end subroutine foo 65 66!CHECK: ======= FORTRAN Test passed! ======= 67!CHECK: foo(5) returned 6 , expected 6 68end program main 69