xref: /llvm-project/flang/test/Lower/CUDA/cuda-kernel-do-reduction.cuf (revision 7665d3d90da7f32e56cb57eb192dc8f189730686)
! Test CUDA Fortran kernel do reduction
! RUN: bbc -emit-fir -fcuda -o - %s | FileCheck %s
3*7665d3d9SIman Hosseini
module mod1
   ! Lowering fixture: a host subroutine that runs a `!$cuf kernel do` loop
   ! carrying two reductions (a sum and a product) over a small device array.
   implicit none
contains
   ! Fills a host array with 1..asize, copies it to the device array, and
   ! accumulates its elements in a kernel-do loop with reduce(+) and reduce(*)
   ! clauses. The FileCheck directives in this routine pin the fir.declare ops
   ! of the two reduction variables and the reduce(...) clause of the lowered
   ! kernel; they must stay in this relative order.
   subroutine host_sub()
      integer, parameter :: asize = 4
      integer, device :: adev(asize)
      integer :: ahost(asize)
      integer :: q
      integer :: i
      integer, device :: add_reduce_var
      integer, device :: mul_reduce_var
      ! CHECK: %[[VAL_0:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEadd_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
      ! CHECK: %[[VAL_1:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEmul_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>

      ! Host-side initialisation, then host -> device copy.
      do i = 1, asize
         ahost(i) = i
      end do
      adev = ahost

      ! Identity elements for the two reductions (integer literals, since the
      ! reduction variables are integers).
      add_reduce_var = 0
      mul_reduce_var = 1

      ! NOTE(review): the `[[VAL_0:.*]]` / `[[VAL_1:.*]]` forms in the reduce
      ! directive below define fresh captures instead of reusing the ones bound
      ! by the fir.declare directives above, so the reduce operands are not tied
      ! to those declarations. Confirm whether `[[VAL_0]]` / `[[VAL_1]]` was
      ! intended before tightening.
      ! CHECK: {{.*}} reduce(%[[VAL_0:.*]], %[[VAL_1:.*]] : !fir.ref<i32>, !fir.ref<i32> : [#fir.reduce_attr<add>, #fir.reduce_attr<multiply>]) {{.*}}
      !$cuf kernel do <<< *, * >>> reduce(+:add_reduce_var) reduce(*:mul_reduce_var)
      do i = 1, asize
         add_reduce_var = add_reduce_var + adev(i)
         mul_reduce_var = mul_reduce_var * adev(i)
      end do

      ! Bring the sum reduction result back to the host. The original read an
      ! undeclared, never-assigned `rsum` here.
      q = add_reduce_var
      ahost = adev
      print *, q
   end subroutine host_sub
end module mod1
32*7665d3d9SIman Hosseini
program test
   ! Driver: invokes the kernel-do reduction fixture from mod1 once.
   use mod1, only: host_sub
   implicit none

   call host_sub()
end program test
38