xref: /llvm-project/flang/test/Lower/CUDA/cuda-data-transfer.cuf (revision 3433e4140d18865fe784061a3cd029c5980f4e2f)
1! RUN: bbc -emit-hlfir -fopenacc -fcuda %s -o - | FileCheck %s
2
3! Test CUDA Fortran data transfer using assignment statements.
4
5module mod1
6  type :: t1
7    integer :: i
8  end type
9
10  integer, device, dimension(11:20) :: cdev
11
12contains
13  function dev1(a)
14    integer, device :: a(:)
15    integer :: dev1
16    dev1 = 1
17  end function
18end
19
20subroutine sub1()
21  use mod1
22  integer, device :: m
23  integer, device :: adev(10)
24  integer :: i, ahost(10), bhost(10)
25
26  m = 1 + i
27
28  m = 1
29
30  adev = ahost
31
32  adev = ahost + 1
33
34  adev(1:5) = ahost(1:5)
35
36  adev = ahost + bhost
37
38  adev = 10
39
40  cdev = 0
41end
42
43! CHECK-LABEL: func.func @_QPsub1()
44
45! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
46! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
47! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
48! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
49
50! CHECK: %[[C1:.*]] = arith.constant 1 : i32
51! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
52! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32
53! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
54! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
55! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
56
57! CHECK: cuf.data_transfer %c1{{.*}} to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<i32>
58
59! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
60
61! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> {
62! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
63! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
64! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
65
66! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}})  shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
67! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}})  shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
68! CHECK: cuf.data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
69
70! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
71! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
72! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
73! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
74
75! CHECK: cuf.data_transfer %c10{{.*}} to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
76
77! CHECK: cuf.data_transfer %c0{{.*}} to %{{.*}}#1, %{{.*}} : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
78
79subroutine sub2()
80  integer, device :: m
81  integer, device :: adev(10), bdev(10)
82  integer :: i, ahost(10), bhost(10)
83
84  ahost = adev
85
86  i = m
87
88  ahost(1:5) = adev(1:5)
89
90  bdev = adev
91
92  ! Implicit data transfer of adev before evaluation.
93  bhost = ahost + adev
94
95end
96
97! CHECK-LABEL: func.func @_QPsub2()
98! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
99! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
100! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
101! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
102! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
103! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
104! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
105! CHECK: cuf.data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
106
107! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}})  shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
108! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}})  shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
109! CHECK: cuf.data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
110
111! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
112
113! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""}
114! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
115! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
116! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
117! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
118! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref<!fir.array<10xi32>>
119! CHECK: fir.freemem %[[TEMP]] : !fir.heap<!fir.array<10xi32>>
120
121subroutine sub3()
122  use mod1
123  type(t1), device :: t
124  integer :: ahost(10), bhost(10)
125
126  bhost = ahost + t%i
127end
128
129! CHECK-LABEL: func.func @_QPsub3()
130! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"}
131! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
132! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
133! CHECK: %[[T:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
134! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
135! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
136
137
138! Check that cuf.data_transfer are not generated within cuf kernel
139subroutine sub4()
140  integer, parameter :: n = 10
141  real, device :: adev(n)
142  real :: ahost(n)
143  real, managed :: b
144  integer :: i
145
146  adev = ahost
147  !$cuf kernel do <<<*,*>>>
148  do i = 1, n
149    adev(i) = adev(i) + b
150  enddo
151end subroutine
152
153! CHECK-LABEL: func.func @_QPsub4()
154! CHECK: cuf.data_transfer
155! CHECK: cuf.kernel<<<*, *>>>
156! CHECK-NOT: cuf.data_transfer
157! CHECK: hlfir.assign
158
159attributes(global) subroutine sub5(a)
160  integer, device :: a
161  integer :: i
162  a = i
163end subroutine
164
165! CHECK-LABEL: func.func @_QPsub5
166! CHECK-NOT: cuf.data_transfer
167
168attributes(host,device) subroutine sub6(a)
169  integer, device :: a
170  integer :: i
171  a = i
172end subroutine
173
174! CHECK-LABEL: func.func @_QPsub6
175! CHECK: cuf.data_transfer
176
177subroutine sub7(a, b, c)
178  integer, device, allocatable :: a(:), c(:)
179  integer, allocatable :: b(:)
180  b = a
181
182  a = b
183
184  c = a
185end subroutine
186
187! CHECK-LABEL: func.func @_QPsub7(
188! CHECK-SAME:  %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {fir.bindc_name = "b"}, %[[ARG2:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "c"}) {
189! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
190! CHECK: %[[B:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
191! CHECK: %[[C:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ec"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
192! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
193! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
194! CHECK: cuf.data_transfer %[[A]]#0 to %[[C]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
195
196subroutine sub8(a, b, n)
197  integer :: n
198  integer, device :: a(n)
199  integer :: b(10)
200  b = a
201  a = b
202end subroutine
203
204! CHECK-LABEL: func.func @_QPsub8(
205! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "b"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "n"})
206! CHECK: %[[B:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub8Eb"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
207! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
208! CHECK: cuf.data_transfer %[[A]]#1 to %[[B]]#0, %{{.*}} : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
209! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#1, %{{.*}} : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<?xi32>>
210
211subroutine sub9(a)
212  integer, pinned, allocatable :: a(:)
213  do concurrent (i = 1 : 10)
214    a(i) = 10 + a(i)
215  end do
216end subroutine
217
218! CHECK-LABEL: func.func @_QPsub9
219! CHECK-NOT: cuf.data_transfer
220
221subroutine sub10(a, b)
222  integer, device :: a
223  integer, allocatable, pinned :: b
224  integer :: res
225
226  res = a + b
227end subroutine
228
229! CHECK-LABEL: func.func @_QPsub10(
230! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}
231
232! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub10Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
233! CHECK: cuf.data_transfer %[[A]]#1 to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
234! CHECK-NOT: cuf.data_transfer
235
236subroutine sub11(n)
237  integer :: n
238  real, dimension(10) :: h
239  real, dimension(n), device :: d
240  do i=1,10
241    h(i) = d(i)
242  end do
243end subroutine
244
245! CHECK-LABEL: func.func @_QPsub11
246! CHECK: %[[RHS:.*]] = hlfir.designate %{{.*}} (%{{.*}})  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
247! CHECK: %[[LHS:.*]] = hlfir.designate %{{.*}} (%{{.*}})  : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>
248! CHECK: cuf.data_transfer %[[RHS]] to %[[LHS]] {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<f32>, !fir.ref<f32>
249
250subroutine sub12()
251  use mod1
252  integer, device :: a(10)
253  integer :: x
254  x = dev1(a)
255end subroutine
256
257! CHECK-LABEL: func.func @_QPsub12
258! CHECK: %{{.*}} = fir.call @_QMmod1Pdev1
259! CHECK: hlfir.assign
260! CHECK-NOT: cuf.data_transfer
261
262subroutine sub13(a, b, n)
263  integer :: n
264  integer :: a(n)
265  integer, allocatable, device :: b(:)
266  integer :: res(10)
267
268  res = a + b
269end subroutine
270
271! CHECK-LABEL: func.func @_QPsub13
272! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %14#1 {bindc_name = ".tmp", uniq_name = ""}
273! CHECK: cuf.data_transfer
274! CHECK: fir.freemem %[[TEMP]] : !fir.heap<!fir.array<?xi32>>
275
276subroutine sub14()
277  logical(4), device :: log(10)
278  log = .true.
279end subroutine
280
281! CHECK-LABEL: func.func @_QPsub14()
282! CHECK: %[[TRUE:.*]] = arith.constant true
283! CHECK: cuf.data_transfer %[[TRUE]] to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.array<10x!fir.logical<4>>>
284
285subroutine sub15(a_dev, a_host, n, m)
286  integer, intent(in) :: n, m
287  real, device :: a_dev(n*m)
288  real :: a_host(n*m)
289
290  a_dev = a_host
291end subroutine
292
293! CHECK-LABEL: func.func @_QPsub15(
294! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a_dev"}, %[[ARG1:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "a_host"}
295! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub15Ea_dev"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
296! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
297! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub15Ea_host"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
298! CHECK: cuf.data_transfer %[[AHOST]]#1 to %[[ADEV]]#1, %[[SHAPE]] : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>
299
300! Check that cuf.data_transfer are not generated within OpenACC region
301subroutine sub16()
302  integer, parameter :: n = 10
303  real, device :: adev(n)
304  real :: ahost(n)
305  real, managed :: b
306  integer :: i
307
308  adev = ahost
309  !$acc parallel loop deviceptr(adev)
310  do i = 1, n
311    adev(i) = adev(i) + b
312  enddo
313
314  !$acc kernels deviceptr(adev)
315  do i = 1, n
316    adev(i) = adev(i) + b
317  enddo
318  !$acc end kernels
319
320
321  !$acc serial deviceptr(adev)
322  do i = 1, n
323    adev(i) = adev(i) + b
324  enddo
325  !$acc end serial
326end subroutine
327
328! CHECK-LABEL: func.func @_QPsub16()
329! CHECK: cuf.data_transfer
330! CHECK: acc.parallel
331! CHECK-NOT: cuf.data_transfer
332! CHECK: hlfir.assign
333
334! CHECK: acc.kernels
335! CHECK-NOT: cuf.data_transfer
336! CHECK: hlfir.assign
337
338! CHECK: acc.serial
339! CHECK-NOT: cuf.data_transfer
340! CHECK: hlfir.assign
341
342! Check that cuf.data_transfer are not generated within cuf kernel and do not
343! trigger semantic error.
344subroutine sub17()
345  integer, parameter :: n = 10
346  real, device :: adev(n)
347  real, device :: bdev(n)
348  real :: ahost
349  real, managed :: b
350  integer :: i
351
352  adev = ahost
353  !$cuf kernel do <<<*,*>>>
354  do i = 1, n
355    ahost = adev(i) * bdev(i) + b
356  enddo
357end subroutine
358
359! CHECK-LABEL: func.func @_QPsub17()
360! CHECK: cuf.kernel<<<*, *>>>
361! CHECK-NOT: cuf.data_transfer
362
363subroutine sub18(o)
364  integer, device, optional, allocatable :: o(:)
365  integer, device, allocatable :: a(:)
366  integer, device, pointer :: p(:)
367  integer :: b
368  integer :: s(1)
369  logical :: l
370
371  b = size(a)
372  b = lbound(a, dim=1)
373  b = ubound(a, dim=1)
374  s = shape(a)
375  l = allocated(a)
376  l = associated(p)
377  b = kind(a)
378  l = present(o)
379end subroutine
380
381! CHECK-LABEL: func.func @_QPsub18
382! CHECK-NOT: cuf.data_transfer
383
384subroutine sub19()
385  integer, device :: adev(10)
386  integer :: ahost(10)
387  ! Implicit data transfer of adev and then addition on the host
388  ahost = adev + 2
389end subroutine
390
391! CHECK-LABEL: func.func @_QPsub19()
392! CHECK: %[[ADEV_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub19Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
393! CHECK: %[[ALLOC_TMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""}
394! CHECK: %[[TMP:.*]]:2 = hlfir.declare %[[ALLOC_TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
395! CHECK: cuf.data_transfer %[[ADEV_DECL]]#1 to %[[TMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
396! CHECL: hlfir.assign
397