1! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s 2! RUN: bbc -emit-hlfir -fcuda %s -o - | fir-opt | FileCheck %s 3 4! Test lowering of CUDA kernel loop directive. 5 6subroutine sub1() 7 integer :: i, j 8 integer, parameter :: n = 100 9 integer(8) :: istream 10 real, device :: a(n), b(n) 11 real, device :: c(n,n), d(n,n) 12 13! CHECK-LABEL: func.func @_QPsub1() 14! CHECK: %[[IV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) 15! CHECK: %[[STREAM:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Eistream"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) 16! CHECK: %[[IV_J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) 17 !$cuf kernel do <<< 1, 2 >>> 18 do i = 1, n 19 a(i) = a(i) * b(i) 20 end do 21 22! CHECK: %[[LB:.*]] = fir.convert %c1{{.*}} : (i32) -> index 23! CHECK: %[[UB:.*]] = fir.convert %c100{{.*}} : (i32) -> index 24! CHECK: %[[STEP:.*]] = arith.constant 1 : index 25! CHECK: cuf.kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index) 26! CHECK-NOT: fir.do_loop 27! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32 28! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32> 29! CHECK: hlfir.assign 30 31 32 !$cuf kernel do <<< *, * >>> 33 do i = 1, n 34 a(i) = a(i) * b(i) 35 end do 36 37! CHECK: cuf.kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index) 38 39 !$cuf kernel do(2) <<< 1, (256,1) >>> 40 do i = 1, n 41 do j = 1, n 42 c(i,j) = c(i,j) * d(i,j) 43 end do 44 end do 45 46! CHECK: cuf.kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) 47! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32 48! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32> 49! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32 50! CHECK: fir.store %[[ARG1_I32]] to %[[IV_J]]#1 : !fir.ref<i32> 51! CHECK: {n = 2 : i64} 52 53 !$cuf kernel do(2) <<< (1,*), (256,1) >>> 54 do i = 1, n 55 do j = 1, n 56 c(i,j) = c(i,j) * d(i,j) 57 end do 58 end do 59! CHECK: cuf.kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) 60 61!$cuf kernel do(2) <<< (*,*), (32,4) >>> 62 do i = 1, n 63 do j = 1, n 64 c(i,j) = c(i,j) * d(i,j) 65 end do 66 end do 67 68! CHECK: cuf.kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) 69 70 !$cuf kernel do(2) <<< (*,*), (*,*), stream=istream >>> 71 do i = 1, n 72 do j = 1, n 73 c(i,j) = c(i,j) * d(i,j) 74 end do 75 end do 76end 77 78! CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]]#0 : !fir.ref<i64> 79! CHECK: %[[STREAM_I32:.*]] = fir.convert %[[STREAM_LOAD]] : (i64) -> i32 80! CHECK: cuf.kernel<<<*, *, stream = %[[STREAM_I32]]>>> 81 82 83! Test lowering with unstructured construct inside. 84subroutine sub2(m,a,b) 85 integer :: m 86 real, device :: a(m,m), b(m) 87 integer :: i,j 88 !$cuf kernel do<<<*,*>>> 89 90 do j = 1, m 91 i = 1 92 do while (a(i,j).eq.0) 93 i = i + 1 94 end do 95 b(j) = i 96 end do 97end subroutine 98 99! CHECK-LABEL: func.func @_QPsub2 100! CHECK: cuf.kernel 101 102subroutine sub3() 103 integer, device :: a(10), b(10) 104 integer :: lb = 1 105 integer :: n = 10 106 integer :: s = 1 107 108 !$cuf kernel do <<< *, * >>> 109 do i = lb, n, s 110 a(i) = a(i) * b(i) 111 end do 112end 113 114! CHECK-LABEL: func.func @_QPsub3 115! CHECK: cuf.kernel 116