// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// The aim of this test is to check the GPU LLVM IR codegen
// for a nested omp do loop inside an omp target region.

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
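  // Case 1: the loop body captures the pointer argument and stores the loop
  // induction variable into the array it points to.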
  llvm.func @target_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
    %loop_ub = llvm.mlir.constant(9 : i32) : i32
    %loop_lb = llvm.mlir.constant(0 : i32) : i32
    %loop_step = llvm.mlir.constant(1 : i32) : i32
    omp.wsloop {
      omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
        %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
        llvm.store %loop_cnt, %gep : i32, !llvm.ptr
        omp.yield
      }
    }
    llvm.return
  }

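  // Case 2: the same loop with an empty body; nothing is captured.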
  llvm.func @target_empty_wsloop() attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
    %loop_ub = llvm.mlir.constant(9 : i32) : i32
    %loop_lb = llvm.mlir.constant(0 : i32) : i32
    %loop_step = llvm.mlir.constant(1 : i32) : i32
    omp.wsloop {
      omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
        omp.yield
      }
    }
    llvm.return
  }
}

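// Check that the workshare loop is lowered to a call to the device runtime's
// __kmpc_for_static_loop_4u entry point: the loop body is outlined into a
// separate function, the captured pointer is passed through an aggregate
// allocated in AMDGPU private memory (addrspace(5)), the trip count is 10 and
// the thread count comes from omp_get_num_threads.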
// CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]])
// CHECK:   %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
// CHECK:   %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
// CHECK:   %[[GEP:.*]] = getelementptr { ptr }, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0
// CHECK:   store ptr %[[ARG0]], ptr addrspace(5) %[[GEP]], align 8
// CHECK:   %[[NUM_THREADS:.*]] = call i32 @omp_get_num_threads()
// CHECK:   call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), ptr @[[LOOP_BODY_FN:.*]], ptr %[[STRUCTARG_ASCAST]], i32 10, i32 %[[NUM_THREADS]], i32 0)

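// The outlined body function receives the loop induction variable and the
// aggregate argument, reloads the captured pointer from it and stores the
// induction variable into the array.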
// CHECK: define internal void @[[LOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]])
// CHECK:   %[[GEP2:.*]] = getelementptr { ptr }, ptr %[[LOOP_BODY_ARG]], i32 0, i32 0
// CHECK:   %[[LOADGEP:.*]] = load ptr, ptr %[[GEP2]], align 8
// CHECK:   %[[GEP3:.*]] = getelementptr [10 x i32], ptr %[[LOADGEP]], i32 0, i32 %[[TMP2:.*]]
// CHECK:   store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4

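// With an empty loop body nothing is captured, so a null pointer is passed in
// place of the aggregate argument and the outlined body only takes the
// induction variable.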
// CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]()
// CHECK:   call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 10, i32 %[[NUM_THREADS:.*]], i32 0)

// CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]])