; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV

; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled
; XFAIL: expensive_checks

; CHECK-SPIRV-DAG: %[[#]] = OpGroupAsyncCopy %[[#]] %[[#Scope:]]
; CHECK-SPIRV-DAG: %[[#Scope]] = OpConstant %[[#]]

%opencl.event_t = type opaque

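; The kernel below zero-fills a chunk of local memory per work-item, copies
; elements from the global source into the local buffer, then issues an
; async_work_group_copy from local memory back to the global destination and
; waits on the returned event.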
define spir_kernel void @test_fn(<2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(3)* %localBuffer, i32 %copiesPerWorkgroup, i32 %copiesPerWorkItem) {
entry:
  %src.addr = alloca <2 x i8> addrspace(1)*, align 4
  %dst.addr = alloca <2 x i8> addrspace(1)*, align 4
  %localBuffer.addr = alloca <2 x i8> addrspace(3)*, align 4
  %copiesPerWorkgroup.addr = alloca i32, align 4
  %copiesPerWorkItem.addr = alloca i32, align 4
  %i = alloca i32, align 4
  %event = alloca %opencl.event_t*, align 4
  store <2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)** %src.addr, align 4
  store <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(1)** %dst.addr, align 4
  store <2 x i8> addrspace(3)* %localBuffer, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  store i32 %copiesPerWorkgroup, i32* %copiesPerWorkgroup.addr, align 4
  store i32 %copiesPerWorkItem, i32* %copiesPerWorkItem.addr, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %call = call spir_func i32 @_Z12get_local_idj(i32 0)
  %2 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul = mul i32 %call, %2
  %3 = load i32, i32* %i, align 4
  %add = add i32 %mul, %3
  %4 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %4, i32 %add
  store <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* %arrayidx, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %5 = load i32, i32* %i, align 4
  %inc = add nsw i32 %5, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  call spir_func void @_Z7barrierj(i32 1)
  store i32 0, i32* %i, align 4
  br label %for.cond1

for.cond1:                                        ; preds = %for.inc12, %for.end
  %6 = load i32, i32* %i, align 4
  %7 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp2 = icmp slt i32 %6, %7
  br i1 %cmp2, label %for.body3, label %for.end14

for.body3:                                        ; preds = %for.cond1
  %call4 = call spir_func i32 @_Z13get_global_idj(i32 0)
  %8 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul5 = mul i32 %call4, %8
  %9 = load i32, i32* %i, align 4
  %add6 = add i32 %mul5, %9
  %10 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %src.addr, align 4
  %arrayidx7 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %10, i32 %add6
  %11 = load <2 x i8>, <2 x i8> addrspace(1)* %arrayidx7, align 2
  %call8 = call spir_func i32 @_Z12get_local_idj(i32 0)
  %12 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul9 = mul i32 %call8, %12
  %13 = load i32, i32* %i, align 4
  %add10 = add i32 %mul9, %13
  %14 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx11 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %14, i32 %add10
  store <2 x i8> %11, <2 x i8> addrspace(3)* %arrayidx11, align 2
  br label %for.inc12

for.inc12:                                        ; preds = %for.body3
  %15 = load i32, i32* %i, align 4
  %inc13 = add nsw i32 %15, 1
  store i32 %inc13, i32* %i, align 4
  br label %for.cond1

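; The async_work_group_copy call below is expected to lower to
; OpGroupAsyncCopy with a work-group scope operand emitted as an OpConstant,
; which the CHECK-SPIRV-DAG lines at the top of the file verify.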
for.end14:                                        ; preds = %for.cond1
  call spir_func void @_Z7barrierj(i32 1)
  %16 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %dst.addr, align 4
  %17 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  %call15 = call spir_func i32 @_Z12get_group_idj(i32 0)
  %mul16 = mul i32 %17, %call15
  %add.ptr = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %16, i32 %mul16
  %18 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %19 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  %call17 = call spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)* %add.ptr, <2 x i8> addrspace(3)* %18, i32 %19, %opencl.event_t* null)
  store %opencl.event_t* %call17, %opencl.event_t** %event, align 4
  %20 = addrspacecast %opencl.event_t** %event to %opencl.event_t* addrspace(4)*
  call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32 1, %opencl.event_t* addrspace(4)* %20)
  ret void
}

declare spir_func i32 @_Z12get_local_idj(i32)

declare spir_func void @_Z7barrierj(i32)

declare spir_func i32 @_Z13get_global_idj(i32)

declare spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, %opencl.event_t*)

declare spir_func i32 @_Z12get_group_idj(i32)

declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32, %opencl.event_t* addrspace(4)*)
