xref: /llvm-project/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
1; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.bc %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %t.bc | llvm-readelf --notes - | FileCheck %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.bc | FileCheck --check-prefix=CHECK %s
4
5declare void @function1() ; external callee declared without any attribute group
6
7declare void @function2() #0 ; external callee; group #0 carries "amdgpu-no-hostcall-ptr"
8
9; Function Attrs: noinline -- helper that stores the pointer %argptr itself to %sink; called from test_kernel71
10define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #4 {
11  store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
12  ret void
13}
14
15; Function Attrs: noinline -- helper that stores the i64 value %arg through %a; called from test_kernel60
16define void @function4(i64 %arg, ptr %a) #4 {
17  store i64 %arg, ptr %a
18  ret void
19}
20
21; Function Attrs: noinline -- helper that loads an i64 from %ptr + 8 and stores it to %sink; called from test_kernel61
22define void @function5(ptr addrspace(4) %ptr, ptr %sink) #4 {
23  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 8
24  %x = load i64, ptr addrspace(4) %gep
25  store i64 %x, ptr %sink
26  ret void
27}
28
29; Function Attrs: nounwind readnone speculatable willreturn -- intrinsic the kernels below call to obtain the addrspace(4) base pointer they offset from
30declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
31
32; CHECK: amdhsa.kernels:
33; CHECK:  - .args:
34; CHECK-NOT: hidden_hostcall_buffer
35; CHECK-LABEL:    .name:           test_kernel10
36define amdgpu_kernel void @test_kernel10(ptr %a) #2 { ; baseline: no call and no implicit-argument access, argument expected to be absent
37  store i8 3, ptr %a, align 1
38  ret void
39}
40
41; Call to an extern function that has no attributes: hidden_hostcall_buffer is expected to stay in the metadata
42
43; CHECK:  - .args:
44; CHECK: hidden_hostcall_buffer
45; CHECK-LABEL:    .name:           test_kernel20
46define amdgpu_kernel void @test_kernel20(ptr %a) #2 {
47  call void @function1()
48  store i8 3, ptr %a, align 1
49  ret void
50}
51
52; Explicit attribute on kernel: group #3 adds "amdgpu-no-hostcall-ptr", so the argument is expected to be absent despite the call to @function1
53
54; CHECK:  - .args:
55; CHECK-NOT: hidden_hostcall_buffer
56; CHECK-LABEL:    .name:           test_kernel21
57define amdgpu_kernel void @test_kernel21(ptr %a) #3 {
58  call void @function1()
59  store i8 3, ptr %a, align 1
60  ret void
61}
62
63; Explicit attribute on extern callee: @function2 carries "amdgpu-no-hostcall-ptr" via group #0, so the argument is expected to be absent
64
65; CHECK:  - .args:
66; CHECK-NOT: hidden_hostcall_buffer
67; CHECK-LABEL:    .name:           test_kernel22
68define amdgpu_kernel void @test_kernel22(ptr %a) #2 {
69  call void @function2()
70  store i8 3, ptr %a, align 1
71  ret void
72}
73
74; Access more bytes than the pointer size: the i128 load at offset 16 reads bytes 16..31, overlapping bytes 24..31 that test_kernel44..47 treat as the hostcall pointer
75
76; CHECK:  - .args:
77; CHECK: hidden_hostcall_buffer
78; CHECK-LABEL:    .name:           test_kernel30
79define amdgpu_kernel void @test_kernel30(ptr %a) #2 {
80  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
81  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
82  %x = load i128, ptr addrspace(4) %gep
83  store i128 %x, ptr %a
84  ret void
85}
86
87; Typical load of hostcall buffer pointer: an i64 load at offset 24 from the implicit argument base
88
89; CHECK:  - .args:
90; CHECK: hidden_hostcall_buffer
91; CHECK-LABEL:    .name:           test_kernel40
92define amdgpu_kernel void @test_kernel40(ptr %a) #2 {
93  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
94  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
95  %x = load i64, ptr addrspace(4) %gep
96  store i64 %x, ptr %a
97  ret void
98}
99
100; Typical usage, overridden by explicit attribute on kernel (group #3 carries "amdgpu-no-hostcall-ptr")
101
102; CHECK:  - .args:
103; CHECK-NOT: hidden_hostcall_buffer
104; CHECK-LABEL:    .name:           test_kernel41
105define amdgpu_kernel void @test_kernel41(ptr %a) #3 {
106  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
107  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
108  %x = load i64, ptr addrspace(4) %gep
109  store i64 %x, ptr %a
110  ret void
111}
112
113; Access to implicit arg before the hostcall pointer: the i64 load at offset 16 reads bytes 16..23 only
114
115; CHECK:  - .args:
116; CHECK-NOT: hidden_hostcall_buffer
117; CHECK-LABEL:    .name:           test_kernel42
118define amdgpu_kernel void @test_kernel42(ptr %a) #2 {
119  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
120  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
121  %x = load i64, ptr addrspace(4) %gep
122  store i64 %x, ptr %a
123  ret void
124}
125
126; Access to implicit arg after the hostcall pointer: the i64 load at offset 32 reads bytes 32..39 only
127
128; CHECK:  - .args:
129; CHECK-NOT: hidden_hostcall_buffer
130; CHECK-LABEL:    .name:           test_kernel43
131define amdgpu_kernel void @test_kernel43(ptr %a) #2 {
132  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
133  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
134  %x = load i64, ptr addrspace(4) %gep
135  store i64 %x, ptr %a
136  ret void
137}
138
139; Accessing a byte just before the hostcall pointer (single-byte load at offset 23)
140
141; CHECK:  - .args:
142; CHECK-NOT: hidden_hostcall_buffer
143; CHECK-LABEL:    .name:           test_kernel44
144define amdgpu_kernel void @test_kernel44(ptr %a) #2 {
145  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
146  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 23
147  %x = load i8, ptr addrspace(4) %gep, align 1
148  store i8 %x, ptr %a, align 1
149  ret void
150}
151
152; Accessing a byte inside the hostcall pointer (single-byte load at offset 24, its first byte)
153
154; CHECK:  - .args:
155; CHECK: hidden_hostcall_buffer
156; CHECK-LABEL:    .name:           test_kernel45
157define amdgpu_kernel void @test_kernel45(ptr %a) #2 {
158  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
159  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
160  %x = load i8, ptr addrspace(4) %gep, align 1
161  store i8 %x, ptr %a, align 1
162  ret void
163}
164
165; Accessing a byte inside the hostcall pointer (single-byte load at offset 31, its last byte)
166
167; CHECK:  - .args:
168; CHECK: hidden_hostcall_buffer
169; CHECK-LABEL:    .name:           test_kernel46
170define amdgpu_kernel void @test_kernel46(ptr %a) #2 {
171  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
172  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 31
173  %x = load i8, ptr addrspace(4) %gep, align 1
174  store i8 %x, ptr %a, align 1
175  ret void
176}
177
178; Accessing a byte just after the hostcall pointer (single-byte load at offset 32)
179
180; CHECK:  - .args:
181; CHECK-NOT: hidden_hostcall_buffer
182; CHECK-LABEL:    .name:           test_kernel47
183define amdgpu_kernel void @test_kernel47(ptr %a) #2 {
184  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
185  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
186  %x = load i8, ptr addrspace(4) %gep, align 1
187  store i8 %x, ptr %a, align 1
188  ret void
189}
190
191; Access with an unknown offset: the GEP index is the kernel argument %b, and the argument is expected to be kept
192
193; CHECK:  - .args:
194; CHECK: hidden_hostcall_buffer
195; CHECK-LABEL:    .name:           test_kernel50
196define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) #2 {
197  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
198  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 %b
199  %x = load i8, ptr addrspace(4) %gep, align 1
200  store i8 %x, ptr %a, align 1
201  ret void
202}
203
204; Multiple geps reaching the hostcall pointer argument (16 + 8 = offset 24).
205
206; CHECK:  - .args:
207; CHECK: hidden_hostcall_buffer
208; CHECK-LABEL:    .name:           test_kernel51
209define amdgpu_kernel void @test_kernel51(ptr %a) #2 {
210  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
211  %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
212  %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 8
213  %x = load i8, ptr addrspace(4) %gep2, align 1
214  store i8 %x, ptr %a, align 1
215  ret void
216}
217
218; Multiple geps not reaching the hostcall pointer argument (16 + 16 = offset 32).
219
220; CHECK:  - .args:
221; CHECK-NOT: hidden_hostcall_buffer
222; CHECK-LABEL:    .name:           test_kernel52
223define amdgpu_kernel void @test_kernel52(ptr %a) #2 {
224  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
225  %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
226  %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 16
227  %x = load i8, ptr addrspace(4) %gep2, align 1
228  store i8 %x, ptr %a, align 1
229  ret void
230}
231
232; Hostcall pointer used inside a function call: the i64 loaded at offset 24 in the kernel is passed by value to @function4
233
234; CHECK:  - .args:
235; CHECK: hidden_hostcall_buffer
236; CHECK-LABEL:    .name:           test_kernel60
237define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
238  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
239  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
240  %x = load i64, ptr addrspace(4) %gep
241  call void @function4(i64 %x, ptr %a)
242  ret void
243}
244
245; Hostcall pointer retrieved inside a function call; chain of geps: the kernel adds 16 and @function5 adds 8 before its i64 load (offset 24 overall)
246
247; CHECK:  - .args:
248; CHECK: hidden_hostcall_buffer
249; CHECK-LABEL:    .name:           test_kernel61
250define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
251  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
253  call void @function5(ptr addrspace(4) %gep, ptr %a)
254  ret void
255}
256
257; Pointer captured: a pointer derived from the implicit argument base (offset 42) is itself stored to memory through %sink
258
259; CHECK:  - .args:
260; CHECK: hidden_hostcall_buffer
261; CHECK-LABEL:    .name:           test_kernel70
262define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
263  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
264  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
265  store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
266  ret void
267}
268
269; Pointer captured inside function call: the derived pointer is passed to @function3, which performs the store to %sink
270
271; CHECK:  - .args:
272; CHECK: hidden_hostcall_buffer
273; CHECK-LABEL:    .name:           test_kernel71
274define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
275  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
276  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
277  call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
278  ret void
279}
280
281; Ineffective pointer capture: the derived pointer is stored to an undef address, and the argument is expected to be absent
282
283; CHECK:  - .args:
284; CHECK-NOT: hidden_hostcall_buffer
285; CHECK-LABEL:    .name:           test_kernel72
286define amdgpu_kernel void @test_kernel72() #2 {
287  %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
288  %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
289  store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
290  ret void
291}
292
293attributes #0 = { "amdgpu-no-hostcall-ptr" } ; applied to the extern declaration @function2
294attributes #1 = { nounwind readnone speculatable willreturn } ; applied to @llvm.amdgcn.implicitarg.ptr
295attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" } ; kernels without an explicit hostcall opt-out
296attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } ; test_kernel21 and test_kernel41
297attributes #4 = { noinline } ; helpers @function3, @function4 and @function5
298
299!llvm.module.flags = !{!0}
300!0 = !{i32 1, !"amdhsa_code_object_version", i32 400} ; NOTE(review): 400 presumably selects code object v4, matching the -v4 suffix of this test's filename -- confirm
301