xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
1; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
2; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
3; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
4; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
5
6; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
7
8; MESA: enable_sgpr_kernarg_segment_ptr = 1
9; MESA: kernarg_segment_byte_size = 16
10; MESA: kernarg_segment_alignment = 4
11
12; HSA: s_load_dword s0, s[4:5], 0x0
13
14; COV4: .amdhsa_kernarg_size 56
15; COV5: .amdhsa_kernarg_size 256
; Kernel with no explicit arguments, attribute group #0. It obtains the
; implicit-argument pointer from the intrinsic and reads one i32 through it.
; The load result is unused; the load is volatile, presumably so that it is
; still emitted and can be matched by the checks preceding this definition.
16define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
17  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
18  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
19  ret void
20}
21
22; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
23; MESA: enable_sgpr_kernarg_segment_ptr = 1
24; MESA: kernarg_segment_byte_size = 16
25; MESA: kernarg_segment_alignment = 4
26
27; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
28; HSA: s_load_dword s0, [[NULL]], 0x0
29
30; MESA: s_load_dword s0, s[4:5], 0x0
31
32; COV4: .amdhsa_kernarg_size 0
33; COV5: .amdhsa_kernarg_size 0
; Kernel with no explicit arguments using attribute group #3, which sets
; "amdgpu-implicitarg-num-bytes"="0". The checks preceding this definition
; expect the amdhsa runs to load through a register pair initialised to 0,
; the Mesa run to load from s[4:5], and an amdhsa kernarg size of 0.
34define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
35  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
36  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
37  ret void
38}
39
40; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
41
42; MESA: enable_sgpr_kernarg_segment_ptr = 1
43; MESA: kernarg_segment_byte_size = 16
44; MESA: kernarg_segment_alignment = 4
45
46; HSA: s_load_dword s0, s[4:5], 0x0
47
48; HSA: .amdhsa_kernarg_size 48
; Kernel with no explicit arguments using attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes"="48". The amdhsa check preceding this
; definition expects a kernarg size of 48 for every code object version.
49define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
50  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
51  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
52  ret void
53}
54
55; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
56
57; MESA: enable_sgpr_kernarg_segment_ptr = 1
58; MESA: kernarg_segment_byte_size = 128
59; MESA: kernarg_segment_alignment = 4
60
61; HSA: s_load_dword s0, s[4:5], 0x1c
62
63; COV4: .amdhsa_kernarg_size 168
64; COV5: .amdhsa_kernarg_size 368
; Kernel with a single unnamed [112 x i8] explicit argument, attribute group
; #0. It reads one i32 through the implicit-argument pointer.
; The amdhsa check expects the load at offset 0x1c from s[4:5].
; NOTE(review): 0x1c is 28, i.e. presumably the 112 explicit bytes expressed
; in dwords - confirm against the target's scalar load offset encoding.
65define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
66  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
67  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
68  ret void
69}
70
71; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
72
73; MESA: enable_sgpr_kernarg_segment_ptr = 1
74; MESA: kernarg_segment_byte_size = 128
75; MESA: kernarg_segment_alignment = 4
76
77; HSA: s_load_dword s0, s[4:5], 0x1c
78
79; HSA: .amdhsa_kernarg_size 160
; Kernel with a single unnamed [112 x i8] explicit argument using attribute
; group #1 ("amdgpu-implicitarg-num-bytes"="48"). The amdhsa check expects a
; kernarg size of 160, which matches 112 + 48.
80define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
81  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
82  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
83  ret void
84}
85
86; GCN-LABEL: {{^}}func_implicitarg_ptr:
87; GCN: s_waitcnt
88; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
89; GCN-NEXT: s_waitcnt
90; GCN-NEXT: s_setpc_b64
; Non-kernel function (no amdgpu_kernel calling convention) that reads one
; i32 through the implicit-argument pointer. The checks preceding this
; definition expect the load to use s[8:9] at offset 0x0, followed directly
; by s_waitcnt and s_setpc_b64. Several kernels in this file call it.
91define void @func_implicitarg_ptr() #0 {
92  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
93  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
94  ret void
95}
96
97; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
98; GCN: s_waitcnt
99; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
100; GCN-NEXT: s_waitcnt
101; GCN-NEXT: s_setpc_b64
; Non-kernel function with the same body as @func_implicitarg_ptr; the
; expected output is the same as well (load from s[8:9] at offset 0x0).
; NOTE(review): despite the opencl_ name this uses attribute group #0, not
; #1 - confirm that is intended.
102define void @opencl_func_implicitarg_ptr() #0 {
103  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
104  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
105  ret void
106}
107
108; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
109
110; MESA: enable_sgpr_kernarg_segment_ptr = 1
111; MESA: kernarg_segment_byte_size = 16
112; MESA: kernarg_segment_alignment = 4
113
114; GCN: s_mov_b64 s[8:9], s[4:5]
115; GCN: s_swappc_b64
116
117; COV4: .amdhsa_kernarg_size 56
118; COV5: .amdhsa_kernarg_size 256
; Kernel with no explicit arguments (attribute group #0) whose only action is
; to call @func_implicitarg_ptr. The checks preceding this definition expect
; s[4:5] to be copied into s[8:9] before the s_swappc_b64 call.
119define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
120  call void @func_implicitarg_ptr()
121  ret void
122}
123
124; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
125; MESA: enable_sgpr_kernarg_segment_ptr = 1
126; MESA: kernarg_segment_byte_size = 16
127; MESA: kernarg_segment_alignment = 4
128
129; HSA: s_mov_b64 s[8:9], 0{{$}}
130; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
131; GCN: s_swappc_b64
132
133; HSA: .amdhsa_kernarg_size 0
; Kernel with no explicit arguments using attribute group #3
; ("amdgpu-implicitarg-num-bytes"="0") that calls @func_implicitarg_ptr.
; The checks expect the amdhsa runs to set s[8:9] to 0 and the Mesa run to
; copy s[4:5] into s[8:9] before the call.
134define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
135  call void @func_implicitarg_ptr()
136  ret void
137}
138
139; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
140; MESA: enable_sgpr_kernarg_segment_ptr = 1
141; MESA: kernarg_segment_byte_size = 16
142; GCN: s_mov_b64 s[8:9], s[4:5]
143; GCN-NOT: s4
144; GCN-NOT: s5
145; GCN: s_swappc_b64
146
147; HSA: .amdhsa_kernarg_size 48
; Kernel with no explicit arguments using attribute group #1
; ("amdgpu-implicitarg-num-bytes"="48") that calls @func_implicitarg_ptr.
; The checks expect s[4:5] to be copied into s[8:9] and then no further
; mention of s4 or s5 before the s_swappc_b64 call.
148define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
149  call void @func_implicitarg_ptr()
150  ret void
151}
152
153; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
154; MESA: enable_sgpr_kernarg_segment_ptr = 1
155; MESA: kernarg_segment_byte_size = 128
156; MESA: kernarg_segment_alignment = 4
157
158; HSA: s_add_u32 s8, s4, 0x70
159; MESA: s_add_u32 s8, s4, 0x70
160
161; GCN: s_addc_u32 s9, s5, 0{{$}}
162; GCN: s_swappc_b64
163
164; COV4: .amdhsa_kernarg_size 168
165; COV5: .amdhsa_kernarg_size 368
; Kernel with a single unnamed [112 x i8] explicit argument (attribute group
; #0) that calls @func_implicitarg_ptr. The checks expect s[8:9] to be formed
; as s[4:5] + 0x70 with an s_add_u32 / s_addc_u32 pair; 0x70 is 112, the
; size in bytes of the explicit argument.
166define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
167  call void @func_implicitarg_ptr()
168  ret void
169}
170
171; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
172; MESA: enable_sgpr_kernarg_segment_ptr = 1
173; MESA: kernarg_segment_byte_size = 128
174; MESA: kernarg_segment_alignment = 4
175
176; GCN: s_add_u32 s8, s4, 0x70
177; GCN: s_addc_u32 s9, s5, 0{{$}}
178; GCN: s_swappc_b64
179
180; HSA: .amdhsa_kernarg_size 160
; Kernel with a single unnamed [112 x i8] explicit argument using attribute
; group #1 ("amdgpu-implicitarg-num-bytes"="48") that calls
; @func_implicitarg_ptr. The checks expect s[8:9] = s[4:5] + 0x70 before the
; call and an amdhsa kernarg size of 160 (112 + 48).
181define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
182  call void @func_implicitarg_ptr()
183  ret void
184}
185
186; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
187; GCN-NOT: s8
188; GCN-NOT: s9
189; GCN-NOT: s[8:9]
190; GCN: s_swappc_b64
191; GCN: s_setpc_b64 s[30:31]
; Non-kernel function that calls @func_implicitarg_ptr. The checks preceding
; this definition require that s8, s9 and s[8:9] are not mentioned before the
; s_swappc_b64 call - presumably because the caller's incoming value is
; passed through untouched (confirm against the calling convention).
192define void @func_call_implicitarg_ptr_func() #0 {
193  call void @func_implicitarg_ptr()
194  ret void
195}
196
197; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
198; GCN-NOT: s8
199; GCN-NOT: s9
200; GCN-NOT: s[8:9]
201; GCN: s_swappc_b64
202; GCN: s_setpc_b64 s[30:31]
; Non-kernel function with the same body and expectations as
; @func_call_implicitarg_ptr_func.
; NOTE(review): despite the opencl_ name it uses attribute group #0 and calls
; @func_implicitarg_ptr rather than @opencl_func_implicitarg_ptr - confirm
; that is intended.
203define void @opencl_func_call_implicitarg_ptr_func() #0 {
204  call void @func_implicitarg_ptr()
205  ret void
206}
207
208; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
209; GCN: s_waitcnt
210; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
211; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
212; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
213; GCN: s_waitcnt lgkmcnt(0)
; Non-kernel function that queries both the kernarg segment pointer and the
; implicit-argument pointer and reads one i32 through each. The checks
; expect one load through a register pair that was set to 0 and one load
; from s[8:9]; presumably the zero pair is the kernarg segment pointer, which
; a non-kernel function does not receive - confirm.
214define void @func_kernarg_implicitarg_ptr() #0 {
215  %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
216  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
217  %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
218  %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
219  ret void
220}
221
222; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
223; GCN: s_waitcnt
224; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
225; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
226; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
227; GCN: s_waitcnt lgkmcnt(0)
; Non-kernel function with the same body and expectations as
; @func_kernarg_implicitarg_ptr: one i32 load through the kernarg segment
; pointer and one through the implicit-argument pointer.
; NOTE(review): uses attribute group #0 despite the opencl_ name - confirm.
228define void @opencl_func_kernarg_implicitarg_ptr() #0 {
229  %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
230  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
231  %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
232  %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
233  ret void
234}
235
236; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
237; GCN: s_add_u32 s8, s4, 0x70
238; GCN: s_addc_u32 s9, s5, 0
239; GCN: s_swappc_b64
; Kernel with a single unnamed [112 x i8] explicit argument (attribute group
; #0) that calls @func_kernarg_implicitarg_ptr. The checks expect s[8:9] to
; be set to s[4:5] + 0x70 (112 bytes) before the s_swappc_b64 call.
240define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
241  call void @func_kernarg_implicitarg_ptr()
242  ret void
243}
244
245; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
246; MESA: kernarg_segment_byte_size = 84
247; MESA: kernarg_segment_alignment = 6
248
249; HSA: .amdhsa_kernarg_size 120
; Kernel taking an unnamed <16 x i32> followed by an unnamed i32, attribute
; group #1 ("amdgpu-implicitarg-num-bytes"="48"). The checks expect a Mesa
; kernarg size of 84 and an amdhsa kernarg size of 120.
; NOTE(review): 120 = 72 + 48, so presumably the 68 explicit bytes are only
; rounded up to 8 before the implicit block instead of to the 64-byte
; argument alignment, as the function name suggests - confirm.
250define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
251  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
253  ret void
254}
255
256; HSA-LABEL:   amdhsa.kernels:
257; HSA:         .kernarg_segment_align: 8
258; COV5-NEXT:    .kernarg_segment_size: 256
259; COV4-NEXT:    .kernarg_segment_size: 56
260; HSA-LABEL:   .name:           kernel_implicitarg_ptr_empty
261
262; HSA:         .kernarg_segment_align: 4
263; HSA-NEXT:    .kernarg_segment_size: 0
264; HSA-LABEL:   .name:           kernel_implicitarg_ptr_empty_0implicit
265
266; HSA:         .kernarg_segment_align: 8
267; HSA-NEXT:    .kernarg_segment_size: 48
268; HSA-LABEL:   .name:           opencl_kernel_implicitarg_ptr_empty
269
270; HSA:         .kernarg_segment_align: 8
271; COV5-NEXT:    .kernarg_segment_size: 368
272; COV4-NEXT:    .kernarg_segment_size: 168
273; HSA-LABEL:   .name:           kernel_implicitarg_ptr
274
275; HSA:         .kernarg_segment_align: 8
276; HSA-NEXT:    .kernarg_segment_size: 160
277; HSA-LABEL:   .name:           opencl_kernel_implicitarg_ptr
278
279; HSA:         .kernarg_segment_align: 8
280; COV5-NEXT:    .kernarg_segment_size: 256
281; COV4-NEXT:    .kernarg_segment_size: 56
282; HSA-LABEL:   .name:           kernel_call_implicitarg_ptr_func_empty
283
284; HSA:         .kernarg_segment_align: 4
285; HSA-NEXT:    .kernarg_segment_size: 0
286; HSA-LABEL:   .name:           kernel_call_implicitarg_ptr_func_empty_implicit0
287
288; HSA:         .kernarg_segment_align: 8
289; HSA-NEXT:    .kernarg_segment_size: 48
290; HSA-LABEL:   .name:           opencl_kernel_call_implicitarg_ptr_func_empty
291
292; HSA:         .kernarg_segment_align: 8
293; COV5-NEXT:    .kernarg_segment_size: 368
294; COV4-NEXT:    .kernarg_segment_size: 168
295; HSA-LABEL:   .name:           kernel_call_implicitarg_ptr_func
296
297; HSA:         .kernarg_segment_align: 8
298; HSA-NEXT:    .kernarg_segment_size: 160
299; HSA-LABEL:   .name:           opencl_kernel_call_implicitarg_ptr_func
300
301; HSA:         .kernarg_segment_align: 8
302; COV5-NEXT:    .kernarg_segment_size: 368
303; COV4-NEXT:    .kernarg_segment_size: 168
304; HSA-LABEL:   .name:           kernel_call_kernarg_implicitarg_ptr_func
305
306; HSA:         .kernarg_segment_align: 64
307; HSA-NEXT:    .kernarg_segment_size: 120
308; HSA-LABEL:   .name:           kernel_implicitarg_no_struct_align_padding
309
; Intrinsics exercised by this file. Both take no arguments, return a
; ptr addrspace(4), and carry attribute group #2.
310declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
311declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
312
; #0: baseline group, used by the kernels without an opencl_ prefix or
;     zero-implicit suffix and by every non-kernel function in this file.
313attributes #0 = { nounwind noinline "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
; #1: same as #0 plus a request for 48 implicit-argument bytes; used by the
;     opencl_kernel_* kernels and kernel_implicitarg_no_struct_align_padding.
314attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
; #2: attached to the two intrinsic declarations.
315attributes #2 = { nounwind readnone speculatable }
; #3: same as #0 plus a request for 0 implicit-argument bytes; used by the
;     two kernels whose names end in 0implicit / implicit0.
316attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
317
; Module flag selecting the AMDHSA code object version. CODE_OBJECT_VERSION
; is a placeholder, not a literal: each RUN line at the top of the file pipes
; this file through sed to replace it with 600, 500 or 400 before llc sees it.
318!llvm.module.flags = !{!0}
319!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
320