xref: /llvm-project/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (revision c897c13dde3bb413e723317c0579781fb6045a8b)
1; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
2; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
3; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
4; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
5; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
6
7; Make sure to run on a GPU with the SGPR allocation bug.
8
; Leaf function whose body is a single empty inline asm that lists vcc as a
; clobber. The checks below expect a TotalNumSgprs of 34 and no VGPRs.
9; GCN-LABEL: {{^}}use_vcc:
10; GCN: ; TotalNumSgprs: 34
11; GCN: ; NumVgprs: 0
12define void @use_vcc() #1 {
13  call void asm sideeffect "", "~{vcc}" () #0
14  ret void
15}
16
; Non-leaf function that only calls @use_vcc. The checks below expect s33 to be
; copied through s4 and, together with s30/s31 (the pair consumed by the final
; s_setpc_b64), written to lanes of v40 before s_swappc_b64 and read back in
; the reverse order afterwards. Expected totals are 36 SGPRs and 41 VGPRs.
17; GCN-LABEL: {{^}}indirect_use_vcc:
18; GCN: s_mov_b32 s4, s33
19; GCN: v_writelane_b32 v40, s4, 2
20; GCN: v_writelane_b32 v40, s30, 0
21; GCN: v_writelane_b32 v40, s31, 1
22; GCN: s_swappc_b64
23; GCN: v_readlane_b32 s31, v40, 1
24; GCN: v_readlane_b32 s30, v40, 0
25; GCN: v_readlane_b32 s4, v40, 2
26; GCN: s_mov_b32 s33, s4
27; GCN: s_setpc_b64 s[30:31]
28; GCN: ; TotalNumSgprs: 36
29; GCN: ; NumVgprs: 41
30define void @indirect_use_vcc() #1 {
31  call void @use_vcc()
32  ret void
33}
34
; Kernel entry point two call levels above @use_vcc; its %out argument is not
; referenced by the body. The expected SGPR total differs per run (38 for the
; default-CPU run, 40 for fiji, 96 for iceland) while 41 VGPRs are expected
; in all of them.
35; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
36; CI: ; TotalNumSgprs: 38
37; VI-NOBUG: ; TotalNumSgprs: 40
38; VI-BUG: ; TotalNumSgprs: 96
39; GCN: ; NumVgprs: 41
40define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
41  call void @indirect_use_vcc()
42  ret void
43}
44
; Leaf function whose body is a single empty inline asm that lists
; flat_scratch as a clobber. The expected SGPR total is 36 for the default-CPU
; run and 38 for the fiji/iceland runs; no VGPRs are expected.
45; GCN-LABEL: {{^}}use_flat_scratch:
46; CI: ; TotalNumSgprs: 36
47; VI: ; TotalNumSgprs: 38
48; GCN: ; NumVgprs: 0
49define void @use_flat_scratch() #1 {
50  call void asm sideeffect "", "~{flat_scratch}" () #0
51  ret void
52}
53
; Non-leaf function that only calls @use_flat_scratch. The checks expect two
; more SGPRs than for the callee in each run (38 and 40) and 41 VGPRs.
54; GCN-LABEL: {{^}}indirect_use_flat_scratch:
55; CI: ; TotalNumSgprs: 38
56; VI: ; TotalNumSgprs: 40
57; GCN: ; NumVgprs: 41
58define void @indirect_use_flat_scratch() #1 {
59  call void @use_flat_scratch()
60  ret void
61}
62
; Kernel entry point two call levels above @use_flat_scratch; its %out
; argument is not referenced by the body. The expected SGPR total is 38 for
; the default-CPU run, 40 for fiji and 96 for iceland; 41 VGPRs are expected
; everywhere.
63; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
64; CI: ; TotalNumSgprs: 38
65; VI-NOBUG: ; TotalNumSgprs: 40
66; VI-BUG: ; TotalNumSgprs: 96
67; GCN: ; NumVgprs: 41
68define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
69  call void @indirect_use_flat_scratch()
70  ret void
71}
72
; Leaf function with two empty inline asm statements that together list v0
; through v9 as clobbers. The check expects a VGPR count of 10.
73; GCN-LABEL: {{^}}use_10_vgpr:
74; GCN: ; NumVgprs: 10
75define void @use_10_vgpr() #1 {
76  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
77  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
78  ret void
79}
80
; Non-leaf function that only calls @use_10_vgpr. The check expects 41 VGPRs,
; the same count expected for the other single-call wrappers in this file
; whose callee needs fewer than that.
81; GCN-LABEL: {{^}}indirect_use_10_vgpr:
82; GCN: ; NumVgprs: 41
83define void @indirect_use_10_vgpr() #0 {
84  call void @use_10_vgpr()
85  ret void
86}
87
; Kernel entry point two call levels above @use_10_vgpr. The check expects the
; same VGPR count (41) as for the function it calls.
88; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
89; GCN: ; NumVgprs: 41
90define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
91  call void @indirect_use_10_vgpr()
92  ret void
93}
94
; Leaf function whose empty inline asm names only v49 as a clobber. The check
; nevertheless expects a VGPR count of 50.
95; GCN-LABEL: {{^}}use_50_vgpr:
96; GCN: ; NumVgprs: 50
97define void @use_50_vgpr() #1 {
98  call void asm sideeffect "", "~{v49}"() #0
99  ret void
100}
101
; Non-leaf function that only calls @use_50_vgpr. The check expects the
; callee's VGPR count (50) to be reported for the caller as well.
102; GCN-LABEL: {{^}}indirect_use_50_vgpr:
103; GCN: ; NumVgprs: 50
104define void @indirect_use_50_vgpr() #0 {
105  call void @use_50_vgpr()
106  ret void
107}
108
; Leaf function whose empty inline asm names only s79 as a clobber. The check
; expects a TotalNumSgprs of 80.
109; GCN-LABEL: {{^}}use_80_sgpr:
110; GCN: ; TotalNumSgprs: 80
111define void @use_80_sgpr() #1 {
112  call void asm sideeffect "", "~{s79}"() #0
113  ret void
114}
115
; Non-leaf function that only calls @use_80_sgpr. The check expects two more
; SGPRs (82) than for the callee.
116; GCN-LABEL: {{^}}indirect_use_80_sgpr:
117; GCN: ; TotalNumSgprs: 82
118define void @indirect_use_80_sgpr() #1 {
119  call void @use_80_sgpr()
120  ret void
121}
122
; Kernel entry point two call levels above @use_80_sgpr. The expected SGPR
; total is 84 for the default-CPU run, 86 for fiji and 96 for iceland.
123; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
124; CI: ; TotalNumSgprs: 84
125; VI-NOBUG: ; TotalNumSgprs: 86
126; VI-BUG: ; TotalNumSgprs: 96
127define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
128  call void @indirect_use_80_sgpr()
129  ret void
130}
131
132
; Leaf function that allocates a 512 x i32 array (2048 bytes) in address
; space 5 and passes its address to an inline asm as a "v" operand so the
; allocation is used. The check expects a ScratchSize of 2052.
133; GCN-LABEL: {{^}}use_stack0:
134; GCN: ScratchSize: 2052
135define void @use_stack0() #1 {
136  %alloca = alloca [512 x i32], align 4, addrspace(5)
137  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
138  ret void
139}
140
; Leaf function that allocates a 100 x i32 array (400 bytes) in address
; space 5 and passes its address to an inline asm as a "v" operand so the
; allocation is used. The check expects a ScratchSize of 404.
141; GCN-LABEL: {{^}}use_stack1:
142; GCN: ScratchSize: 404
143define void @use_stack1() #1 {
144  %alloca = alloca [100 x i32], align 4, addrspace(5)
145  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
146  ret void
147}
148
; Function with its own 16 x i32 (64-byte) allocation in address space 5 that
; also calls @use_stack0. The check expects a ScratchSize of 2132, which is
; larger than the 2052 expected for the callee alone.
149; GCN-LABEL: {{^}}indirect_use_stack:
150; GCN: ScratchSize: 2132
151define void @indirect_use_stack() #1 {
152  %alloca = alloca [16 x i32], align 4, addrspace(5)
153  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
154  call void @use_stack0()
155  ret void
156}
157
; Kernel entry point with no allocation of its own that calls
; @indirect_use_stack. The check expects the same ScratchSize (2132) as for
; that callee.
158; GCN-LABEL: {{^}}indirect_2_level_use_stack:
159; GCN: ScratchSize: 2132
160define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
161  call void @indirect_use_stack()
162  ret void
163}
164
165
166; Should be maximum of callee usage
; The kernel calls @use_stack0 and then @use_stack1; the expected ScratchSize
; of 2052 equals the value expected for @use_stack0 alone, not the sum of the
; two callees.
167; GCN-LABEL: {{^}}multi_call_use_use_stack:
168; GCN: ScratchSize: 2052
169define amdgpu_kernel void @multi_call_use_use_stack() #0 {
170  call void @use_stack0()
171  call void @use_stack1()
172  ret void
173}
174
175
; Declaration only: no body for @external is present in this module.
176declare void @external() #0
177
; Kernel whose only callee is the body-less @external declaration. The runs
; with code object version 400 expect a ScratchSize of 16384, while the
; 500/600 runs expect 0.
; NOTE(review): the TotalNumSgprs/NumVgprs lines below carry none of the
; prefixes passed to FileCheck on the RUN lines, so they are presumably not
; verified -- confirm whether that is intentional.
178; GCN-LABEL: {{^}}usage_external:
179; TotalNumSgprs: 48
180; NumVgprs: 24
181; GCN: ScratchSize: 16384
182;
183; GCN-V5-LABEL: {{^}}usage_external:
184; GCN-V5: ScratchSize: 0
185define amdgpu_kernel void @usage_external() #0 {
186  call void @external()
187  ret void
188}
189
; Declaration only: no body for @external_recurse is present in this module.
; It uses attribute group #2, which does not include norecurse.
190declare void @external_recurse() #2
191
; Kernel whose only callee is the body-less @external_recurse declaration.
; The runs with code object version 400 expect a ScratchSize of 16384, while
; the 500/600 runs expect 0.
; NOTE(review): the TotalNumSgprs/NumVgprs lines below carry none of the
; prefixes passed to FileCheck on the RUN lines, so they are presumably not
; verified -- confirm whether that is intentional.
192; GCN-LABEL: {{^}}usage_external_recurse:
193; TotalNumSgprs: 48
194; NumVgprs: 24
195; GCN: ScratchSize: 16384
196;
197; GCN-V5-LABEL: {{^}}usage_external_recurse:
198; GCN-V5: ScratchSize: 0
199define amdgpu_kernel void @usage_external_recurse() #0 {
200  call void @external_recurse()
201  ret void
202}
203
; Self-recursive function. It allocates a 512 x i32 array (2048 bytes) in
; address space 5, returns immediately when %val is zero, and otherwise calls
; itself with %val - 1. It uses attribute group #2, which does not include
; norecurse. The runs with code object version 400 expect a ScratchSize of
; exactly 18448; the 500/600 runs expect exactly 2064.
204; GCN-LABEL: {{^}}direct_recursion_use_stack:
205; GCN: ScratchSize: 18448{{$}}
206;
207; GCN-V5-LABEL: {{^}}direct_recursion_use_stack:
208; GCN-V5: ScratchSize: 2064{{$}}
209define void @direct_recursion_use_stack(i32 %val) #2 {
210  %alloca = alloca [512 x i32], align 4, addrspace(5)
211  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
212  %cmp = icmp eq i32 %val, 0
213  br i1 %cmp, label %ret, label %call
214
215call:
216  %val.sub1 = sub i32 %val, 1
217  call void @direct_recursion_use_stack(i32 %val.sub1)
218  br label %ret
219
220ret:
221  ret void
222}
223
; Kernel that forwards its %n argument to @direct_recursion_use_stack. Instead
; of ScratchSize, the checks look at the .amdhsa_private_segment_fixed_size
; directive and expect the same numbers as for the callee (18448 for the code
; object version 400 runs, 2064 for the 500/600 runs).
224; GCN-LABEL: {{^}}usage_direct_recursion:
225; GCN: .amdhsa_private_segment_fixed_size 18448
226;
227; GCN-V5-LABEL: {{^}}usage_direct_recursion:
228; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
229define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
230  call void @direct_recursion_use_stack(i32 %n)
231  ret void
232}
233
234; Make sure there's no assert when a sgpr96 is used.
; The kernel feeds a constant <3 x i32> to an inline asm through an "s"
; constraint and then calls the body-less @external. The checks expect the
; asm operand to print as an s[lo:hi] range and the kernel's register counts
; to be emitted as .set expressions over amdgpu.max_num_vgpr and
; amdgpu.max_num_sgpr rather than as plain numbers.
235; GCN-LABEL: {{^}}count_use_sgpr96_external_call
236; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
237; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
238; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
239; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
240; VI-BUG: TotalNumSgprs: 96
241; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
242define amdgpu_kernel void @count_use_sgpr96_external_call()  {
243entry:
244  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
245  call void @external()
246  ret void
247}
248
249; Make sure there's no assert when a sgpr160 is used.
; The kernel feeds a constant <5 x i32> to an inline asm through an "s"
; constraint and then calls the body-less @external. The checks expect the
; asm operand to print as an s[lo:hi] range and the kernel's register counts
; to be emitted as .set expressions over amdgpu.max_num_vgpr and
; amdgpu.max_num_sgpr rather than as plain numbers.
250; GCN-LABEL: {{^}}count_use_sgpr160_external_call
251; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
252; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
253; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
254; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
255; VI-BUG: TotalNumSgprs: 96
256; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
257define amdgpu_kernel void @count_use_sgpr160_external_call()  {
258entry:
259  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
260  call void @external()
261  ret void
262}
263
264; Make sure there's no assert when a vgpr160 is used.
; The kernel feeds a constant <5 x i32> to an inline asm through a "v"
; constraint and then calls the body-less @external. The checks expect the
; asm operand to print as a v[lo:hi] range and the kernel's own VGPR floor in
; the .set expression to be 5 (it is 0 in the two SGPR variants of this test).
265; GCN-LABEL: {{^}}count_use_vgpr160_external_call
266; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
267; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
268; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
269; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
270; VI-BUG: TotalNumSgprs: 96
271; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
272define amdgpu_kernel void @count_use_vgpr160_external_call()  {
273entry:
274  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
275  call void @external()
276  ret void
277}
278
279; GCN: .set amdgpu.max_num_vgpr, 50
280; GCN: .set amdgpu.max_num_agpr, 0
281; GCN: .set amdgpu.max_num_sgpr, 80
282
283; GCN-LABEL: amdhsa.kernels:
284; GCN:      .name: count_use_sgpr96_external_call
285; CI:       .sgpr_count: 84
286; VI-NOBUG: .sgpr_count: 86
287; VI-BUG:   .sgpr_count: 96
288; GCN:      .vgpr_count: 50
289; GCN:      .name: count_use_sgpr160_external_call
290; CI:       .sgpr_count: 84
291; VI-NOBUG: .sgpr_count: 86
292; VI-BUG:   .sgpr_count: 96
293; GCN:      .vgpr_count: 50
294; GCN:      .name: count_use_vgpr160_external_call
295; CI:       .sgpr_count: 84
296; VI-NOBUG: .sgpr_count: 86
297; VI-BUG:   .sgpr_count: 96
298; GCN:      .vgpr_count: 50
299
; Attribute groups referenced by the functions and call sites in this file.
; Groups #0 and #1 are textually identical here. Group #2 keeps only nounwind
; and noinline, dropping norecurse and every "amdgpu-no-*" string attribute.
300attributes #0 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
301attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
302attributes #2 = { nounwind noinline }
303
; Module flag selecting the AMDHSA code object version. The value in !0 is a
; textual placeholder: the sed command on each RUN line replaces it with 400,
; 500 or 600 before the module is piped into llc.
304!llvm.module.flags = !{!0}
305!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
306