; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s

; Functions that don't make calls should have constants as their resource usage, since no resource information has to be propagated.

; GCN-LABEL: {{^}}use_vcc:
; GCN: .set use_vcc.num_vgpr, 0
; GCN: .set use_vcc.num_agpr, 0
; GCN: .set use_vcc.numbered_sgpr, 32
; GCN: .set use_vcc.private_seg_size, 0
; GCN: .set use_vcc.uses_vcc, 1
; GCN: .set use_vcc.uses_flat_scratch, 0
; GCN: .set use_vcc.has_dyn_sized_stack, 0
; GCN: .set use_vcc.has_recursion, 0
; GCN: .set use_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr)
; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr)
; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr)
; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size))
; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc)
; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch)
; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack)
; GCN: .set indirect_use_vcc.has_recursion, or(0, use_vcc.has_recursion)
; GCN: .set indirect_use_vcc.has_indirect_call, or(0, use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}
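; Illustrative note (not checked): with use_vcc's constants folded in, the expressions
; above resolve to max(41, 0) = 41 VGPRs and 16 + 0 = 16 bytes of private segment,
; matching the NumVgprs and ScratchSize values reported for this function.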

; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, max(32, indirect_use_vcc.num_vgpr)
; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr)
; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr)
; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size))
; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc)
; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch)
; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack)
; GCN: .set indirect_2level_use_vcc_kernel.has_recursion, or(0, indirect_use_vcc.has_recursion)
; GCN: .set indirect_2level_use_vcc_kernel.has_indirect_call, or(0, indirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_vcc()
  ret void
}

; GCN-LABEL: {{^}}use_flat_scratch:
; GCN: .set use_flat_scratch.num_vgpr, 0
; GCN: .set use_flat_scratch.num_agpr, 0
; GCN: .set use_flat_scratch.numbered_sgpr, 32
; GCN: .set use_flat_scratch.private_seg_size, 0
; GCN: .set use_flat_scratch.uses_vcc, 0
; GCN: .set use_flat_scratch.uses_flat_scratch, 1
; GCN: .set use_flat_scratch.has_dyn_sized_stack, 0
; GCN: .set use_flat_scratch.has_recursion, 0
; GCN: .set use_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr)
; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr)
; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr)
; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size))
; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc)
; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch)
; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack)
; GCN: .set indirect_use_flat_scratch.has_recursion, or(0, use_flat_scratch.has_recursion)
; GCN: .set indirect_use_flat_scratch.has_indirect_call, or(0, use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(indirect_use_flat_scratch.private_seg_size))
; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc)
; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_recursion, or(0, indirect_use_flat_scratch.has_recursion)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, indirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: .set use_10_vgpr.num_vgpr, 10
; GCN: .set use_10_vgpr.num_agpr, 0
; GCN: .set use_10_vgpr.numbered_sgpr, 32
; GCN: .set use_10_vgpr.private_seg_size, 0
; GCN: .set use_10_vgpr.uses_vcc, 0
; GCN: .set use_10_vgpr.uses_flat_scratch, 0
; GCN: .set use_10_vgpr.has_dyn_sized_stack, 0
; GCN: .set use_10_vgpr.has_recursion, 0
; GCN: .set use_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr)
; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr)
; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr)
; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size))
; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc)
; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch)
; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack)
; GCN: .set indirect_use_10_vgpr.has_recursion, or(0, use_10_vgpr.has_recursion)
; GCN: .set indirect_use_10_vgpr.has_indirect_call, or(0, use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN:	.set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr)
; GCN:	.set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr)
; GCN:	.set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr)
; GCN:	.set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size))
; GCN:	.set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc)
; GCN:	.set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch)
; GCN:	.set indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack)
; GCN:	.set indirect_2_level_use_10_vgpr.has_recursion, or(0, indirect_use_10_vgpr.has_recursion)
; GCN:	.set indirect_2_level_use_10_vgpr.has_indirect_call, or(0, indirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}use_50_vgpr:
; GCN:	.set use_50_vgpr.num_vgpr, 50
; GCN:	.set use_50_vgpr.num_agpr, 0
; GCN:	.set use_50_vgpr.numbered_sgpr, 32
; GCN:	.set use_50_vgpr.private_seg_size, 0
; GCN:	.set use_50_vgpr.uses_vcc, 0
; GCN:	.set use_50_vgpr.uses_flat_scratch, 0
; GCN:	.set use_50_vgpr.has_dyn_sized_stack, 0
; GCN:	.set use_50_vgpr.has_recursion, 0
; GCN:	.set use_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN:	.set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr)
; GCN:	.set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr)
; GCN:	.set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr)
; GCN:	.set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size))
; GCN:	.set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc)
; GCN:	.set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch)
; GCN:	.set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack)
; GCN:	.set indirect_use_50_vgpr.has_recursion, or(0, use_50_vgpr.has_recursion)
; GCN:	.set indirect_use_50_vgpr.has_indirect_call, or(0, use_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}
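; Illustrative note (not checked): here the callee dominates the register expression,
; max(41, use_50_vgpr.num_vgpr) = max(41, 50) = 50, while the scratch usage is still
; just the 16-byte call frame since use_50_vgpr has no stack of its own.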

; GCN-LABEL: {{^}}use_80_sgpr:
; GCN:	.set use_80_sgpr.num_vgpr, 1
; GCN:	.set use_80_sgpr.num_agpr, 0
; GCN:	.set use_80_sgpr.numbered_sgpr, 80
; GCN:	.set use_80_sgpr.private_seg_size, 8
; GCN:	.set use_80_sgpr.uses_vcc, 0
; GCN:	.set use_80_sgpr.uses_flat_scratch, 0
; GCN:	.set use_80_sgpr.has_dyn_sized_stack, 0
; GCN:	.set use_80_sgpr.has_recursion, 0
; GCN:	.set use_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 1
; GCN: ScratchSize: 8
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN:	.set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr)
; GCN:	.set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr)
; GCN:	.set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr)
; GCN:	.set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size))
; GCN:	.set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc)
; GCN:	.set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch)
; GCN:	.set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack)
; GCN:	.set indirect_use_80_sgpr.has_recursion, or(0, use_80_sgpr.has_recursion)
; GCN:	.set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN:	.set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr)
; GCN:	.set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr)
; GCN:	.set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr)
; GCN:	.set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size))
; GCN:	.set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc)
; GCN:	.set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch)
; GCN:	.set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack)
; GCN:	.set indirect_2_level_use_80_sgpr.has_recursion, or(0, indirect_use_80_sgpr.has_recursion)
; GCN:	.set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}

; GCN-LABEL: {{^}}use_stack0:
; GCN:	.set use_stack0.num_vgpr, 1
; GCN:	.set use_stack0.num_agpr, 0
; GCN:	.set use_stack0.numbered_sgpr, 33
; GCN:	.set use_stack0.private_seg_size, 2052
; GCN:	.set use_stack0.uses_vcc, 0
; GCN:	.set use_stack0.uses_flat_scratch, 0
; GCN:	.set use_stack0.has_dyn_sized_stack, 0
; GCN:	.set use_stack0.has_recursion, 0
; GCN:	.set use_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; GCN-LABEL: {{^}}use_stack1:
; GCN:	.set use_stack1.num_vgpr, 1
; GCN:	.set use_stack1.num_agpr, 0
; GCN:	.set use_stack1.numbered_sgpr, 33
; GCN:	.set use_stack1.private_seg_size, 404
; GCN:	.set use_stack1.uses_vcc, 0
; GCN:	.set use_stack1.uses_flat_scratch, 0
; GCN:	.set use_stack1.has_dyn_sized_stack, 0
; GCN:	.set use_stack1.has_recursion, 0
; GCN:	.set use_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_stack:
; GCN:	.set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr)
; GCN:	.set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr)
; GCN:	.set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr)
; GCN:	.set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size))
; GCN:	.set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc)
; GCN:	.set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch)
; GCN:	.set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack)
; GCN:	.set indirect_use_stack.has_recursion, or(0, use_stack0.has_recursion)
; GCN:	.set indirect_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  call void @use_stack0()
  ret void
}
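; Illustrative note (not checked): the caller's own 80 bytes of stack are added on top
; of the callee's maximum, so private_seg_size resolves to 80 + 2052 = 2132, matching
; the ScratchSize reported above.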

; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN:	.set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr)
; GCN:	.set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr)
; GCN:	.set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr)
; GCN:	.set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size))
; GCN:	.set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc)
; GCN:	.set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch)
; GCN:	.set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack)
; GCN:	.set indirect_2_level_use_stack.has_recursion, or(0, indirect_use_stack.has_recursion)
; GCN:	.set indirect_2_level_use_stack.has_indirect_call, or(0, indirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}


; Should be the maximum of the callees' usage.
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN:	.set multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr)
; GCN:	.set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr)
; GCN:	.set multi_call_use_use_stack.numbered_sgpr, max(42, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr)
; GCN:	.set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN:	.set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc)
; GCN:	.set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch)
; GCN:	.set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack)
; GCN:	.set multi_call_use_use_stack.has_recursion, or(0, use_stack0.has_recursion, use_stack1.has_recursion)
; GCN:	.set multi_call_use_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call)
; GCN: TotalNumSgprs: 48
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}
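; Illustrative note (not checked): with two callees, the propagated values take the
; maximum across all of them, e.g. private_seg_size = 0 + max(2052, 404) = 2052 and
; num_vgpr = max(41, 1, 1) = 41.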

declare void @external() #0

; GCN-LABEL: {{^}}multi_call_with_external:
; GCN:	.set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN:	.set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
; GCN:	.set multi_call_with_external.private_seg_size, 0
; GCN:	.set multi_call_with_external.uses_vcc, 1
; GCN:	.set multi_call_with_external.uses_flat_scratch, 1
; GCN:	.set multi_call_with_external.has_dyn_sized_stack, 1
; GCN:	.set multi_call_with_external.has_recursion, 0
; GCN:	.set multi_call_with_external.has_indirect_call, 1
; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
; GCN: NumVgprs: multi_call_with_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @multi_call_with_external() #0 {
  call void @use_stack0()
  call void @use_stack1()
  call void @external()
  ret void
}
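; Illustrative note (not checked): because @external has no body in this module, the
; register expressions above fall back to the module-level amdgpu.max_num_* symbols,
; the usage flags are set conservatively, and TotalNumSgprs/NumVgprs are emitted as
; symbolic expressions rather than constants.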

; GCN-LABEL: {{^}}usage_external:
; GCN:	.set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN:	.set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set usage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN:	.set usage_external.private_seg_size, 0
; GCN:	.set usage_external.uses_vcc, 1
; GCN:	.set usage_external.uses_flat_scratch, 1
; GCN:	.set usage_external.has_dyn_sized_stack, 1
; GCN:	.set usage_external.has_recursion, 0
; GCN:	.set usage_external.has_indirect_call, 1
; GCN: TotalNumSgprs: usage_external.numbered_sgpr+6
; GCN: NumVgprs: usage_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}

declare void @external_recurse() #2

; GCN-LABEL: {{^}}usage_external_recurse:
; GCN:	.set usage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN:	.set usage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set usage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN:	.set usage_external_recurse.private_seg_size, 0
; GCN:	.set usage_external_recurse.uses_vcc, 1
; GCN:	.set usage_external_recurse.uses_flat_scratch, 1
; GCN:	.set usage_external_recurse.has_dyn_sized_stack, 1
; GCN:	.set usage_external_recurse.has_recursion, 1
; GCN:	.set usage_external_recurse.has_indirect_call, 1
; GCN: TotalNumSgprs: usage_external_recurse.numbered_sgpr+6
; GCN: NumVgprs: usage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}

; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: .set direct_recursion_use_stack.num_vgpr, 41
; GCN: .set direct_recursion_use_stack.num_agpr, 0
; GCN: .set direct_recursion_use_stack.numbered_sgpr, 36
; GCN: .set direct_recursion_use_stack.private_seg_size, 2064
; GCN: .set direct_recursion_use_stack.uses_vcc, 1
; GCN: .set direct_recursion_use_stack.uses_flat_scratch, 0
; GCN: .set direct_recursion_use_stack.has_dyn_sized_stack, 0
; GCN: .set direct_recursion_use_stack.has_recursion, 1
; GCN: .set direct_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}
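; Illustrative note (not checked): for the directly recursive function the propagated
; fields are plain constants rather than self-referential max() expressions; the
; reported private_seg_size of 2064 presumably covers a single frame of the recursion,
; and has_recursion is set to 1.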

; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN:  .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr)
; GCN:  .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr)
; GCN:  .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr)
; GCN:  .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size))
; GCN:  .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc)
; GCN:  .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch)
; GCN:  .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack)
; GCN:  .set usage_direct_recursion.has_recursion, or(1, direct_recursion_use_stack.has_recursion)
; GCN:  .set usage_direct_recursion.has_indirect_call, or(0, direct_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}

; Make sure there's no assert when an sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN:	.set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN:	.set count_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN:	.set count_use_sgpr96_external_call.private_seg_size, 0
; GCN:	.set count_use_sgpr96_external_call.uses_vcc, 1
; GCN:	.set count_use_sgpr96_external_call.uses_flat_scratch, 1
; GCN:	.set count_use_sgpr96_external_call.has_dyn_sized_stack, 1
; GCN:	.set count_use_sgpr96_external_call.has_recursion, 0
; GCN:	.set count_use_sgpr96_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr96_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when an sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN:	.set count_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN:	.set count_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN:	.set count_use_sgpr160_external_call.private_seg_size, 0
; GCN:	.set count_use_sgpr160_external_call.uses_vcc, 1
; GCN:	.set count_use_sgpr160_external_call.uses_flat_scratch, 1
; GCN:	.set count_use_sgpr160_external_call.has_dyn_sized_stack, 1
; GCN:	.set count_use_sgpr160_external_call.has_recursion, 0
; GCN:	.set count_use_sgpr160_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN:	.set count_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN:	.set count_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN:	.set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN:	.set count_use_vgpr160_external_call.private_seg_size, 0
; GCN:	.set count_use_vgpr160_external_call.uses_vcc, 1
; GCN:	.set count_use_vgpr160_external_call.uses_flat_scratch, 1
; GCN:	.set count_use_vgpr160_external_call.has_dyn_sized_stack, 1
; GCN:	.set count_use_vgpr160_external_call.has_recursion, 0
; GCN:	.set count_use_vgpr160_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_vgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Added at the end of the .s are the module-level maximums.
; GCN:	.set amdgpu.max_num_vgpr, 50
; GCN:	.set amdgpu.max_num_agpr, 0
; GCN:	.set amdgpu.max_num_sgpr, 80
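; Illustrative note (not checked): these module-level maximums line up with the largest
; direct uses above: 50 VGPRs from use_50_vgpr, 80 numbered SGPRs from use_80_sgpr, and
; no AGPR uses anywhere in the module.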

attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }
