xref: /llvm-project/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll (revision c29b265eb9b7b3b6dc44d87fe6fec8a52485847d)
1; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
2
3; CHECK:           .amdgpu_pal_metadata
4; CHECK-NEXT: ---
5; CHECK-NEXT: amdpal.pipelines:
6; CHECK-NEXT:  - .api:            Vulkan
7; CHECK-NEXT:    .compute_registers:
8; CHECK-NEXT:      .tg_size_en:     true
9; CHECK-NEXT:      .tgid_x_en:      false
10; CHECK-NEXT:      .tgid_y_en:      false
11; CHECK-NEXT:      .tgid_z_en:      false
12; CHECK-NEXT:      .tidig_comp_cnt: 0x1
13; CHECK-NEXT:    .hardware_stages:
14; CHECK-NEXT:      .cs:
15; CHECK-NEXT:        .checksum_value: 0x9444d7d0
16; CHECK-NEXT:        .debug_mode:     0
17; CHECK-NEXT:        .excp_en:        0
18; CHECK-NEXT:        .float_mode:     0xc0
19; CHECK-NEXT:        .ieee_mode:      true
20; CHECK-NEXT:        .image_op:       false
21; CHECK-NEXT:        .lds_size:       0x200
22; CHECK-NEXT:        .mem_ordered:    true
23; CHECK-NEXT:        .sgpr_limit:     0x6a
24; CHECK-NEXT:        .threadgroup_dimensions:
25; CHECK-NEXT:          - 0x1
26; CHECK-NEXT:          - 0x400
27; CHECK-NEXT:          - 0x1
28; CHECK-NEXT:        .trap_present:   false
29; CHECK-NEXT:        .user_data_reg_map:
30; CHECK-NEXT:          - 0x10000000
31; CHECK-NEXT:          - 0xffffffff
32; CHECK-NEXT:          - 0
33; CHECK-NEXT:          - 0xffffffff
34; CHECK-NEXT:          - 0xffffffff
35; CHECK-NEXT:          - 0xffffffff
36; CHECK-NEXT:          - 0xffffffff
37; CHECK-NEXT:          - 0xffffffff
38; CHECK-NEXT:          - 0xffffffff
39; CHECK-NEXT:          - 0xffffffff
40; CHECK-NEXT:          - 0xffffffff
41; CHECK-NEXT:          - 0xffffffff
42; CHECK-NEXT:          - 0xffffffff
43; CHECK-NEXT:          - 0xffffffff
44; CHECK-NEXT:          - 0xffffffff
45; CHECK-NEXT:          - 0xffffffff
46; CHECK-NEXT:          - 0xffffffff
47; CHECK-NEXT:          - 0xffffffff
48; CHECK-NEXT:          - 0xffffffff
49; CHECK-NEXT:          - 0xffffffff
50; CHECK-NEXT:          - 0xffffffff
51; CHECK-NEXT:          - 0xffffffff
52; CHECK-NEXT:          - 0xffffffff
53; CHECK-NEXT:          - 0xffffffff
54; CHECK-NEXT:          - 0xffffffff
55; CHECK-NEXT:          - 0xffffffff
56; CHECK-NEXT:          - 0xffffffff
57; CHECK-NEXT:          - 0xffffffff
58; CHECK-NEXT:          - 0xffffffff
59; CHECK-NEXT:          - 0xffffffff
60; CHECK-NEXT:          - 0xffffffff
61; CHECK-NEXT:          - 0xffffffff
62; CHECK-NEXT:        .user_sgprs:     0x3
63; CHECK-NEXT:        .vgpr_limit:     0x100
64; CHECK-NEXT:        .wavefront_size: 0x40
65; CHECK-NEXT:        .wgp_mode:       true
66; CHECK:    .registers:      {}
67; CHECK-NEXT:    .shader_functions:
68; CHECK-NEXT:      dynamic_stack:
69; CHECK-NEXT:        .backend_stack_size: 0x10
70; CHECK-NEXT:        .lds_size:       0
71; CHECK-NEXT:        .sgpr_count:     0x22
72; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
73; CHECK-NEXT:        .vgpr_count:     0x2
74; CHECK-NEXT:      dynamic_stack_loop:
75; CHECK-NEXT:        .backend_stack_size: 0x10
76; CHECK-NEXT:        .lds_size:       0
77; CHECK-NEXT:        .sgpr_count:     0x22
78; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
79; CHECK-NEXT:        .vgpr_count:     0x3
80; CHECK-NEXT:      multiple_stack:
81; CHECK-NEXT:        .backend_stack_size: 0x24
82; CHECK-NEXT:        .lds_size:       0
83; CHECK-NEXT:        .sgpr_count:     0x21
84; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x24
85; CHECK-NEXT:        .vgpr_count:     0x3
86; CHECK-NEXT:      no_stack:
87; CHECK-NEXT:        .backend_stack_size: 0
88; CHECK-NEXT:        .lds_size:       0
89; CHECK-NEXT:        .sgpr_count:     0x20
90; CHECK-NEXT:        .stack_frame_size_in_bytes: 0
91; CHECK-NEXT:        .vgpr_count:     0x1
92; CHECK-NEXT:      no_stack_call:
93; CHECK-NEXT:        .backend_stack_size: 0x10
94; CHECK-NEXT:        .lds_size:       0
95; CHECK-NEXT:        .sgpr_count:     0x22
96; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
97; CHECK-NEXT:        .vgpr_count:     0x3
98; CHECK-NEXT:      no_stack_extern_call:
99; CHECK-NEXT:        .backend_stack_size: 0x10
100; CHECK-NEXT:        .lds_size:       0
101; CHECK-NEXT:        .sgpr_count:     0x29
102; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
103; CHECK-NEXT:        .vgpr_count:     0x58
104; CHECK-NEXT:      no_stack_extern_call_many_args:
105; CHECK-NEXT:        .backend_stack_size: 0x90
106; CHECK-NEXT:        .lds_size:       0
107; CHECK-NEXT:        .sgpr_count:     0x29
108; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x90
109; CHECK-NEXT:        .vgpr_count:     0x58
110; CHECK-NEXT:      no_stack_indirect_call:
111; CHECK-NEXT:        .backend_stack_size: 0x10
112; CHECK-NEXT:        .lds_size:       0
113; CHECK-NEXT:        .sgpr_count:     0x29
114; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
115; CHECK-NEXT:        .vgpr_count:     0x58
116; CHECK-NEXT:      simple_lds:
117; CHECK-NEXT:        .backend_stack_size: 0
118; CHECK-NEXT:        .lds_size:       0x100
119; CHECK-NEXT:        .sgpr_count:     0x20
120; CHECK-NEXT:        .stack_frame_size_in_bytes: 0
121; CHECK-NEXT:        .vgpr_count:     0x1
122; CHECK-NEXT:      simple_lds_recurse:
123; CHECK-NEXT:        .backend_stack_size: 0x10
124; CHECK-NEXT:        .lds_size:       0x100
125; CHECK-NEXT:        .sgpr_count:     0x24
126; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
127; CHECK-NEXT:        .vgpr_count:     0x29
128; CHECK-NEXT:      simple_stack:
129; CHECK-NEXT:        .backend_stack_size: 0x14
130; CHECK-NEXT:        .lds_size:       0
131; CHECK-NEXT:        .sgpr_count:     0x21
132; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x14
133; CHECK-NEXT:        .vgpr_count:     0x2
134; CHECK-NEXT:      simple_stack_call:
135; CHECK-NEXT:        .backend_stack_size: 0x20
136; CHECK-NEXT:        .lds_size:       0
137; CHECK-NEXT:        .sgpr_count:     0x22
138; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
139; CHECK-NEXT:        .vgpr_count:     0x4
140; CHECK-NEXT:      simple_stack_extern_call:
141; CHECK-NEXT:        .backend_stack_size: 0x20
142; CHECK-NEXT:        .lds_size:       0
143; CHECK-NEXT:        .sgpr_count:     0x29
144; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
145; CHECK-NEXT:        .vgpr_count:     0x58
146; CHECK-NEXT:      simple_stack_indirect_call:
147; CHECK-NEXT:        .backend_stack_size: 0x20
148; CHECK-NEXT:        .lds_size:       0
149; CHECK-NEXT:        .sgpr_count:     0x29
150; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
151; CHECK-NEXT:        .vgpr_count:     0x58
152; CHECK-NEXT:      simple_stack_recurse:
153; CHECK-NEXT:        .backend_stack_size: 0x20
154; CHECK-NEXT:        .lds_size:       0
155; CHECK-NEXT:        .sgpr_count:     0x24
156; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
157; CHECK-NEXT:        .vgpr_count:     0x2a
158; CHECK:amdpal.version:
159; CHECK-NEXT:  - 0x3
160; CHECK-NEXT:  - 0
161; CHECK-NEXT:...
162; CHECK-NEXT:        .end_amdgpu_pal_metadata
163
; External amdgpu_gfx callees with no body in this file. @extern_func is called
; by @no_stack_extern_call and @simple_stack_extern_call; @extern_func_many_args
; is called by @no_stack_extern_call_many_args.
164declare amdgpu_gfx float @extern_func(float) #0
165declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
166
; Externally defined constant of pointer type. The *_indirect_call functions
; load their call target from it.
167@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4
168
; Baseline case: no alloca and no call, just %arg0 + 1.0.
; The `no_stack:` CHECK block expects .backend_stack_size and
; .stack_frame_size_in_bytes to both be 0.
169define amdgpu_gfx float @no_stack(float %arg0) #0 {
170  %add = fadd float %arg0, 1.0
171  ret float %add
172}
173
; One alloca of 4 floats in addrspace(5), touched through a volatile
; store/load pair; returns %arg0 plus the loaded value. No calls.
; The `simple_stack:` CHECK block expects a stack size of 0x14.
; This function is also the callee of @no_stack_call and @simple_stack_call.
174define amdgpu_gfx float @simple_stack(float %arg0) #0 {
175  %stack = alloca float, i32 4, align 4, addrspace(5)
176  store volatile float 2.0, ptr addrspace(5) %stack
177  %val = load volatile float, ptr addrspace(5) %stack
178  %add = fadd float %arg0, %val
179  ret float %add
180}
181
; Two separate allocas of 4 floats each, both in the single basic block and
; each accessed through a volatile store/load pair. No calls.
; The `multiple_stack:` CHECK block expects a stack size of 0x24.
182define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
183  %stack = alloca float, i32 4, align 4, addrspace(5)
184  store volatile float 2.0, ptr addrspace(5) %stack
185  %val = load volatile float, ptr addrspace(5) %stack
186  %add = fadd float %arg0, %val
187  %stack2 = alloca float, i32 4, align 4, addrspace(5)
188  store volatile float 2.0, ptr addrspace(5) %stack2
189  %val2 = load volatile float, ptr addrspace(5) %stack2
190  %add2 = fadd float %add, %val2
191  ret float %add2
192}
193
; The alloca sits in %bb1 rather than in the entry block, and %bb1 is reached
; only when %arg0 > 0.0 (ordered compare). Returns 0.0 when %bb1 is skipped,
; otherwise %arg0 plus the value loaded from the stack slot.
; The `dynamic_stack:` CHECK block expects a stack size of 0x10.
194define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
195bb0:
196  %cmp = fcmp ogt float %arg0, 0.0
197  br i1 %cmp, label %bb1, label %bb2
198
199bb1:
; Non-entry-block alloca: this is the construct under test.
200  %stack = alloca float, i32 4, align 4, addrspace(5)
201  store volatile float 2.0, ptr addrspace(5) %stack
202  %val = load volatile float, ptr addrspace(5) %stack
203  %add = fadd float %arg0, %val
204  br label %bb2
205
206bb2:
207  %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
208  ret float %res
209}
210
; The alloca sits inside %bb1, a block that branches back to itself while
; %ctr > 0, so the alloca instruction is part of a loop body.
; The `dynamic_stack_loop:` CHECK block expects a stack size of 0x10.
211define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
212bb0:
213  br label %bb1
214
215bb1:
; %ctr starts at 0 on entry from %bb0 and is decremented on each back edge.
216  %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
217  %stack = alloca float, i32 4, align 4, addrspace(5)
218  store volatile float 2.0, ptr addrspace(5) %stack
219  %val = load volatile float, ptr addrspace(5) %stack
220  %add = fadd float %arg0, %val
221  %cmp = icmp sgt i32 %ctr, 0
222  %newctr = sub i32 %ctr, 1
223  br i1 %cmp, label %bb1, label %bb2
224
225bb2:
226  ret float %add
227}
228
; No alloca of its own; makes a direct call to @simple_stack, which is defined
; in this file, and returns its result.
; The `no_stack_call:` CHECK block expects a stack size of 0x10.
229define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
230  %res = call amdgpu_gfx float @simple_stack(float %arg0)
231  ret float %res
232}
233
; Combines a local alloca of 4 floats with a direct call to @simple_stack,
; which is defined in this file. %val is loaded before the call and used after it.
; The `simple_stack_call:` CHECK block expects a stack size of 0x20.
234define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
235  %stack = alloca float, i32 4, align 4, addrspace(5)
236  store volatile float 2.0, ptr addrspace(5) %stack
237  %val = load volatile float, ptr addrspace(5) %stack
238  %res = call amdgpu_gfx float @simple_stack(float %arg0)
239  %add = fadd float %res, %val
240  ret float %add
241}
242
; No alloca of its own; calls @extern_func, which is only declared in this
; file, so the callee's body is not visible here.
; The `no_stack_extern_call:` CHECK block expects a stack size of 0x10,
; .sgpr_count 0x29 and .vgpr_count 0x58.
243define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
244  %res = call amdgpu_gfx float @extern_func(float %arg0)
245  ret float %res
246}
247
; Combines a local alloca of 4 floats with a call to @extern_func, which is
; only declared in this file. %val is loaded before the call and used after it.
; The `simple_stack_extern_call:` CHECK block expects a stack size of 0x20.
248define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
249  %stack = alloca float, i32 4, align 4, addrspace(5)
250  store volatile float 2.0, ptr addrspace(5) %stack
251  %val = load volatile float, ptr addrspace(5) %stack
252  %res = call amdgpu_gfx float @extern_func(float %arg0)
253  %add = fadd float %res, %val
254  ret float %add
255}
256
; No alloca; forwards its <64 x float> argument unchanged to
; @extern_func_many_args, which is only declared in this file.
; The `no_stack_extern_call_many_args:` CHECK block expects a stack size of 0x90.
257define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
258  %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
259  ret float %res
260}
261
; No alloca; loads a function pointer from @funcptr and calls through it with
; no arguments, then returns %arg0 unchanged.
; The `no_stack_indirect_call:` CHECK block expects a stack size of 0x10.
262define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
263  %fptr = load ptr, ptr addrspace(4) @funcptr
264  call amdgpu_gfx void %fptr()
265  ret float %arg0
266}
267
; Combines a local alloca of 4 floats with an indirect call through the pointer
; loaded from @funcptr. %val is loaded before the call and used after it.
; The `simple_stack_indirect_call:` CHECK block expects a stack size of 0x20.
268define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
269  %stack = alloca float, i32 4, align 4, addrspace(5)
270  store volatile float 2.0, ptr addrspace(5) %stack
271  %val = load volatile float, ptr addrspace(5) %stack
272  %fptr = load ptr, ptr addrspace(4) @funcptr
273  call amdgpu_gfx void %fptr()
274  %add = fadd float %arg0, %val
275  ret float %add
276}
277
; Combines a local alloca of 4 floats with a direct call to itself. The
; recursion is unconditional; the IR has no base case.
; The `simple_stack_recurse:` CHECK block expects a stack size of 0x20.
278define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
279  %stack = alloca float, i32 4, align 4, addrspace(5)
280  store volatile float 2.0, ptr addrspace(5) %stack
281  %val = load volatile float, ptr addrspace(5) %stack
282  %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
283  %add = fadd float %res, %val
284  ret float %add
285}
286
; Array of 64 floats in addrspace(3), read by @simple_lds and
; @simple_lds_recurse. The CHECK blocks for those two functions expect
; .lds_size 0x100; every other function's block expects .lds_size 0.
287@lds = internal addrspace(3) global [64 x float] undef
288
; Loads one float from the start of @lds and returns it; %arg0 is unused.
; No alloca and no call. The `simple_lds:` CHECK block expects .lds_size 0x100
; and a stack size of 0.
289define amdgpu_gfx float @simple_lds(float %arg0) #0 {
290  %val = load float, ptr addrspace(3) @lds
291  ret float %val
292}
293
; Loads one float from the start of @lds and passes it to a direct call to
; itself; %arg0 is unused. The recursion is unconditional.
; The `simple_lds_recurse:` CHECK block expects .lds_size 0x100 and a stack
; size of 0x10.
294define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
295  %val = load float, ptr addrspace(3) @lds
296  %res = call amdgpu_gfx float @simple_lds_recurse(float %val)
297  ret float %res
298}
299
; Attribute group shared by every declaration and definition in this file.
300attributes #0 = { nounwind }
301
302!amdgpu.pal.metadata.msgpack = !{!0}
303
304!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
305!1 = !{i32 7}
306