; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ret.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; vgpr: a vertex shader that exports %arg3 and then returns
; { %arg3 + 1.0, %arg3 }.
; The checks expect the incoming v0 to be copied to v1, and a wait on the
; export counter to appear before the add that writes v0 (the export line
; uses v0 as its source). The function returns values, so the checks also
; require that no s_endpgm is emitted for it.
; GCN-LABEL: {{^}}vgpr:
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v1
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ; Export %arg3 in all four slots; both trailing i1 flags are set, which the
  ; expected output prints as "done vm".
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %x = fadd float %arg3, 1.000000e+00
  ; Element 0 is the sum, element 1 is the unmodified input.
  %a = insertvalue { float, float } undef, float %x, 0
  %b = insertvalue { float, float } %a, float %arg3, 1
  ret { float, float } %b
}

; vgpr_literal: a vertex shader that exports %arg3 and then returns four
; floating-point constants.
; The checks expect the constants to be materialized into v0..v3 after the
; export, together with a wait on the export counter, and no s_endpgm.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ; The export still reads the input in v0, which the return value overwrites.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}

; vgpr_ps_addr0: a pixel shader with "InitialPSInputAddr"="0" (attribute
; group #1) that returns pieces of four of its VGPR inputs.
;
; The .long lines are (register, value) pairs emitted ahead of the function
; label. 165580 = 0x286CC and 165584 = 0x286D0; presumably these are the
; SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR registers -- confirm against the
; AMDGPU backend register definitions.
; 562 = 0x232 has bits 1, 4, 5 and 9 set, which are the positions (counting
; the VGPR arguments from %arg3 as position 0) of the four inputs the body
; reads: %arg4, %arg7, %arg8 and %arg12.
;
; With only those inputs occupying VGPRs, the first three results are expected
; to be in place already (no moves into v0..v2); only v3 and v4 need copies.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Pick both lanes of %arg4 and lane 0 of %arg7 and %arg8.
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the integer lanes as floats for the return struct.
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  ; The fifth element is the scalar float input %arg12.
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; ps_input_ena_no_inputs: a pixel shader that reads none of its inputs and
; returns the constant 1.0.
; Even though no input is used, the checks expect the value 1 (not 0) in both
; (register, value) pairs emitted ahead of the function label.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ret float 1.000000e+00
}

; ps_input_ena_pos_w: a pixel shader that returns %arg14 followed by %arg8
; reinterpreted as <2 x float>.
; 2081 = 0x821 has bits 0, 5 and 11 set. Counting the VGPR arguments from
; %arg3 as position 0, bits 5 and 11 are the two inputs the body reads (%arg8
; and %arg14); bit 0 is expected to be set as well although %arg3 is unused.
; NOTE(review): the function name suggests position 11 is POS_W and that using
; it forces another input on -- confirm against the AMDGPU backend.
; The expected moves are consistent with the inputs sitting in v2..v3 (%arg8)
; and v4 (%arg14) on entry.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN-DAG: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  %f = bitcast <2 x i32> %arg8 to <2 x float>
  ; Scalar first, then the two-element vector.
  %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
  %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
  ret { float, <2 x float> } %s1
}

; vgpr_ps_addr1: same body as vgpr_ps_addr0, but with
; "InitialPSInputAddr"="1" (attribute group #2).
; The first value stays 562 (bits 1, 4, 5, 9: the inputs actually read), while
; the second becomes 563 = 562 | 1, i.e. the initial address bit is added.
; The expected moves are consistent with %arg3 now occupying v0..v1, which
; shifts every used input up by two registers, so all five results need a copy.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
  ; Pick both lanes of %arg4 and lane 0 of %arg7 and %arg8.
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the integer lanes as floats for the return struct.
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  ; The fifth element is the scalar float input %arg12.
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; vgpr_ps_addr119: same body as vgpr_ps_addr0, but with
; "InitialPSInputAddr"="119" (attribute group #3).
; The first value stays 562; the second becomes 631 = 562 | 119
; (0x232 | 0x077 = 0x277), i.e. bits 0, 1, 2, 4, 5, 6 and 9.
; The expected moves are consistent with %arg3, %arg5 and %arg9 also occupying
; VGPRs, which places the used inputs at v2..v3, v6, v8 and v12.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN-DAG: v_mov_b32_e32 v2, v6
; GCN-DAG: v_mov_b32_e32 v3, v8
; GCN-DAG: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
  ; Pick both lanes of %arg4 and lane 0 of %arg7 and %arg8.
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the integer lanes as floats for the return struct.
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  ; The fifth element is the scalar float input %arg12.
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; vgpr_ps_addr418: same body as vgpr_ps_addr0, but with
; "InitialPSInputAddr"="418" (attribute group #4).
; The first value stays 562; the second becomes 946 = 562 | 418
; (0x232 | 0x1A2 = 0x3B2), i.e. bits 1, 4, 5, 7, 8 and 9.
; None of the extra bits precede the first three used inputs, so v0..v2 are
; expected to be in place already; the expected moves are consistent with
; %arg10 and %arg11 occupying v6 and v7, which pushes %arg12 up to v8.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
  ; Pick both lanes of %arg4 and lane 0 of %arg7 and %arg8.
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the integer lanes as floats for the return struct.
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  ; The fifth element is the scalar float input %arg12.
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; sgpr: a vertex shader returning three i32 values, which the checks expect in
; s0..s2: element 0 = %arg2 + 2, element 1 = %arg1, element 2 = %arg2.
; The expected instructions are consistent with %arg1 arriving in s2 and
; %arg2 in s3. The s1 check mirrors the identical one in the "both" test,
; which returns the same three scalar values.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
  %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
  ; Chain from %b rather than %a so that element 1 (%arg1) is part of the
  ; returned struct instead of being a dead value.
  %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
  ret { i32, i32, i32 } %c
}

; sgpr_literal: a vertex shader returning four i32 constants, which the checks
; expect to be materialized directly into s0..s3, with no redundant
; self-copy of s0 and no s_endpgm.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ; The result depends on no argument; all four elements are literals.
  ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}

; both: a vertex shader that exports %arg3 and returns a struct mixing float
; and i32 elements. The checks expect the floats in v0..v1 and the integers in
; s0..s2:
;   v0 = %arg3 + 1.0   s0 = %arg2 + 2
;   v1 = %arg3         s1 = %arg1
;                      s2 = %arg2
; All checks are order-independent apart from the final negative check.
; GCN-LABEL: {{^}}both:
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %v = fadd float %arg3, 1.000000e+00
  %s = add i32 %arg2, 2
  ; Interleave float and integer elements in the returned struct.
  %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
  %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
  %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
  %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
  %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
  ret { float, i32, float, i32, i32 } %a4
}

; structure_literal: a vertex shader that exports %arg3 and returns a constant
; nested struct { { float, i32 }, { i32, <2 x float> } }.
; The checks expect the nested aggregate to be flattened: the floats go to
; v0..v2 and the integers to s0..s1, in element order. As in the other
; returning tests, no s_endpgm may follow.
; GCN-LABEL: {{^}}structure_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}

; ret_return_to_epilog_pseudo_size: a pixel shader whose only instruction
; returns an undefined float. The check expects the reported code size to be
; exactly zero bytes, i.e. the return itself contributes no encoded bytes.
; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
; GCN: codeLenInByte = 0{{$}}
define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
  ret float undef
}

; Export intrinsic called by the vgpr, vgpr_literal, both and
; structure_literal tests.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; Attribute groups. #1..#4 differ only in the "InitialPSInputAddr" value; the
; pixel-shader tests use them to vary the second .long value they check.
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }