xref: /llvm-project/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK
3
; Declarations of the fadd reduction intrinsics called by the tests below.
; Every variant takes a scalar start value followed by the vector to reduce.
; Single-element vectors, one per element type (half, float, double, fp128):
4declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
5declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
6declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
7declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
8
; Multi-element vectors: 3, 5 and 16 floats, and 2 fp128 values.
9declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
10declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
11declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
12declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
13
; fadd reduction of a <1 x half> vector with a caller-supplied start value %s.
; The expected output performs the addition in single precision: it calls
; __aeabi_h2f, __aeabi_f2h and __aeabi_h2f, then one vadd.f32, then a final
; __aeabi_f2h on the result. d8 is saved and restored because s16 holds one
; operand across a call.
14define half @test_v1f16(<1 x half> %a, half %s) nounwind {
15; CHECK-LABEL: test_v1f16:
16; CHECK:       @ %bb.0:
17; CHECK-NEXT:    .save {r4, r5, r11, lr}
18; CHECK-NEXT:    push {r4, r5, r11, lr}
19; CHECK-NEXT:    .vsave {d8}
20; CHECK-NEXT:    vpush {d8}
21; CHECK-NEXT:    mov r4, r0
22; CHECK-NEXT:    mov r0, r1
23; CHECK-NEXT:    bl __aeabi_h2f
24; CHECK-NEXT:    mov r5, r0
25; CHECK-NEXT:    mov r0, r4
26; CHECK-NEXT:    bl __aeabi_f2h
27; CHECK-NEXT:    vmov s16, r5
28; CHECK-NEXT:    bl __aeabi_h2f
29; CHECK-NEXT:    vmov s0, r0
30; CHECK-NEXT:    vadd.f32 s0, s16, s0
31; CHECK-NEXT:    vmov r0, s0
32; CHECK-NEXT:    bl __aeabi_f2h
33; CHECK-NEXT:    vpop {d8}
34; CHECK-NEXT:    pop {r4, r5, r11, lr}
35; CHECK-NEXT:    mov pc, lr
36  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
37  ret half %b
38}
39
; fadd reduction of a <1 x half> vector with the constant start value -0.0.
; The expected output contains no add instruction at all: only a single call
; to __aeabi_f2h wrapped in the push/pop of {r11, lr}.
40define half @test_v1f16_neutral(<1 x half> %a) nounwind {
41; CHECK-LABEL: test_v1f16_neutral:
42; CHECK:       @ %bb.0:
43; CHECK-NEXT:    .save {r11, lr}
44; CHECK-NEXT:    push {r11, lr}
45; CHECK-NEXT:    bl __aeabi_f2h
46; CHECK-NEXT:    pop {r11, lr}
47; CHECK-NEXT:    mov pc, lr
48  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
49  ret half %b
50}
51
; fadd reduction of a <1 x float> vector with a caller-supplied start value %s.
; The expected output is one vadd.f32 of the values moved in from r1 and r0,
; with the sum moved back to r0.
52define float @test_v1f32(<1 x float> %a, float %s) nounwind {
53; CHECK-LABEL: test_v1f32:
54; CHECK:       @ %bb.0:
55; CHECK-NEXT:    vmov s0, r0
56; CHECK-NEXT:    vmov s2, r1
57; CHECK-NEXT:    vadd.f32 s0, s2, s0
58; CHECK-NEXT:    vmov r0, s0
59; CHECK-NEXT:    mov pc, lr
60  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
61  ret float %b
62}
63
; fadd reduction of a <1 x float> vector with the constant start value -0.0.
; The expected output is just the return instruction, i.e. the whole reduction
; folds away and the incoming value is returned unchanged.
64define float @test_v1f32_neutral(<1 x float> %a) nounwind {
65; CHECK-LABEL: test_v1f32_neutral:
66; CHECK:       @ %bb.0:
67; CHECK-NEXT:    mov pc, lr
68  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
69  ret float %b
70}
71
; fadd reduction of a <1 x double> vector with a caller-supplied start value %s.
; The expected output is one vadd.f64 of the values assembled from r2:r3 and
; r0:r1, with the sum moved back to r0:r1.
72define double @test_v1f64(<1 x double> %a, double %s) nounwind {
73; CHECK-LABEL: test_v1f64:
74; CHECK:       @ %bb.0:
75; CHECK-NEXT:    vmov d16, r0, r1
76; CHECK-NEXT:    vmov d17, r2, r3
77; CHECK-NEXT:    vadd.f64 d16, d17, d16
78; CHECK-NEXT:    vmov r0, r1, d16
79; CHECK-NEXT:    mov pc, lr
80  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
81  ret double %b
82}
83
; fadd reduction of a <1 x double> vector with the constant start value -0.0.
; The expected output is just the return instruction; no add is emitted.
84define double @test_v1f64_neutral(<1 x double> %a) nounwind {
85; CHECK-LABEL: test_v1f64_neutral:
86; CHECK:       @ %bb.0:
87; CHECK-NEXT:    mov pc, lr
88  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
89  ret double %b
90}
91
; fadd reduction of a <1 x fp128> vector with a caller-supplied start value %s.
; The expected output is a single call to __addtf3. Before the call, four
; words are loaded from the incoming stack area into r0-r3 and the original
; r0-r3 are stored to the 16 bytes of newly reserved stack.
92define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
93; CHECK-LABEL: test_v1f128:
94; CHECK:       @ %bb.0:
95; CHECK-NEXT:    .save {r4, r5, r11, lr}
96; CHECK-NEXT:    push {r4, r5, r11, lr}
97; CHECK-NEXT:    .pad #16
98; CHECK-NEXT:    sub sp, sp, #16
99; CHECK-NEXT:    ldr r12, [sp, #32]
100; CHECK-NEXT:    ldr lr, [sp, #36]
101; CHECK-NEXT:    ldr r4, [sp, #40]
102; CHECK-NEXT:    ldr r5, [sp, #44]
103; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
104; CHECK-NEXT:    mov r0, r12
105; CHECK-NEXT:    mov r1, lr
106; CHECK-NEXT:    mov r2, r4
107; CHECK-NEXT:    mov r3, r5
108; CHECK-NEXT:    bl __addtf3
109; CHECK-NEXT:    add sp, sp, #16
110; CHECK-NEXT:    pop {r4, r5, r11, lr}
111; CHECK-NEXT:    mov pc, lr
112  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
113  ret fp128 %b
114}
115
; fadd reduction of a <1 x fp128> vector with a constant start value.
; NOTE(review): the fp128 literal appears to be -0.0 (only the sign bit set),
; matching the other _neutral tests - confirm against the LangRef hex format.
; The expected output is just the return instruction; no __addtf3 call.
116define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
117; CHECK-LABEL: test_v1f128_neutral:
118; CHECK:       @ %bb.0:
119; CHECK-NEXT:    mov pc, lr
120  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
121  ret fp128 %b
122}
123
; fadd reduction of a <3 x float> vector with a caller-supplied start value %s.
; The expected output is a chain of three dependent vadd.f32 instructions:
; the accumulator s0 is loaded from [sp] and then s4, s5 and s6 are added to
; it one after another, in that order.
124define float @test_v3f32(<3 x float> %a, float %s) nounwind {
125; CHECK-LABEL: test_v3f32:
126; CHECK:       @ %bb.0:
127; CHECK-NEXT:    vmov d3, r2, r3
128; CHECK-NEXT:    vldr s0, [sp]
129; CHECK-NEXT:    vmov d2, r0, r1
130; CHECK-NEXT:    vadd.f32 s0, s0, s4
131; CHECK-NEXT:    vadd.f32 s0, s0, s5
132; CHECK-NEXT:    vadd.f32 s0, s0, s6
133; CHECK-NEXT:    vmov r0, s0
134; CHECK-NEXT:    mov pc, lr
135  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
136  ret float %b
137}
138
; fadd reduction of a <3 x float> vector with the constant start value -0.0.
; The expected output has only two vadd.f32 instructions (s0 + s1, then + s2);
; no add is emitted for the start value.
139define float @test_v3f32_neutral(<3 x float> %a) nounwind {
140; CHECK-LABEL: test_v3f32_neutral:
141; CHECK:       @ %bb.0:
142; CHECK-NEXT:    vmov d1, r2, r3
143; CHECK-NEXT:    vmov d0, r0, r1
144; CHECK-NEXT:    vadd.f32 s4, s0, s1
145; CHECK-NEXT:    vadd.f32 s0, s4, s2
146; CHECK-NEXT:    vmov r0, s0
147; CHECK-NEXT:    mov pc, lr
148  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
149  ret float %b
150}
151
; fadd reduction of a <5 x float> vector with a caller-supplied start value %s.
; The expected output is a chain of five dependent vadd.f32 instructions.
; The accumulator s0 is loaded from [sp, #4]; the values added to it come, in
; order, from r0, r1, r2, r3 and finally [sp], each staged through s2.
152define float @test_v5f32(<5 x float> %a, float %s) nounwind {
153; CHECK-LABEL: test_v5f32:
154; CHECK:       @ %bb.0:
155; CHECK-NEXT:    vldr s0, [sp, #4]
156; CHECK-NEXT:    vmov s2, r0
157; CHECK-NEXT:    vadd.f32 s0, s0, s2
158; CHECK-NEXT:    vmov s2, r1
159; CHECK-NEXT:    vadd.f32 s0, s0, s2
160; CHECK-NEXT:    vmov s2, r2
161; CHECK-NEXT:    vadd.f32 s0, s0, s2
162; CHECK-NEXT:    vmov s2, r3
163; CHECK-NEXT:    vadd.f32 s0, s0, s2
164; CHECK-NEXT:    vldr s2, [sp]
165; CHECK-NEXT:    vadd.f32 s0, s0, s2
166; CHECK-NEXT:    vmov r0, s0
167; CHECK-NEXT:    mov pc, lr
168  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
169  ret float %b
170}
171
; fadd reduction of a <5 x float> vector with the constant start value -0.0.
; The expected output has four dependent vadd.f32 instructions: the values
; from r0 and r1 are added first, then r2, r3 and [sp] are added in order.
; No add is emitted for the start value.
172define float @test_v5f32_neutral(<5 x float> %a) nounwind {
173; CHECK-LABEL: test_v5f32_neutral:
174; CHECK:       @ %bb.0:
175; CHECK-NEXT:    vmov s0, r1
176; CHECK-NEXT:    vmov s2, r0
177; CHECK-NEXT:    vadd.f32 s0, s2, s0
178; CHECK-NEXT:    vmov s2, r2
179; CHECK-NEXT:    vadd.f32 s0, s0, s2
180; CHECK-NEXT:    vmov s2, r3
181; CHECK-NEXT:    vadd.f32 s0, s0, s2
182; CHECK-NEXT:    vldr s2, [sp]
183; CHECK-NEXT:    vadd.f32 s0, s0, s2
184; CHECK-NEXT:    vmov r0, s0
185; CHECK-NEXT:    mov pc, lr
186  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
187  ret float %b
188}
189
; fadd reduction of a <2 x fp128> vector with a caller-supplied start value %s.
; The expected output makes two calls to __addtf3. For the first call, four
; words loaded from [sp, #48..#60] go in r0-r3 and the incoming r0-r3 are
; stored to the outgoing stack area. For the second call, r0-r3 are left as
; the first call returned them and the four words from [sp, #32..#44] are
; copied to the outgoing stack area.
190define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
191; CHECK-LABEL: test_v2f128:
192; CHECK:       @ %bb.0:
193; CHECK-NEXT:    .save {r4, r5, r11, lr}
194; CHECK-NEXT:    push {r4, r5, r11, lr}
195; CHECK-NEXT:    .pad #16
196; CHECK-NEXT:    sub sp, sp, #16
197; CHECK-NEXT:    ldr r12, [sp, #48]
198; CHECK-NEXT:    ldr lr, [sp, #52]
199; CHECK-NEXT:    ldr r4, [sp, #56]
200; CHECK-NEXT:    ldr r5, [sp, #60]
201; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
202; CHECK-NEXT:    mov r0, r12
203; CHECK-NEXT:    mov r1, lr
204; CHECK-NEXT:    mov r2, r4
205; CHECK-NEXT:    mov r3, r5
206; CHECK-NEXT:    bl __addtf3
207; CHECK-NEXT:    ldr r4, [sp, #32]
208; CHECK-NEXT:    ldr r5, [sp, #40]
209; CHECK-NEXT:    ldr lr, [sp, #44]
210; CHECK-NEXT:    ldr r12, [sp, #36]
211; CHECK-NEXT:    stm sp, {r4, r12}
212; CHECK-NEXT:    str r5, [sp, #8]
213; CHECK-NEXT:    str lr, [sp, #12]
214; CHECK-NEXT:    bl __addtf3
215; CHECK-NEXT:    add sp, sp, #16
216; CHECK-NEXT:    pop {r4, r5, r11, lr}
217; CHECK-NEXT:    mov pc, lr
218  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
219  ret fp128 %b
220}
221
; fadd reduction of a <2 x fp128> vector with a constant start value.
; NOTE(review): the fp128 literal appears to be -0.0 (only the sign bit set),
; matching the other _neutral tests - confirm against the LangRef hex format.
; The expected output makes a single call to __addtf3: r0-r3 are passed
; through untouched and the four words from [sp, #32..#44] are copied to the
; outgoing stack area. No call is emitted for the start value.
222define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
223; CHECK-LABEL: test_v2f128_neutral:
224; CHECK:       @ %bb.0:
225; CHECK-NEXT:    .save {r4, r5, r11, lr}
226; CHECK-NEXT:    push {r4, r5, r11, lr}
227; CHECK-NEXT:    .pad #16
228; CHECK-NEXT:    sub sp, sp, #16
229; CHECK-NEXT:    ldr r12, [sp, #36]
230; CHECK-NEXT:    ldr lr, [sp, #32]
231; CHECK-NEXT:    ldr r4, [sp, #40]
232; CHECK-NEXT:    ldr r5, [sp, #44]
233; CHECK-NEXT:    str lr, [sp]
234; CHECK-NEXT:    str r12, [sp, #4]
235; CHECK-NEXT:    str r4, [sp, #8]
236; CHECK-NEXT:    str r5, [sp, #12]
237; CHECK-NEXT:    bl __addtf3
238; CHECK-NEXT:    add sp, sp, #16
239; CHECK-NEXT:    pop {r4, r5, r11, lr}
240; CHECK-NEXT:    mov pc, lr
241  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
242  ret fp128 %b
243}
244
; fadd reduction of a <16 x float> vector with a caller-supplied start value %s.
; The expected output is a chain of sixteen dependent vadd.f32 instructions
; into the accumulator s0, which is first loaded from [sp, #48]. The lanes
; are consumed four at a time through s4-s7: the first group is built from
; r0-r3, and the next three groups are loaded with vld1.64 {d2, d3} from sp,
; sp+16 and sp+32.
245define float @test_v16f32(<16 x float> %a, float %s) nounwind {
246; CHECK-LABEL: test_v16f32:
247; CHECK:       @ %bb.0:
248; CHECK-NEXT:    vmov d3, r2, r3
249; CHECK-NEXT:    vldr s0, [sp, #48]
250; CHECK-NEXT:    vmov d2, r0, r1
251; CHECK-NEXT:    mov r0, sp
252; CHECK-NEXT:    vadd.f32 s0, s0, s4
253; CHECK-NEXT:    vadd.f32 s0, s0, s5
254; CHECK-NEXT:    vadd.f32 s0, s0, s6
255; CHECK-NEXT:    vadd.f32 s0, s0, s7
256; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
257; CHECK-NEXT:    add r0, sp, #16
258; CHECK-NEXT:    vadd.f32 s0, s0, s4
259; CHECK-NEXT:    vadd.f32 s0, s0, s5
260; CHECK-NEXT:    vadd.f32 s0, s0, s6
261; CHECK-NEXT:    vadd.f32 s0, s0, s7
262; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
263; CHECK-NEXT:    add r0, sp, #32
264; CHECK-NEXT:    vadd.f32 s0, s0, s4
265; CHECK-NEXT:    vadd.f32 s0, s0, s5
266; CHECK-NEXT:    vadd.f32 s0, s0, s6
267; CHECK-NEXT:    vadd.f32 s0, s0, s7
268; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
269; CHECK-NEXT:    vadd.f32 s0, s0, s4
270; CHECK-NEXT:    vadd.f32 s0, s0, s5
271; CHECK-NEXT:    vadd.f32 s0, s0, s6
272; CHECK-NEXT:    vadd.f32 s0, s0, s7
273; CHECK-NEXT:    vmov r0, s0
274; CHECK-NEXT:    mov pc, lr
275  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
276  ret float %b
277}
278
; fadd reduction of a <16 x float> vector with the constant start value -0.0.
; The expected output is a chain of fifteen dependent vadd.f32 instructions:
; the first four lanes (s0-s3, built from r0-r3) are summed with three adds,
; then three groups of four lanes loaded with vld1.64 {d2, d3} from sp, sp+16
; and sp+32 are added in order. No add is emitted for the start value.
279define float @test_v16f32_neutral(<16 x float> %a) nounwind {
280; CHECK-LABEL: test_v16f32_neutral:
281; CHECK:       @ %bb.0:
282; CHECK-NEXT:    vmov d1, r2, r3
283; CHECK-NEXT:    vmov d0, r0, r1
284; CHECK-NEXT:    mov r0, sp
285; CHECK-NEXT:    vadd.f32 s4, s0, s1
286; CHECK-NEXT:    vadd.f32 s4, s4, s2
287; CHECK-NEXT:    vadd.f32 s0, s4, s3
288; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
289; CHECK-NEXT:    add r0, sp, #16
290; CHECK-NEXT:    vadd.f32 s0, s0, s4
291; CHECK-NEXT:    vadd.f32 s0, s0, s5
292; CHECK-NEXT:    vadd.f32 s0, s0, s6
293; CHECK-NEXT:    vadd.f32 s0, s0, s7
294; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
295; CHECK-NEXT:    add r0, sp, #32
296; CHECK-NEXT:    vadd.f32 s0, s0, s4
297; CHECK-NEXT:    vadd.f32 s0, s0, s5
298; CHECK-NEXT:    vadd.f32 s0, s0, s6
299; CHECK-NEXT:    vadd.f32 s0, s0, s7
300; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
301; CHECK-NEXT:    vadd.f32 s0, s0, s4
302; CHECK-NEXT:    vadd.f32 s0, s0, s5
303; CHECK-NEXT:    vadd.f32 s0, s0, s6
304; CHECK-NEXT:    vadd.f32 s0, s0, s7
305; CHECK-NEXT:    vmov r0, s0
306; CHECK-NEXT:    mov pc, lr
307  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
308  ret float %b
309}
310