xref: /llvm-project/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
3
4; Same as vecreduce-fadd-legalization.ll, but without fast-math flags (fmf) on the reduction calls.
5
; Declarations of the llvm.vector.reduce.fadd intrinsic overloads called by the
; tests in this file. Each takes a scalar start value followed by the vector
; to reduce and returns a scalar of the element type.
;
; Single-element vectors, one per floating-point type (half, float, double, fp128).
6declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
7declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
8declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
9declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
10
; Multi-element vectors: 3 and 5 floats, 2 fp128 values, and 16 floats.
11declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
12declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
13declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
14declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
15
; Single-element half reduction with a variable start value %s.
; The expected assembly widens both operands to single precision (fcvt),
; performs one scalar fadd of %s and the element, and narrows the result back
; to half.
; NOTE(review): the widening is presumably because the llc invocation enables
; only +neon and no native half-precision arithmetic - confirm.
16define half @test_v1f16(<1 x half> %a, half %s) nounwind {
17; CHECK-LABEL: test_v1f16:
18; CHECK:       // %bb.0:
19; CHECK-NEXT:    fcvt s0, h0
20; CHECK-NEXT:    fcvt s1, h1
21; CHECK-NEXT:    fadd s0, s1, s0
22; CHECK-NEXT:    fcvt h0, s0
23; CHECK-NEXT:    ret
24  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
25  ret half %b
26}
27
; Single-element half reduction with a constant -0.0 start value (the
; "neutral" case). The expected assembly is a bare ret: no conversion and no
; add is emitted, so the input element is returned as-is.
28define half @test_v1f16_neutral(<1 x half> %a) nounwind {
29; CHECK-LABEL: test_v1f16_neutral:
30; CHECK:       // %bb.0:
31; CHECK-NEXT:    ret
32  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
33  ret half %b
34}
35
; Single-element float reduction with a variable start value %s.
; The expected assembly is one scalar fadd of s1 and s0. The "// kill:" line
; is a register-liveness annotation printed as an assembly comment, not an
; instruction.
36define float @test_v1f32(<1 x float> %a, float %s) nounwind {
37; CHECK-LABEL: test_v1f32:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
40; CHECK-NEXT:    fadd s0, s1, s0
41; CHECK-NEXT:    ret
42  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
43  ret float %b
44}
45
; Single-element float reduction with a constant -0.0 start value.
; The expected assembly contains no arithmetic: only two "// kill:"
; register-liveness annotations (assembly comments) followed by ret.
46define float @test_v1f32_neutral(<1 x float> %a) nounwind {
47; CHECK-LABEL: test_v1f32_neutral:
48; CHECK:       // %bb.0:
49; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
50; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
51; CHECK-NEXT:    ret
52  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
53  ret float %b
54}
55
; Single-element double reduction with a variable start value %s.
; The expected assembly is one scalar fadd of d1 and d0.
56define double @test_v1f64(<1 x double> %a, double %s) nounwind {
57; CHECK-LABEL: test_v1f64:
58; CHECK:       // %bb.0:
59; CHECK-NEXT:    fadd d0, d1, d0
60; CHECK-NEXT:    ret
61  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
62  ret double %b
63}
64
; Single-element double reduction with a constant -0.0 start value.
; The expected assembly is a bare ret: no add is emitted.
65define double @test_v1f64_neutral(<1 x double> %a) nounwind {
66; CHECK-LABEL: test_v1f64_neutral:
67; CHECK:       // %bb.0:
68; CHECK-NEXT:    ret
69  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
70  ret double %b
71}
72
; Single-element fp128 reduction with a variable start value %s.
; The expected assembly swaps v0 and v1 through v2 and then branches (b, not
; bl) to __addtf3, so the addition is performed by that routine with the
; operands in (%s, element) order.
73define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
74; CHECK-LABEL: test_v1f128:
75; CHECK:       // %bb.0:
76; CHECK-NEXT:    mov v2.16b, v0.16b
77; CHECK-NEXT:    mov v0.16b, v1.16b
78; CHECK-NEXT:    mov v1.16b, v2.16b
79; CHECK-NEXT:    b __addtf3
80  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
81  ret fp128 %b
82}
83
; Single-element fp128 reduction whose start value is the fp128 hex constant
; with only the sign bit set (-0.0, matching the other "neutral" tests).
; The expected assembly is a bare ret: no call to __addtf3 is emitted.
84define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
85; CHECK-LABEL: test_v1f128_neutral:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    ret
88  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
89  ret fp128 %b
90}
91
; Three-element float reduction with a variable start value %s.
; The expected assembly adds %s (s1) to lane 0 first, then extracts lanes 1
; and 2 with mov and adds them one at a time in lane order: three scalar
; fadds in total.
92define float @test_v3f32(<3 x float> %a, float %s) nounwind {
93; CHECK-LABEL: test_v3f32:
94; CHECK:       // %bb.0:
95; CHECK-NEXT:    fadd s1, s1, s0
96; CHECK-NEXT:    mov s2, v0.s[1]
97; CHECK-NEXT:    mov s0, v0.s[2]
98; CHECK-NEXT:    fadd s1, s1, s2
99; CHECK-NEXT:    fadd s0, s1, s0
100; CHECK-NEXT:    ret
101  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
102  ret float %b
103}
104
; Three-element float reduction with a constant -0.0 start value.
; The expected assembly has no add involving the start value: lanes 0 and 1
; are combined with a single pairwise faddp, then lane 2 is added with a
; scalar fadd.
105define float @test_v3f32_neutral(<3 x float> %a) nounwind {
106; CHECK-LABEL: test_v3f32_neutral:
107; CHECK:       // %bb.0:
108; CHECK-NEXT:    mov s1, v0.s[2]
109; CHECK-NEXT:    faddp s0, v0.2s
110; CHECK-NEXT:    fadd s0, s0, s1
111; CHECK-NEXT:    ret
112  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
113  ret float %b
114}
115
; Five-element float reduction with a variable start value %s.
; Per the expected assembly, the five elements arrive in s0-s4 and the start
; value in s5; the result is built by five scalar fadds, starting with
; s5 + s0 and then adding s1 through s4 in order.
116define float @test_v5f32(<5 x float> %a, float %s) nounwind {
117; CHECK-LABEL: test_v5f32:
118; CHECK:       // %bb.0:
119; CHECK-NEXT:    fadd s0, s5, s0
120; CHECK-NEXT:    fadd s0, s0, s1
121; CHECK-NEXT:    fadd s0, s0, s2
122; CHECK-NEXT:    fadd s0, s0, s3
123; CHECK-NEXT:    fadd s0, s0, s4
124; CHECK-NEXT:    ret
125  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
126  ret float %b
127}
128
; Five-element float reduction with a constant -0.0 start value.
; The expected assembly has no add involving the start value: four scalar
; fadds accumulate s0 through s4 in order.
129define float @test_v5f32_neutral(<5 x float> %a) nounwind {
130; CHECK-LABEL: test_v5f32_neutral:
131; CHECK:       // %bb.0:
132; CHECK-NEXT:    fadd s0, s0, s1
133; CHECK-NEXT:    fadd s0, s0, s2
134; CHECK-NEXT:    fadd s0, s0, s3
135; CHECK-NEXT:    fadd s0, s0, s4
136; CHECK-NEXT:    ret
137  %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
138  ret float %b
139}
140
; Two-element fp128 reduction with a variable start value %s.
; The expected assembly makes two __addtf3 calls. Before the first (bl), q1 is
; spilled to the stack, v0 is moved into v1 and v2 into v0, and x30 is saved.
; After it returns, q1 and x30 are reloaded, the 32-byte stack area is
; released, and the second call is a plain branch (b) to __addtf3.
141define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
142; CHECK-LABEL: test_v2f128:
143; CHECK:       // %bb.0:
144; CHECK-NEXT:    sub sp, sp, #32
145; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
146; CHECK-NEXT:    mov v1.16b, v0.16b
147; CHECK-NEXT:    mov v0.16b, v2.16b
148; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
149; CHECK-NEXT:    bl __addtf3
150; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
151; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
152; CHECK-NEXT:    add sp, sp, #32
153; CHECK-NEXT:    b __addtf3
154  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
155  ret fp128 %b
156}
157
; Two-element fp128 reduction whose start value is the fp128 hex constant
; with only the sign bit set (-0.0, matching the other "neutral" tests).
; The expected assembly is a single branch (b) to __addtf3 with no register
; shuffling and no stack traffic, i.e. one addition of the two elements.
158define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
159; CHECK-LABEL: test_v2f128_neutral:
160; CHECK:       // %bb.0:
161; CHECK-NEXT:    b __addtf3
162  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
163  ret fp128 %b
164}
165
; Sixteen-element float reduction with a variable start value %s.
; Per the expected assembly, the elements arrive in v0-v3 (four lanes each)
; and the start value in s4. The result is a chain of sixteen scalar fadds:
; s4 + v0 lane 0 first, then every remaining lane of v0, v1, v2 and v3 in lane
; order. The mov instructions extract lanes 1-3 of each register and are
; interleaved with the adds; no pairwise or vector add appears.
166define float @test_v16f32(<16 x float> %a, float %s) nounwind {
167; CHECK-LABEL: test_v16f32:
168; CHECK:       // %bb.0:
169; CHECK-NEXT:    mov s6, v0.s[1]
170; CHECK-NEXT:    fadd s4, s4, s0
171; CHECK-NEXT:    mov s7, v0.s[2]
172; CHECK-NEXT:    mov s0, v0.s[3]
173; CHECK-NEXT:    mov s5, v2.s[1]
174; CHECK-NEXT:    fadd s4, s4, s6
175; CHECK-NEXT:    mov s6, v1.s[2]
176; CHECK-NEXT:    fadd s4, s4, s7
177; CHECK-NEXT:    fadd s0, s4, s0
178; CHECK-NEXT:    mov s4, v1.s[1]
179; CHECK-NEXT:    fadd s0, s0, s1
180; CHECK-NEXT:    mov s1, v1.s[3]
181; CHECK-NEXT:    fadd s0, s0, s4
182; CHECK-NEXT:    fadd s0, s0, s6
183; CHECK-NEXT:    fadd s0, s0, s1
184; CHECK-NEXT:    mov s1, v2.s[2]
185; CHECK-NEXT:    fadd s0, s0, s2
186; CHECK-NEXT:    mov s2, v2.s[3]
187; CHECK-NEXT:    fadd s0, s0, s5
188; CHECK-NEXT:    fadd s0, s0, s1
189; CHECK-NEXT:    mov s1, v3.s[1]
190; CHECK-NEXT:    fadd s0, s0, s2
191; CHECK-NEXT:    mov s2, v3.s[2]
192; CHECK-NEXT:    fadd s0, s0, s3
193; CHECK-NEXT:    fadd s0, s0, s1
194; CHECK-NEXT:    mov s1, v3.s[3]
195; CHECK-NEXT:    fadd s0, s0, s2
196; CHECK-NEXT:    fadd s0, s0, s1
197; CHECK-NEXT:    ret
198  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
199  ret float %b
200}
201
; Sixteen-element float reduction with a constant -0.0 start value.
; The expected assembly has no add involving the start value. Lanes 0 and 1 of
; v0 are combined with one pairwise faddp, and the remaining fourteen lanes
; (v0 lanes 2-3, then all of v1, v2 and v3) are added with scalar fadds in
; lane order, interleaved with the mov lane extracts.
202define float @test_v16f32_neutral(<16 x float> %a) nounwind {
203; CHECK-LABEL: test_v16f32_neutral:
204; CHECK:       // %bb.0:
205; CHECK-NEXT:    mov s5, v0.s[2]
206; CHECK-NEXT:    faddp s6, v0.2s
207; CHECK-NEXT:    mov s0, v0.s[3]
208; CHECK-NEXT:    mov s4, v1.s[1]
209; CHECK-NEXT:    fadd s5, s6, s5
210; CHECK-NEXT:    fadd s0, s5, s0
211; CHECK-NEXT:    mov s5, v1.s[2]
212; CHECK-NEXT:    fadd s0, s0, s1
213; CHECK-NEXT:    mov s1, v1.s[3]
214; CHECK-NEXT:    fadd s0, s0, s4
215; CHECK-NEXT:    mov s4, v2.s[2]
216; CHECK-NEXT:    fadd s0, s0, s5
217; CHECK-NEXT:    fadd s0, s0, s1
218; CHECK-NEXT:    mov s1, v2.s[1]
219; CHECK-NEXT:    fadd s0, s0, s2
220; CHECK-NEXT:    fadd s0, s0, s1
221; CHECK-NEXT:    mov s1, v2.s[3]
222; CHECK-NEXT:    mov s2, v3.s[2]
223; CHECK-NEXT:    fadd s0, s0, s4
224; CHECK-NEXT:    fadd s0, s0, s1
225; CHECK-NEXT:    mov s1, v3.s[1]
226; CHECK-NEXT:    fadd s0, s0, s3
227; CHECK-NEXT:    fadd s0, s0, s1
228; CHECK-NEXT:    mov s1, v3.s[3]
229; CHECK-NEXT:    fadd s0, s0, s2
230; CHECK-NEXT:    fadd s0, s0, s1
231; CHECK-NEXT:    ret
232  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
233  ret float %b
234}
235