; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m7  -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m4  -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m33 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE

; Check generated fused MAC and MLS.

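; The first two RUN lines target FPUs with fast fused MAC (VFPv4+NEON and
; Cortex-M7): with -fp-contract=fast, fmul followed by fadd/fsub should
; contract into vfma/vfms/vfnma/vfnms. Cortex-M4 and Cortex-M33 also have
; VFMA-capable FPUs, but LLVM tunes those cores to avoid fused MAC, so the
; DONT-FUSE prefix expects a separate vmul + vadd instead.
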
define arm_aapcs_vfpcc double @fusedMACTest1(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest1:
;CHECK: vfma.f64
  %1 = fmul double %d1, %d2
  %2 = fadd double %1, %d3
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest2(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest2:
;CHECK: vfma.f32

;DONT-FUSE-LABEL: fusedMACTest2:
;DONT-FUSE:       vmul.f32
;DONT-FUSE-NEXT:  vadd.f32

  %1 = fmul float %f1, %f2
  %2 = fadd float %1, %f3
  ret float %2
}

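; MLS pattern: subtracting the product from an accumulator (a - b*c)
; should contract to vfms, which computes Fd - Fn*Fm.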
define arm_aapcs_vfpcc double @fusedMACTest3(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest3:
;CHECK: vfms.f64
  %1 = fmul double %d2, %d3
  %2 = fsub double %d1, %1
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest4(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest4:
;CHECK: vfms.f32
  %1 = fmul float %f2, %f3
  %2 = fsub float %f1, %1
  ret float %2
}

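; Negating the product as well (-(a*b) - c) maps to vfnma, which computes
; -(Fd) - Fn*Fm with the accumulator in Fd.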
define arm_aapcs_vfpcc double @fusedMACTest5(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest5:
;CHECK: vfnma.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double -0.0, %1
  %3 = fsub double %2, %d3
  ret double %3
}

define arm_aapcs_vfpcc float @fusedMACTest6(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest6:
;CHECK: vfnma.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float -0.0, %1
  %3 = fsub float %2, %f3
  ret float %3
}

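; A plain product minus the accumulator (a*b - c) maps to vfnms, which
; computes -(Fd) + Fn*Fm.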
define arm_aapcs_vfpcc double @fusedMACTest7(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest7:
;CHECK: vfnms.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double %1, %d3
  ret double %2
}

define arm_aapcs_vfpcc float @fusedMACTest8(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest8:
;CHECK: vfnms.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float %1, %f3
  ret float %2
}

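; The same contraction applies to NEON vectors: fmul followed by fadd/fsub
; on <2 x float> and <4 x float> should become vector vfma.f32 / vfms.f32.
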
define arm_aapcs_vfpcc <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest9:
;CHECK: vfma.f32
  %mul = fmul <2 x float> %a, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}

define arm_aapcs_vfpcc <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest10:
;CHECK: vfms.f32
  %mul = fmul <2 x float> %a, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}

define arm_aapcs_vfpcc <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest11:
;CHECK: vfma.f32
  %mul = fmul <4 x float> %a, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define arm_aapcs_vfpcc <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest12:
;CHECK: vfms.f32
  %mul = fmul <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}

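; The tests below call the llvm.fma intrinsic directly. Unlike contraction,
; llvm.fma requires a single rounding, so it must lower to a fused
; instruction (or a libcall) regardless of -fp-contract.
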
define arm_aapcs_vfpcc float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f32
; CHECK: vfma.f32
  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
  ret float %tmp1
}

define arm_aapcs_vfpcc double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_f64
; CHECK: vfma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  ret double %tmp1
}

define arm_aapcs_vfpcc <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
entry:
; CHECK: test_fma_v2f32
; CHECK: vfma.f32
  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %tmp1
}

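; The fms/fnms/fnma tests negate operands with 'fsub -0.0, x' (the negation
; idiom in IR predating fneg) and check that each combination folds into
; the matching accumulating form: vfms, vfnms or vfnma.
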
define arm_aapcs_vfpcc double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  ret double %tmp2
}

define arm_aapcs_vfpcc double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fms_f64_2
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  ret double %tmp2
}

define arm_aapcs_vfpcc float @test_fnms_f32(float %a, float %b, ptr %c) nounwind readnone ssp {
; CHECK: test_fnms_f32
; CHECK: vfnms.f32
  %tmp1 = load float, ptr %c, align 4
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
  ret float %tmp3
}

define arm_aapcs_vfpcc double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

define arm_aapcs_vfpcc double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnms_f64_2
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

define arm_aapcs_vfpcc double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64
; CHECK: vfnma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  %tmp2 = fsub double -0.0, %tmp1
  ret double %tmp2
}

define arm_aapcs_vfpcc double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK: test_fnma_f64_2
; CHECK: vfnma.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = fsub double -0.0, %c
  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
  ret double %tmp3
}

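; fma(a, 1.0, b): multiplying by 1.0 is exact, so the call constant-folds
; to a plain fadd; neither vfma nor vmul should survive.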
define arm_aapcs_vfpcc float @test_fma_const_fold(float %a, float %b) nounwind {
; CHECK: test_fma_const_fold
; CHECK-NOT: vfma
; CHECK-NOT: vmul
; CHECK: vadd
  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
  ret float %ret
}

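; With a constant first multiplicand, the commutative multiply operands are
; canonicalized so the constant becomes the second source: it is
; materialized with vmov.f32 and fed to vfma as the last operand.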
define arm_aapcs_vfpcc float @test_fma_canonicalize(float %a, float %b) nounwind {
; CHECK: test_fma_canonicalize
; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
  ret float %ret
}

; Check that very wide vector FMAs can be split into legal FMAs.
define arm_aapcs_vfpcc void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, ptr %p) nounwind readnone ssp {
; CHECK: test_fma_v8f32
; CHECK: vfma.f32
; CHECK: vfma.f32
entry:
  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
  store <8 x float> %call, ptr %p, align 16
  ret void
}


declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone