; xref: /llvm-project/llvm/test/CodeGen/AArch64/fadd-combines.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s

define double @test1(double %a, double %b) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
; a + (b * -2.0) is expected to lower as a - (b + b).
  %mul = fmul double %b, -2.000000e+00
  %add1 = fadd double %a, %mul
  ret double %add1
}

; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b'

define double @test2(double %a, double %b) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
; a - (b * 2.0) is expected to lower as a - (b + b).
  %mul = fmul double %b, 2.000000e+00
  %add1 = fsub double %a, %mul
  ret double %add1
}

define double @test3(double %a, double %b, double %c) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fadd d1, d2, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
; (a * b) - (c * 2.0) is expected to lower as (a * b) - (c + c).
  %mul = fmul double %a, %b
  %mul1 = fmul double %c, 2.000000e+00
  %sub = fsub double %mul, %mul1
  ret double %sub
}

define double @test4(double %a, double %b, double %c) {
; CHECK-LABEL: test4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fadd d1, d2, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
; (a * b) + (c * -2.0) is expected to lower as (a * b) - (c + c).
  %mul = fmul double %a, %b
  %mul1 = fmul double %c, -2.000000e+00
  %add2 = fadd double %mul, %mul1
  ret double %add2
}

define <4 x float> @fmulnegtwo_vec(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; Vector version of the a + (b * -2.0) --> a - (b + b) fold.
  %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
  %add = fadd <4 x float> %a, %mul
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_commute(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_commute:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; Same fold with the fadd operands commuted.
  %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_undefs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_undefs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; The fold still applies when the -2.0 splat has undef lanes.
  %mul = fmul <4 x float> %b, <float undef, float -2.0, float undef, float -2.0>
  %add = fadd <4 x float> %a, %mul
  ret <4 x float> %add
}

define <4 x float> @fmulnegtwo_vec_commute_undefs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmulnegtwo_vec_commute_undefs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; Commuted fadd operands plus undef lanes in the splat.
  %mul = fmul <4 x float> %b, <float -2.0, float undef, float -2.0, float -2.0>
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define <4 x float> @test6(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
; Vector version of a - (b * 2.0) --> a - (b + b).
  %mul = fmul <4 x float> %b, <float 2.0, float 2.0, float 2.0, float 2.0>
  %add = fsub <4 x float> %a, %mul
  ret <4 x float> %add
}

; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has
; multiple uses.

define double @test7(double %a, double %b) nounwind {
; CHECK-LABEL: test7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    fmov d2, #-2.00000000
; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fadd d8, d0, d1
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl use
; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    fmov d0, d8
; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
; The fmul result also feeds the call to @use, so it must stay materialized.
  %mul = fmul double %b, -2.000000e+00
  %add1 = fadd double %a, %mul
  call void @use(double %mul)
  ret double %add1
}

define float @fadd_const_multiuse_fmf(float %x) {
; CHECK-LABEL: fadd_const_multiuse_fmf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    mov w9, #1114374144 // =0x426c0000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fadd s1, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
; Constants in the checks: 0x42280000 = 42.0, 0x426c0000 = 59.0 (42.0 + 17.0 folded).
  %a1 = fadd float %x, 42.0
  %a2 = fadd nsz reassoc float %a1, 17.0
  %a3 = fadd float %a1, %a2
  ret float %a3
}

; DAGCombiner transforms this into: (x + 42.0) + (x + 59.0).
define float @fadd_const_multiuse_attr(float %x) {
; CHECK-LABEL: fadd_const_multiuse_attr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    mov w9, #1114374144 // =0x426c0000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fadd s1, s0, s1
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
; Constants in the checks: 0x42280000 = 42.0, 0x426c0000 = 59.0 (42.0 + 17.0 folded).
  %a1 = fadd fast float %x, 42.0
  %a2 = fadd fast float %a1, 17.0
  %a3 = fadd fast float %a1, %a2
  ret float %a3
}

; PR32939 - https://bugs.llvm.org/show_bug.cgi?id=32939

define double @fmul2_negated(double %a, double %b, double %c) {
; CHECK-LABEL: fmul2_negated:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd d1, d1, d1
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fsub d0, d0, d1
; CHECK-NEXT:    ret
; a - ((b * 2.0) * c): the multiply by 2.0 still becomes b + b.
  %mul = fmul double %b, 2.0
  %mul1 = fmul double %mul, %c
  %sub = fsub double %a, %mul1
  ret double %sub
}

define <2 x double> @fmul2_negated_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmul2_negated_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v1.2d, v1.2d, v1.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
; Vector version of a - ((b * 2.0) * c).
  %mul = fmul <2 x double> %b, <double 2.0, double 2.0>
  %mul1 = fmul <2 x double> %mul, %c
  %sub = fsub <2 x double> %a, %mul1
  ret <2 x double> %sub
}

; ((a*b) + (c*d)) + n1 --> (a*b) + ((c*d) + n1)

define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, double %n1) nounwind {
; CHECK-LABEL: fadd_fma_fmul_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd d2, d2, d3, d4
; CHECK-NEXT:    fmadd d0, d0, d1, d2
; CHECK-NEXT:    ret
; With 'fast' FMF both fmul+fadd pairs contract into fmadd.
  %m1 = fmul fast double %a, %b
  %m2 = fmul fast double %c, %d
  %a1 = fadd fast double %m1, %m2
  %a2 = fadd fast double %a1, %n1
  ret double %a2
}

; Minimum FMF - the 1st fadd is contracted because that combines
; fmul+fadd as specified by the order of operations; the 2nd fadd
; requires reassociation to fuse with c*d.

define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind {
; CHECK-LABEL: fadd_fma_fmul_fmf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s2, s2, s3, s4
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    ret
; 'contract' plus 'reassoc' on the last fadd is enough to form both fmadds.
  %m1 = fmul contract float %a, %b
  %m2 = fmul contract float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd contract reassoc float %n0, %a1
  ret float %a2
}

; Not minimum FMF.

define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind {
; CHECK-LABEL: fadd_fma_fmul_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
; Without 'reassoc' on the final fadd, only one fmadd forms; the last fadd stays separate.
  %m1 = fmul float %a, %b
  %m2 = fmul float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd contract float %n0, %a1
  ret float %a2
}

; The final fadd can be folded with either 1 of the leading fmuls.

define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x double> %x3, <2 x double> %x4, <2 x double> %x5, <2 x double> %x6, <2 x double> %x7, <2 x double> %x8) nounwind {
; CHECK-LABEL: fadd_fma_fmul_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmla v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    fmla v2.2d, v7.2d, v6.2d
; CHECK-NEXT:    fmla v2.2d, v5.2d, v4.2d
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
; Three of the four fast fmuls fuse into an fmla chain on top of the remaining fmul.
  %m1 = fmul fast <2 x double> %x1, %x2
  %m2 = fmul fast <2 x double> %x3, %x4
  %m3 = fmul fast <2 x double> %x5, %x6
  %m4 = fmul fast <2 x double> %x7, %x8
  %a1 = fadd fast <2 x double> %m1, %m2
  %a2 = fadd fast <2 x double> %m3, %m4
  %a3 = fadd fast <2 x double> %a1, %a2
  ret <2 x double> %a3
}

; negative test

define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s1, s0, s1
; CHECK-NEXT:    fmadd s0, s2, s3, s1
; CHECK-NEXT:    str s1, [x0]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
; %m1 is stored, so it must stay a standalone fmul.
  %m1 = fmul fast float %a, %b
  store float %m1, ptr %p
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test

define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s0, s0, s1, s2
; CHECK-NEXT:    str s2, [x0]
; CHECK-NEXT:    fadd s0, s4, s0
; CHECK-NEXT:    ret
; %m2 is stored, so it must stay a standalone fmul.
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  store float %m2, ptr %p
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test

define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; CHECK-LABEL: fadd_fma_fmul_extra_use_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s2, s3
; CHECK-NEXT:    fmadd s1, s0, s1, s2
; CHECK-NEXT:    fadd s0, s4, s1
; CHECK-NEXT:    str s1, [x0]
; CHECK-NEXT:    ret
; The intermediate sum %a1 is stored, so it must be materialized.
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  store float %a1, ptr %p
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

define float @fmac_sequence_innermost_fmul(float %a, float %b, float %c, float %d, float %e, float %f, float %g) {
; CHECK-LABEL: fmac_sequence_innermost_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s0, s1, s6
; CHECK-NEXT:    fmadd s0, s2, s3, s0
; CHECK-NEXT:    fmadd s0, s4, s5, s0
; CHECK-NEXT:    ret
; The trailing reassoc fadd of %g is pulled into the innermost fmadd (s6 addend).
  %t0 = fmul float %a, %b
  %t1 = fmul contract float %c, %d
  %t2 = fadd contract float %t0, %t1
  %t3 = fmul contract float %e, %f
  %t4 = fadd contract float %t2, %t3
  %t5 = fadd contract reassoc float %t4, %g
  ret float %t5
}

define float @fmac_sequence_innermost_fmul_intrinsics(float %a, float %b, float %c, float %d, float %e, float %f, float %g) {
; CHECK-LABEL: fmac_sequence_innermost_fmul_intrinsics:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmadd s0, s0, s1, s6
; CHECK-NEXT:    fmadd s0, s2, s3, s0
; CHECK-NEXT:    fmadd s0, s4, s5, s0
; CHECK-NEXT:    ret
; Same codegen as above, but starting from llvm.fma intrinsics.
  %t0 = fmul float %a, %b
  %t1 = call float @llvm.fma.f32(float %c, float %d, float %t0)
  %t2 = call float @llvm.fma.f32(float %e, float %f, float %t1)
  %t5 = fadd contract reassoc float %t2, %g
  ret float %t5
}

declare float @llvm.fma.f32(float, float, float)

declare void @use(double)

