xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-fma.ll (revision acdc419c897f8a9414c7a00c8908ac32312afee2)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; Pins the module's data layout so the test does not depend on the host default.
; Per the LangRef layout syntax: little-endian (e), ELF name mangling (m:e),
; i64 aligned to 64 bits, f80 aligned to 128 bits, native integer widths of
; 8/16/32/64 bits, and a 128-bit natural stack alignment (S128).
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4
; Scalar single-precision FMA on lane 0: computes fma(%a[0], %b[0], %c[0]) and
; writes the result into lane 0 of %a, leaving lanes 1-3 of %a untouched.
; Lanes 1-3 of %b and %c are overwritten with constants first, but only lane 0
; of each is ever read, so the CHECK lines expect instcombine to drop those
; inserts and extract straight from the original %b and %c arguments.
5define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
6;
7; CHECK-LABEL: @test_vfmadd_ss(
8; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
9; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
10; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
11; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
12; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
13; CHECK-NEXT:    ret <4 x float> [[TMP5]]
14;
  ; Overwrite lanes 1-3 of %b with constants; these lanes are never read below.
15  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
16  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
17  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  ; Same for %c.
18  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
19  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
20  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
  ; Only lane 0 of each operand feeds the scalar fma.
21  %7 = extractelement <4 x float> %a, i64 0
22  %8 = extractelement <4 x float> %3, i64 0
23  %9 = extractelement <4 x float> %6, i64 0
24  %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
  ; Result replaces lane 0 of the unmodified %a.
25  %11 = insertelement <4 x float> %a, float %10, i64 0
26  ret <4 x float> %11
27}
28
; Scalar single-precision FMA where only lane 0 of the result vector is returned.
; Lanes 1-3 of %a are overwritten with constants, the fma result is inserted
; into lane 0 of that vector, and lane 0 is immediately extracted again.
; The CHECK lines expect the constant inserts and the insert/extract round trip
; to disappear, leaving the three lane-0 extracts, the fma call and a direct
; return of its result.
29define float @test_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
30;
31; CHECK-LABEL: @test_vfmadd_ss_0(
32; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
33; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
34; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
35; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
36; CHECK-NEXT:    ret float [[TMP4]]
37;
  ; Overwrite lanes 1-3 of %a; lane 0 still holds the original %a[0].
38  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
39  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
40  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
41  %4 = extractelement <4 x float> %3, i64 0
42  %5 = extractelement <4 x float> %b, i64 0
43  %6 = extractelement <4 x float> %c, i64 0
44  %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
  ; Insert into lane 0, then read lane 0 back: the returned value is just %7.
45  %8 = insertelement <4 x float> %3, float %7, i64 0
46  %9 = extractelement <4 x float> %8, i32 0
47  ret float %9
48}
49
; Scalar single-precision FMA where only lane 1 of the result vector is returned.
; The fma result is inserted into lane 0, but the function returns lane 1, which
; was set to the constant 1.0 by the first insertelement. The CHECK lines
; therefore expect the whole body, fma call included, to fold to
; `ret float 1.000000e+00`.
50define float @test_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
51;
52; CHECK-LABEL: @test_vfmadd_ss_1(
53; CHECK-NEXT:    ret float 1.000000e+00
54;
  ; Lane 1 receives the constant 1.0 that is ultimately returned.
55  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
56  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
57  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
58  %4 = extractelement <4 x float> %3, i64 0
59  %5 = extractelement <4 x float> %b, i64 0
60  %6 = extractelement <4 x float> %c, i64 0
61  %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
  ; The fma result only touches lane 0; lane 1 is read back unchanged.
62  %8 = insertelement <4 x float> %3, float %7, i64 0
63  %9 = extractelement <4 x float> %8, i32 1
64  ret float %9
65}
66
; Scalar double-precision FMA on lane 0: computes fma(%a[0], %b[0], %c[0]) and
; writes the result into lane 0 of %a, leaving lane 1 of %a untouched.
; Lane 1 of %b and %c is overwritten with a constant first, but only lane 0 of
; each is ever read, so the CHECK lines expect instcombine to drop those inserts
; and extract straight from the original %b and %c arguments.
67define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
68;
69; CHECK-LABEL: @test_vfmadd_sd(
70; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
71; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
72; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
73; CHECK-NEXT:    [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
74; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
75; CHECK-NEXT:    ret <2 x double> [[TMP5]]
76;
  ; Overwrite lane 1 of %b and %c; that lane is never read below.
77  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
78  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
  ; Only lane 0 of each operand feeds the scalar fma.
79  %3 = extractelement <2 x double> %a, i64 0
80  %4 = extractelement <2 x double> %1, i64 0
81  %5 = extractelement <2 x double> %2, i64 0
82  %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
  ; Result replaces lane 0 of the unmodified %a.
83  %7 = insertelement <2 x double> %a, double %6, i64 0
84  ret <2 x double> %7
85}
86
; Scalar double-precision FMA where only lane 0 of the result vector is returned.
; Lane 1 of %a is overwritten with a constant, the fma result is inserted into
; lane 0 of that vector, and lane 0 is immediately extracted again.
; The CHECK lines expect the constant insert and the insert/extract round trip
; to disappear, leaving the three lane-0 extracts, the fma call and a direct
; return of its result.
87define double @test_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
88;
89; CHECK-LABEL: @test_vfmadd_sd_0(
90; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
91; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
92; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
93; CHECK-NEXT:    [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
94; CHECK-NEXT:    ret double [[TMP4]]
95;
  ; Overwrite lane 1 of %a; lane 0 still holds the original %a[0].
96  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
97  %2 = extractelement <2 x double> %1, i64 0
98  %3 = extractelement <2 x double> %b, i64 0
99  %4 = extractelement <2 x double> %c, i64 0
100  %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
  ; Insert into lane 0, then read lane 0 back: the returned value is just %5.
101  %6 = insertelement <2 x double> %1, double %5, i64 0
102  %7 = extractelement <2 x double> %6, i32 0
103  ret double %7
104}
105
; Scalar double-precision FMA where only lane 1 of the result vector is returned.
; The fma result is inserted into lane 0, but the function returns lane 1, which
; was set to the constant 1.0 by the first insertelement. The CHECK lines
; therefore expect the whole body, fma call included, to fold to
; `ret double 1.000000e+00`.
106define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
107;
108; CHECK-LABEL: @test_vfmadd_sd_1(
109; CHECK-NEXT:    ret double 1.000000e+00
110;
  ; Lane 1 receives the constant 1.0 that is ultimately returned.
111  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
112  %2 = extractelement <2 x double> %1, i64 0
113  %3 = extractelement <2 x double> %b, i64 0
114  %4 = extractelement <2 x double> %c, i64 0
115  %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
  ; The fma result only touches lane 0; lane 1 is read back unchanged.
116  %6 = insertelement <2 x double> %1, double %5, i64 0
117  %7 = extractelement <2 x double> %6, i32 1
118  ret double %7
119}
120
; Declarations of the fused multiply-add intrinsics called by the tests in this
; file: the f32 overload by the float-vector tests, the f64 overload by the
; double-vector tests.
121declare float @llvm.fma.f32(float, float, float)
122declare double @llvm.fma.f64(double, double, double)
123