xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll (revision 8b56da5e9f3ba737a5ff4bf5dee654416849042f)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
8
; Alternating fadd/fsub over all eight lanes of %a and %b: lanes 0, 3, 4, 7 are
; added and lanes 1, 2, 5, 6 are subtracted, written in fully scalarized form.
; The shared expectations below (used by every run line) are one full-width
; fadd, one full-width fsub, and a single shufflevector that takes lanes
; 0, 3, 4, 7 from the fadd result and lanes 1, 2, 5, 6 from the fsub result
; (mask indices 9, 10, 13, 14 refer to the second shuffle operand).
9define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
10; CHECK-LABEL: @fadd_fsub_v8f32(
11; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
12; CHECK-NEXT:    [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
13; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
14; CHECK-NEXT:    ret <8 x float> [[TMP3]]
15;
; Extract every lane of the first operand.
16  %a0 = extractelement <8 x float> %a, i32 0
17  %a1 = extractelement <8 x float> %a, i32 1
18  %a2 = extractelement <8 x float> %a, i32 2
19  %a3 = extractelement <8 x float> %a, i32 3
20  %a4 = extractelement <8 x float> %a, i32 4
21  %a5 = extractelement <8 x float> %a, i32 5
22  %a6 = extractelement <8 x float> %a, i32 6
23  %a7 = extractelement <8 x float> %a, i32 7
; Extract every lane of the second operand.
24  %b0 = extractelement <8 x float> %b, i32 0
25  %b1 = extractelement <8 x float> %b, i32 1
26  %b2 = extractelement <8 x float> %b, i32 2
27  %b3 = extractelement <8 x float> %b, i32 3
28  %b4 = extractelement <8 x float> %b, i32 4
29  %b5 = extractelement <8 x float> %b, i32 5
30  %b6 = extractelement <8 x float> %b, i32 6
31  %b7 = extractelement <8 x float> %b, i32 7
; Per-lane scalar ops in the pattern add, sub, sub, add, add, sub, sub, add.
32  %ab0 = fadd float %a0, %b0
33  %ab1 = fsub float %a1, %b1
34  %ab2 = fsub float %a2, %b2
35  %ab3 = fadd float %a3, %b3
36  %ab4 = fadd float %a4, %b4
37  %ab5 = fsub float %a5, %b5
38  %ab6 = fsub float %a6, %b6
39  %ab7 = fadd float %a7, %b7
; Rebuild the result vector lane by lane, starting from undef; all eight lanes
; are overwritten before the return.
40  %r0 = insertelement <8 x float> undef, float %ab0, i32 0
41  %r1 = insertelement <8 x float>   %r0, float %ab1, i32 1
42  %r2 = insertelement <8 x float>   %r1, float %ab2, i32 2
43  %r3 = insertelement <8 x float>   %r2, float %ab3, i32 3
44  %r4 = insertelement <8 x float>   %r3, float %ab4, i32 4
45  %r5 = insertelement <8 x float>   %r4, float %ab5, i32 5
46  %r6 = insertelement <8 x float>   %r5, float %ab6, i32 6
47  %r7 = insertelement <8 x float>   %r6, float %ab7, i32 7
48  ret <8 x float> %r7
49}
50
; Alternating fmul/fdiv over all eight lanes of %a and %b: lanes 0, 3, 4, 7 are
; multiplied and lanes 1, 2, 5, 6 are divided, written in fully scalarized form.
; The shared expectations below (used by every run line) are one full-width
; fmul, one full-width fdiv, and a single shufflevector that takes lanes
; 0, 3, 4, 7 from the fmul result and lanes 1, 2, 5, 6 from the fdiv result
; (mask indices 9, 10, 13, 14 refer to the second shuffle operand).
51define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
52; CHECK-LABEL: @fmul_fdiv_v8f32(
53; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
54; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
55; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
56; CHECK-NEXT:    ret <8 x float> [[TMP3]]
57;
; Extract every lane of the first operand.
58  %a0 = extractelement <8 x float> %a, i32 0
59  %a1 = extractelement <8 x float> %a, i32 1
60  %a2 = extractelement <8 x float> %a, i32 2
61  %a3 = extractelement <8 x float> %a, i32 3
62  %a4 = extractelement <8 x float> %a, i32 4
63  %a5 = extractelement <8 x float> %a, i32 5
64  %a6 = extractelement <8 x float> %a, i32 6
65  %a7 = extractelement <8 x float> %a, i32 7
; Extract every lane of the second operand.
66  %b0 = extractelement <8 x float> %b, i32 0
67  %b1 = extractelement <8 x float> %b, i32 1
68  %b2 = extractelement <8 x float> %b, i32 2
69  %b3 = extractelement <8 x float> %b, i32 3
70  %b4 = extractelement <8 x float> %b, i32 4
71  %b5 = extractelement <8 x float> %b, i32 5
72  %b6 = extractelement <8 x float> %b, i32 6
73  %b7 = extractelement <8 x float> %b, i32 7
; Per-lane scalar ops in the pattern mul, div, div, mul, mul, div, div, mul.
74  %ab0 = fmul float %a0, %b0
75  %ab1 = fdiv float %a1, %b1
76  %ab2 = fdiv float %a2, %b2
77  %ab3 = fmul float %a3, %b3
78  %ab4 = fmul float %a4, %b4
79  %ab5 = fdiv float %a5, %b5
80  %ab6 = fdiv float %a6, %b6
81  %ab7 = fmul float %a7, %b7
; Rebuild the result vector lane by lane, starting from undef; all eight lanes
; are overwritten before the return.
82  %r0 = insertelement <8 x float> undef, float %ab0, i32 0
83  %r1 = insertelement <8 x float>   %r0, float %ab1, i32 1
84  %r2 = insertelement <8 x float>   %r1, float %ab2, i32 2
85  %r3 = insertelement <8 x float>   %r2, float %ab3, i32 3
86  %r4 = insertelement <8 x float>   %r3, float %ab4, i32 4
87  %r5 = insertelement <8 x float>   %r4, float %ab5, i32 5
88  %r6 = insertelement <8 x float>   %r5, float %ab6, i32 6
89  %r7 = insertelement <8 x float>   %r6, float %ab7, i32 7
90  ret <8 x float> %r7
91}
92
; Mixed fmul/fdiv of a <4 x float> by constants: lanes 0 and 1 are multiplied
; by 2.0 and 1.0, lanes 2 and 3 are divided by 1.0 and 0.5.
; Expected output differs per target prefix:
;  - For the SSE, AVX and AVX512 prefixes the whole function becomes a single
;    <4 x float> fmul by <2.0, 1.0, 1.0, 2.0>, i.e. the two divisions show up
;    as multiplications by the reciprocal constants.
;  - For the SLM prefix only lanes 0 and 1 are combined into a <2 x float>
;    fmul; lane 2 is inserted unchanged from the input and lane 3 is a scalar
;    fmul by 2.0.
93define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
94; SSE-LABEL: @fmul_fdiv_v4f32_const(
95; SSE-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
96; SSE-NEXT:    ret <4 x float> [[TMP1]]
97;
98; SLM-LABEL: @fmul_fdiv_v4f32_const(
99; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i64 2
100; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3
101; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
102; SLM-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
103; SLM-NEXT:    [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
104; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
105; SLM-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i64 2
106; SLM-NEXT:    [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i64 3
107; SLM-NEXT:    ret <4 x float> [[R3]]
108;
109; AVX-LABEL: @fmul_fdiv_v4f32_const(
110; AVX-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
111; AVX-NEXT:    ret <4 x float> [[TMP1]]
112;
113; AVX512-LABEL: @fmul_fdiv_v4f32_const(
114; AVX512-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
115; AVX512-NEXT:    ret <4 x float> [[TMP1]]
116;
; Extract every lane of the input.
117  %a0 = extractelement <4 x float> %a, i32 0
118  %a1 = extractelement <4 x float> %a, i32 1
119  %a2 = extractelement <4 x float> %a, i32 2
120  %a3 = extractelement <4 x float> %a, i32 3
; Lanes 0-1 multiply, lanes 2-3 divide; every right-hand operand is a constant.
121  %ab0 = fmul float %a0, 2.0
122  %ab1 = fmul float %a1, 1.0
123  %ab2 = fdiv float %a2, 1.0
124  %ab3 = fdiv float %a3, 0.5
; Rebuild the result vector lane by lane, starting from undef; all four lanes
; are overwritten before the return.
125  %r0 = insertelement <4 x float> undef, float %ab0, i32 0
126  %r1 = insertelement <4 x float>   %r0, float %ab1, i32 1
127  %r2 = insertelement <4 x float>   %r1, float %ab2, i32 2
128  %r3 = insertelement <4 x float>   %r2, float %ab3, i32 3
129  ret <4 x float> %r3
130}
131