xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll (revision 2c7786e94a1058bd4f96794a1d4f70dcb86e5cc5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
3
; dotf: 4-lane float dot product of the two <4 x float> arguments, spelled out
; as scalar operations. Each lane of %x and %y is extracted, the pair is
; multiplied with `fmul fast`, and the products are summed by a chain of
; `fadd fast` in which the newest product is the first operand and the running
; sum is the second.
; The CHECK lines expect the whole body to become one vector
; `fmul fast <4 x float>` of the two arguments followed by a single
; @llvm.vector.reduce.fadd.v4f32 call with a 0.0 start value.
4define float @dotf(<4 x float> %x, <4 x float> %y) {
5; CHECK-LABEL: @dotf(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
8; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
9; CHECK-NEXT:    ret float [[TMP1]]
10;
11entry:
  ; Lane 0: first product, seeds the sum.
12  %vecext = extractelement <4 x float> %x, i32 0
13  %vecext1 = extractelement <4 x float> %y, i32 0
14  %mul = fmul fast float %vecext, %vecext1
  ; Lane 1: product added to the lane-0 product.
15  %vecext.1 = extractelement <4 x float> %x, i32 1
16  %vecext1.1 = extractelement <4 x float> %y, i32 1
17  %mul.1 = fmul fast float %vecext.1, %vecext1.1
18  %add.1 = fadd fast float %mul.1, %mul
  ; Lane 2: product added to the running sum.
19  %vecext.2 = extractelement <4 x float> %x, i32 2
20  %vecext1.2 = extractelement <4 x float> %y, i32 2
21  %mul.2 = fmul fast float %vecext.2, %vecext1.2
22  %add.2 = fadd fast float %mul.2, %add.1
  ; Lane 3: last product; %add.3 is the returned dot product.
23  %vecext.3 = extractelement <4 x float> %x, i32 3
24  %vecext1.3 = extractelement <4 x float> %y, i32 3
25  %mul.3 = fmul fast float %vecext.3, %vecext1.3
26  %add.3 = fadd fast float %mul.3, %add.2
27  ret float %add.3
28}
29
; dotd: 4-lane double dot product where both operands arrive as unnamed
; `ptr byval(<4 x double>)` parameters (referenced as %0 and %1) with align 32.
; The vectors are loaded once as %x and %y, then every lane is extracted,
; multiplied with `fmul fast`, and accumulated by a chain of `fadd fast`
; (newest product first, running sum second).
; The CHECK lines expect the two loads to remain, followed by one vector
; `fmul fast <4 x double>` of [[X]] and [[Y]] and a single
; @llvm.vector.reduce.fadd.v4f64 call with a 0.0 start value.
30define double @dotd(ptr byval(<4 x double>) nocapture readonly align 32, ptr byval(<4 x double>) nocapture readonly align 32) {
31; CHECK-LABEL: @dotd(
32; CHECK-NEXT:  entry:
33; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32
34; CHECK-NEXT:    [[Y:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32
35; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
36; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]])
37; CHECK-NEXT:    ret double [[TMP3]]
38;
39entry:
  ; Load both byval vectors through the implicitly numbered parameters.
40  %x = load <4 x double>, ptr %0, align 32
41  %y = load <4 x double>, ptr %1, align 32
  ; Lane 0: first product, seeds the sum.
42  %vecext = extractelement <4 x double> %x, i32 0
43  %vecext1 = extractelement <4 x double> %y, i32 0
44  %mul = fmul fast double %vecext, %vecext1
  ; Lane 1: product added to the lane-0 product.
45  %vecext.1 = extractelement <4 x double> %x, i32 1
46  %vecext1.1 = extractelement <4 x double> %y, i32 1
47  %mul.1 = fmul fast double %vecext.1, %vecext1.1
48  %add.1 = fadd fast double %mul.1, %mul
  ; Lane 2: product added to the running sum.
49  %vecext.2 = extractelement <4 x double> %x, i32 2
50  %vecext1.2 = extractelement <4 x double> %y, i32 2
51  %mul.2 = fmul fast double %vecext.2, %vecext1.2
52  %add.2 = fadd fast double %mul.2, %add.1
  ; Lane 3: last product; %add.3 is the returned dot product.
53  %vecext.3 = extractelement <4 x double> %x, i32 3
54  %vecext1.3 = extractelement <4 x double> %y, i32 3
55  %mul.3 = fmul fast double %vecext.3, %vecext1.3
56  %add.3 = fadd fast double %mul.3, %add.2
57  ret double %add.3
58}
59
; dotfq: 4-lane float dot product of two vectors loaded from the pointers %x
; and %y (align 16). Unlike the fmuls that take the %x lane first, every scalar
; fmul here takes the lane from the %y load (%1) as its first operand and the
; lane from the %x load (%0) as its second.
; The CHECK lines expect the two vector loads to remain, one vector
; `fmul fast <4 x float>` whose operands keep that order ([[TMP1]], [[TMP0]]),
; and a single @llvm.vector.reduce.fadd.v4f32 call with a 0.0 start value.
60define float @dotfq(ptr nocapture readonly %x, ptr nocapture readonly %y) {
61; CHECK-LABEL: @dotfq(
62; CHECK-NEXT:  entry:
63; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 16
64; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Y:%.*]], align 16
65; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
66; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
67; CHECK-NEXT:    ret float [[TMP3]]
68;
69entry:
  ; %0 is the vector behind %x, %1 the vector behind %y.
70  %0 = load <4 x float>, ptr %x, align 16
71  %1 = load <4 x float>, ptr %y, align 16
  ; Lane 0: y-lane * x-lane, seeds the sum.
72  %vecext = extractelement <4 x float> %0, i32 0
73  %vecext1 = extractelement <4 x float> %1, i32 0
74  %mul = fmul fast float %vecext1, %vecext
  ; Lane 1: product added to the lane-0 product.
75  %vecext.1 = extractelement <4 x float> %0, i32 1
76  %vecext1.1 = extractelement <4 x float> %1, i32 1
77  %mul.1 = fmul fast float %vecext1.1, %vecext.1
78  %add.1 = fadd fast float %mul.1, %mul
  ; Lane 2: product added to the running sum.
79  %vecext.2 = extractelement <4 x float> %0, i32 2
80  %vecext1.2 = extractelement <4 x float> %1, i32 2
81  %mul.2 = fmul fast float %vecext1.2, %vecext.2
82  %add.2 = fadd fast float %mul.2, %add.1
  ; Lane 3: last product; %add.3 is the returned dot product.
83  %vecext.3 = extractelement <4 x float> %0, i32 3
84  %vecext1.3 = extractelement <4 x float> %1, i32 3
85  %mul.3 = fmul fast float %vecext1.3, %vecext.3
86  %add.3 = fadd fast float %mul.3, %add.2
87  ret float %add.3
88}
89
; dotdq: 4-lane double dot product of two vectors loaded from the pointers %x
; and %y (align 32). Every scalar fmul takes the lane from the %y load (%1) as
; its first operand and the lane from the %x load (%0) as its second; the
; products are summed by a chain of `fadd fast` (newest product first, running
; sum second).
; The CHECK lines expect the two vector loads to remain, one vector
; `fmul fast <4 x double>` whose operands keep that order ([[TMP1]], [[TMP0]]),
; and a single @llvm.vector.reduce.fadd.v4f64 call with a 0.0 start value.
90define double @dotdq(ptr nocapture readonly %x, ptr nocapture readonly %y) {
91; CHECK-LABEL: @dotdq(
92; CHECK-NEXT:  entry:
93; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[X:%.*]], align 32
94; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[Y:%.*]], align 32
95; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
96; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP2]])
97; CHECK-NEXT:    ret double [[TMP3]]
98;
99entry:
  ; %0 is the vector behind %x, %1 the vector behind %y.
100  %0 = load <4 x double>, ptr %x, align 32
101  %1 = load <4 x double>, ptr %y, align 32
  ; Lane 0: y-lane * x-lane, seeds the sum.
102  %vecext = extractelement <4 x double> %0, i32 0
103  %vecext1 = extractelement <4 x double> %1, i32 0
104  %mul = fmul fast double %vecext1, %vecext
  ; Lane 1: product added to the lane-0 product.
105  %vecext.1 = extractelement <4 x double> %0, i32 1
106  %vecext1.1 = extractelement <4 x double> %1, i32 1
107  %mul.1 = fmul fast double %vecext1.1, %vecext.1
108  %add.1 = fadd fast double %mul.1, %mul
  ; Lane 2: product added to the running sum.
109  %vecext.2 = extractelement <4 x double> %0, i32 2
110  %vecext1.2 = extractelement <4 x double> %1, i32 2
111  %mul.2 = fmul fast double %vecext1.2, %vecext.2
112  %add.2 = fadd fast double %mul.2, %add.1
  ; Lane 3: last product; %add.3 is the returned dot product.
113  %vecext.3 = extractelement <4 x double> %0, i32 3
114  %vecext1.3 = extractelement <4 x double> %1, i32 3
115  %mul.3 = fmul fast double %vecext1.3, %vecext.3
116  %add.3 = fadd fast double %mul.3, %add.2
117  ret double %add.3
118}
119