xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll (revision 5f53e85f8aa92ff0cf7d674a92491e06683f73ef)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s
3
; @foo: argument-less i32 function used as SLP-vectorizer input.
; The body is one linear chain of i32 `mul`s whose final link (%inst58) is
; returned. Chain operands come from two places:
;   * lane 0 of a constant <2 x i32> (extracted four times, %0..%3), and
;   * the four lanes of %4 (%5..%8), each passed through `mul ..., 0` and
;     `add ..., 0` before being multiplied into the chain.
; %5 (lane 0 of %4) is additionally multiplied into the chain directly, so it
; is used both through the per-lane mul/add pattern and as a plain scalar.
; NOTE(review): judging by the file name, this double use of an extracted
; value is presumably the case under test -- confirm against the originating
; commit.
;
; The CHECK lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_test_checks.py rather than editing
; them by hand. They expect:
;   * one remaining extract from the <2 x i32> constant ([[TMP0]]),
;   * the per-lane mul/add done as <4 x i32> operations on [[TMP1]],
;   * a call to @llvm.vector.reduce.mul.v4i32 on the vector result,
;   * lane 0 of [[TMP1]] still extracted as a scalar ([[TMP2]]) and folded in
;     through the trailing scalar [[OP_RDX*]] multiplies.
4define i32 @foo() {
5; CHECK-LABEL: @foo(
6; CHECK-NEXT:  bb:
7; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
8; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> zeroinitializer, zeroinitializer
9; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
10; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], zeroinitializer
11; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], zeroinitializer
12; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]])
13; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 0, [[TMP5]]
14; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], 0
15; CHECK-NEXT:    [[OP_RDX2:%.*]] = mul i32 [[TMP0]], [[TMP0]]
16; CHECK-NEXT:    [[OP_RDX3:%.*]] = mul i32 [[TMP0]], [[TMP0]]
17; CHECK-NEXT:    [[OP_RDX4:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
18; CHECK-NEXT:    [[OP_RDX5:%.*]] = mul i32 [[OP_RDX3]], [[TMP2]]
19; CHECK-NEXT:    [[OP_RDX6:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
20; CHECK-NEXT:    ret i32 [[OP_RDX6]]
21;
; ---- Input IR (what opt reads) ----
22bb:
  ; Start of the chain: %inst5 seeds it, then four identical lane-0 extracts
  ; from the constant <2 x i32> (%0..%3) are multiplied in one after another,
  ; with a `mul ..., 0` link after each of the last three.
23  %inst5 = add i32 0, 0
24  %0 = extractelement <2 x i32> zeroinitializer, i32 0
25  %inst7 = mul i32 %0, %inst5
26  %1 = extractelement <2 x i32> zeroinitializer, i32 0
27  %inst13 = mul i32 %1, %inst7
28  %inst14 = mul i32 %inst13, 0
29  %2 = extractelement <2 x i32> zeroinitializer, i32 0
30  %inst19 = mul i32 %2, %inst14
31  %inst20 = mul i32 %inst19, 0
32  %3 = extractelement <2 x i32> zeroinitializer, i32 0
33  %inst26 = mul i32 %3, %inst20
34  %inst27 = mul i32 %inst26, 0
  ; Source vector whose four lanes are extracted below as %5..%8.
35  %4 = or <4 x i32> zeroinitializer, zeroinitializer
  ; Lane 0: %5 feeds the per-lane `mul 0` / `add 0` pair (%inst31/%inst32)
  ; and is ALSO multiplied straight into the chain (%inst33).
36  %5 = extractelement <4 x i32> %4, i32 0
37  %inst31 = mul i32 %5, 0
38  %inst32 = add i32 %inst31, 0
39  %inst33 = mul i32 %5, %inst27
40  %inst34 = mul i32 %inst33, %inst32
  ; Lanes 1..3: same per-lane `mul 0` / `add 0` pair, but the extracted value
  ; itself is not a direct chain operand; the extra chain link multiplies by
  ; the constant 0 instead.
41  %6 = extractelement <4 x i32> %4, i32 1
42  %inst39 = mul i32 %6, 0
43  %inst40 = add i32 %inst39, 0
44  %inst41 = mul i32 0, %inst34
45  %inst42 = mul i32 %inst41, %inst40
46  %7 = extractelement <4 x i32> %4, i32 2
47  %inst47 = mul i32 %7, 0
48  %inst48 = add i32 %inst47, 0
49  %inst49 = mul i32 0, %inst42
50  %inst50 = mul i32 %inst49, %inst48
51  %8 = extractelement <4 x i32> %4, i32 3
52  %inst55 = mul i32 %8, 0
53  %inst56 = add i32 %inst55, 0
54  %inst57 = mul i32 0, %inst50
55  %inst58 = mul i32 %inst57, %inst56
  ; The last link of the multiply chain is the function result.
56  ret i32 %inst58
57}
58
59