xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll (revision 7d01a8f2b9ac28ffe73bef4b513d383d3edf34b5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -pass-remarks-output=%t < %s | FileCheck %s
3; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
4
5; YAML-LABEL: --- !Passed
6; YAML-NEXT: Pass:            slp-vectorizer
7; YAML-NEXT: Name:            VectorizedHorizontalReduction
8; YAML-NEXT: Function:        test
9; YAML-NEXT: Args:
10; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
11; YAML-NEXT:   - Cost:            '-41'
12; YAML-NEXT:   - String:          ' and with tree size '
13; YAML-NEXT:   - TreeSize:        '7'
14; YAML-NEXT: ...
15
16define i64 @test() {
; Expected output (autogenerated by update_test_checks.py): SLP collapses the
; entire scalar chain below into one 32-wide add-reduction — the operands are
; gathered into a <32 x i32> build-vector (insertelement plus two
; llvm.vector.insert calls reusing smaller constant subvectors), negated with a
; single vector sub, and summed with llvm.vector.reduce.add.  This is the
; vectorization reported by the YAML remark above (cost -41, tree size 7).
17; CHECK-LABEL: define i64 @test(
18; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
19; CHECK-NEXT:  [[ENTRY:.*:]]
20; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 1, i32 0>, i32 0, i32 6
21; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
22; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 24)
23; CHECK-NEXT:    [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]]
24; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP3]])
25; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP4]], 0
26; CHECK-NEXT:    [[RES:%.*]] = sext i32 [[OP_RDX]] to i64
27; CHECK-NEXT:    ret i64 [[RES]]
28;
29entry:
; Scalar reduction chain: 16 unrolled steps, each accumulating
;   %k = (0 - a) + (0 - b) + %k-1
; where a/b are either the constant 0 or a trunc of an i64 constant.  Each
; trunc result is deliberately consumed by TWO adjacent steps (e.g. %0 feeds
; both %.neg15.1 and %.neg.2) so the SLP build-vector must reuse values
; across nodes of the reduction tree — the multi-node-reuse case in the
; test's name.  All inputs are constants, so the vectorizer can fold them
; into the constant build-vector seen in the CHECK lines.
30  %.neg15 = sub i32 0, 0
31  %0 = trunc i64 1 to i32
32  %.neg.1 = sub i32 0, 0
33  %.neg15.1 = sub i32 0, %0
34  %.neg16.1 = add i32 %.neg.1, %.neg15.1
35  %1 = add i32 %.neg16.1, %.neg15
36  %2 = trunc i64 0 to i32
37  %.neg.2 = sub i32 0, %0
38  %.neg15.2 = sub i32 0, %2
39  %.neg16.2 = add i32 %.neg.2, %.neg15.2
40  %3 = add i32 %.neg16.2, %1
41  %4 = trunc i64 0 to i32
42  %.neg.3 = sub i32 0, %2
43  %.neg15.3 = sub i32 0, %4
44  %.neg16.3 = add i32 %.neg.3, %.neg15.3
45  %5 = add i32 %.neg16.3, %3
46  %6 = trunc i64 0 to i32
47  %.neg.4 = sub i32 0, %4
48  %.neg15.4 = sub i32 0, %6
49  %.neg16.4 = add i32 %.neg.4, %.neg15.4
50  %7 = add i32 %.neg16.4, %5
51  %.neg.5 = sub i32 0, %6
52  %.neg15.5 = sub i32 0, 0
53  %.neg16.5 = add i32 %.neg.5, %.neg15.5
54  %8 = add i32 %.neg16.5, %7
55  %.neg15.6 = sub i32 0, 0
56  %.neg16.6 = add i32 0, %.neg15.6
57  %9 = add i32 %.neg16.6, %8
58  %.neg.7 = sub i32 0, 0
59  %.neg15.7 = sub i32 0, 0
60  %.neg16.7 = add i32 %.neg.7, %.neg15.7
61  %10 = add i32 %.neg16.7, %9
; Second run of trunc-fed steps (%11..%19) mirrors the first; all source
; i64 constants here are 0, so only lane 30 of the build-vector ends up
; non-zero (the i32 1 from %0 above and the trailing <0,0,0,1> subvector).
62  %11 = trunc i64 0 to i32
63  %.neg.8 = sub i32 0, 0
64  %.neg15.8 = sub i32 0, %11
65  %.neg16.8 = add i32 %.neg.8, %.neg15.8
66  %12 = add i32 %.neg16.8, %10
67  %13 = trunc i64 0 to i32
68  %.neg.9 = sub i32 0, %11
69  %.neg15.9 = sub i32 0, %13
70  %.neg16.9 = add i32 %.neg.9, %.neg15.9
71  %14 = add i32 %.neg16.9, %12
72  %15 = trunc i64 0 to i32
73  %.neg.10 = sub i32 0, %13
74  %.neg15.10 = sub i32 0, %15
75  %.neg16.10 = add i32 %.neg.10, %.neg15.10
76  %16 = add i32 %.neg16.10, %14
77  %17 = trunc i64 0 to i32
78  %.neg.11 = sub i32 0, %15
79  %.neg15.11 = sub i32 0, %17
80  %.neg16.11 = add i32 %.neg.11, %.neg15.11
81  %18 = add i32 %.neg16.11, %16
82  %19 = trunc i64 0 to i32
83  %.neg.12 = sub i32 0, %17
84  %.neg15.12 = sub i32 0, %19
85  %.neg16.12 = add i32 %.neg.12, %.neg15.12
86  %20 = add i32 %.neg16.12, %18
87  %.neg.13 = sub i32 0, %19
88  %.neg15.13 = sub i32 0, 0
89  %.neg16.13 = add i32 %.neg.13, %.neg15.13
90  %21 = add i32 %.neg16.13, %20
91  %.neg.14 = sub i32 0, 0
92  %.neg15.14 = sub i32 0, 0
93  %.neg16.14 = add i32 %.neg.14, %.neg15.14
94  %22 = add i32 %.neg16.14, %21
95  %.neg.15 = sub i32 0, 0
96  %.neg15.15 = sub i32 0, 0
97  %.neg16.15 = add i32 %.neg.15, %.neg15.15
98  %23 = add i32 %.neg16.15, %22
99  %.neg.16 = sub i32 0, 0
100  %.neg15.16 = sub i32 0, 0
101  %.neg16.16 = add i32 %.neg.16, %.neg15.16
102  %24 = add i32 %.neg16.16, %23
; Final accumulator is widened back to i64 — this sext is the reduction
; root's sole user and survives vectorization unchanged (see CHECK lines).
103  %res = sext i32 %24 to i64
104  ret i64 %res
105}
106