xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV
3; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s
4; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s
5; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target
6
; Scalar reduction used as input for the SLP vectorizer. The function returns
; the sum of eight i64 products: the six values loaded from elements 0..5 of
; %p, each multiplied by 42, plus two further products of the first loaded
; value (%tmp) by 42. Only six distinct loads therefore feed the eight
; reduction operands.
;
; Expected output according to the autogenerated assertions below:
;  - RISCV prefix (riscv64 with +v): a <4 x i64> load from %p and a <2 x i64>
;    load from element 4 are assembled into an <8 x i64> vector by a
;    shufflevector (lanes 6 and 7 take element 0 of the first load) followed
;    by two llvm.vector.insert calls at indices 0 and 4; the result is
;    multiplied by splat 42 and reduced with llvm.vector.reduce.add.
;  - CHECK prefix (x86_64 and aarch64 runs): one <6 x i64> load is widened to
;    <8 x i64> by a shufflevector whose lanes 6 and 7 repeat element 0, then
;    multiplied by splat 42 and reduced with llvm.vector.reduce.add.
; Presumably the RISC-V split into 4- and 2-element loads is the "whole
; register" behaviour the file name refers to -- TODO confirm against the
; vectorizer change that introduced this test.
7define i64 @test(ptr %p) {
8; RISCV-LABEL: @test(
9; RISCV-NEXT:  entry:
10; RISCV-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4
11; RISCV-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4
12; RISCV-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4
13; RISCV-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0>
14; RISCV-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0)
15; RISCV-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4)
16; RISCV-NEXT:    [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42)
17; RISCV-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
18; RISCV-NEXT:    ret i64 [[TMP6]]
19;
20; CHECK-LABEL: @test(
21; CHECK-NEXT:  entry:
22; CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4
23; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 0>
24; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 42)
25; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]])
26; CHECK-NEXT:    ret i64 [[TMP3]]
27;
28entry:
  ; Addresses of elements 1..5 of the i64 array at %p (element 0 is %p itself).
29  %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1
30  %arrayidx.2 = getelementptr inbounds i64, ptr %p, i64 2
31  %arrayidx.3 = getelementptr inbounds i64, ptr %p, i64 3
32  %arrayidx.4 = getelementptr inbounds i64, ptr %p, i64 4
33  %arrayidx.5 = getelementptr inbounds i64, ptr %p, i64 5
  ; Linear add chain: each element is loaded, multiplied by 42 and accumulated
  ; into the running sum (%add, %add1, ... %add4).
34  %tmp = load i64, ptr %p, align 4
35  %mul = mul i64 %tmp, 42
36  %tmp1 = load i64, ptr %arrayidx.1, align 4
37  %mul1 = mul i64 %tmp1, 42
38  %add = add i64 %mul, %mul1
39  %tmp2 = load i64, ptr %arrayidx.2, align 4
40  %mul2 = mul i64 %tmp2, 42
41  %add1 = add i64 %add, %mul2
42  %tmp3 = load i64, ptr %arrayidx.3, align 4
43  %mul3 = mul i64 %tmp3, 42
44  %add2 = add i64 %add1, %mul3
45  %tmp4 = load i64, ptr %arrayidx.4, align 4
46  %mul4 = mul i64 %tmp4, 42
47  %add3 = add i64 %add2, %mul4
48  %tmp5 = load i64, ptr %arrayidx.5, align 4
49  %mul5 = mul i64 %tmp5, 42
50  %add4 = add i64 %add3, %mul5
  ; Element 0 (%tmp) is multiplied by 42 and accumulated two more times. These
  ; repeated operands bring the reduction to eight values and correspond to
  ; the trailing "i32 0, i32 0" lanes in the expected shuffle masks above.
51  %mul6 = mul i64 %tmp, 42
52  %add5 = add i64 %add4, %mul6
53  %mul7 = mul i64 %tmp, 42
54  %add6 = add i64 %add5, %mul7
55  ret i64 %add6
56}
57