xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll (revision 2b1e037adb274c515b6ebe7808cc7da6a5b9c3b3)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3
; Input for the SLP vectorizer: a <16 x double> is assembled lane by lane from
; sixteen scalar fadds that all take %a as their first operand.
;   - lanes 0-9  : %a + a double loaded from %x at element offsets
;                  0, 1, 2, 3, 4, 5, 8, 9, 9, 10 (in that lane order)
;   - lanes 10-15: %a + %v
; The CHECK lines below expect the scalar code to collapse into one
; <6 x double> load from %x, two <2 x double> loads (from element offsets 8
; and 9), splats of %a and %v, a series of llvm.vector.insert calls plus
; shuffles that build the second operand, and a single fadd <16 x double>.
4define <16 x double> @test(ptr %x, double %v, double %a) {
5; CHECK-LABEL: define <16 x double> @test(
6; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) {
7; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
8; CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9
9; CHECK-NEXT:    [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4
10; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4
11; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4
12; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0
13; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer
14; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0
15; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer
16; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0
17; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer
18; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0)
19; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 14, i32 15>
21; CHECK-NEXT:    [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6)
22; CHECK-NEXT:    [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8)
23; CHECK-NEXT:    [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10)
24; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12)
25; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14)
26; CHECK-NEXT:    [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]]
27; CHECK-NEXT:    ret <16 x double> [[TMP18]]
28;
  ; Element addresses relative to %x. Offsets 1-5 are consecutive with %x
  ; itself; offsets 6 and 7 are never addressed.
29  %gep1 = getelementptr inbounds double, ptr %x, i64 1
30  %gep2 = getelementptr inbounds double, ptr %x, i64 2
31  %gep3 = getelementptr inbounds double, ptr %x, i64 3
32  %gep4 = getelementptr inbounds double, ptr %x, i64 4
33  %gep5 = getelementptr inbounds double, ptr %x, i64 5
34  %gep6 = getelementptr inbounds double, ptr %x, i64 8
  ; NOTE(review): %gep7 and %gep8 both use index 9, so %x7 and %x8 read the
  ; same element. The expected output relies on this (two <2 x double> loads
  ; at offsets 8 and 9 that overlap) -- presumably intentional; confirm before
  ; "fixing" either index.
35  %gep7 = getelementptr inbounds double, ptr %x, i64 9
36  %gep8 = getelementptr inbounds double, ptr %x, i64 9
37  %gep9 = getelementptr inbounds double, ptr %x, i64 10
  ; Ten scalar loads, all with align 4.
38  %x0 = load double, ptr %x, align 4
39  %x1 = load double, ptr %gep1, align 4
40  %x2 = load double, ptr %gep2, align 4
41  %x3 = load double, ptr %gep3, align 4
42  %x4 = load double, ptr %gep4, align 4
43  %x5 = load double, ptr %gep5, align 4
44  %x6 = load double, ptr %gep6, align 4
45  %x7 = load double, ptr %gep7, align 4
46  %x8 = load double, ptr %gep8, align 4
47  %x9 = load double, ptr %gep9, align 4
  ; %a + loaded value, one fadd per loaded element (results go to lanes 0-9).
48  %add1 = fadd double %a, %x0
49  %add2 = fadd double %a, %x1
50  %add3 = fadd double %a, %x2
51  %add4 = fadd double %a, %x3
52  %add5 = fadd double %a, %x4
53  %add6 = fadd double %a, %x5
54  %add7 = fadd double %a, %x6
55  %add8 = fadd double %a, %x7
56  %add9 = fadd double %a, %x8
57  %add10 = fadd double %a, %x9
  ; Six identical %a + %v computations (results go to lanes 10-15).
58  %add11 = fadd double %a, %v
59  %add12 = fadd double %a, %v
60  %add13 = fadd double %a, %v
61  %add14 = fadd double %a, %v
62  %add15 = fadd double %a, %v
63  %add16 = fadd double %a, %v
  ; Insert the sixteen scalar results into lanes 0..15 in order, starting from
  ; a poison vector; the fully populated vector %i15 is returned.
64  %i0 = insertelement <16 x double> poison, double %add1, i32 0
65  %i1 = insertelement <16 x double> %i0, double %add2, i32 1
66  %i2 = insertelement <16 x double> %i1, double %add3, i32 2
67  %i3 = insertelement <16 x double> %i2, double %add4, i32 3
68  %i4 = insertelement <16 x double> %i3, double %add5, i32 4
69  %i5 = insertelement <16 x double> %i4, double %add6, i32 5
70  %i6 = insertelement <16 x double> %i5, double %add7, i32 6
71  %i7 = insertelement <16 x double> %i6, double %add8, i32 7
72  %i8 = insertelement <16 x double> %i7, double %add9, i32 8
73  %i9 = insertelement <16 x double> %i8, double %add10, i32 9
74  %i10 = insertelement <16 x double> %i9, double %add11, i32 10
75  %i11 = insertelement <16 x double> %i10, double %add12, i32 11
76  %i12 = insertelement <16 x double> %i11, double %add13, i32 12
77  %i13 = insertelement <16 x double> %i12, double %add14, i32 13
78  %i14 = insertelement <16 x double> %i13, double %add15, i32 14
79  %i15 = insertelement <16 x double> %i14, double %add16, i32 15
80  ret <16 x double> %i15
81}
82