xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll (revision 2be0abb7fe72ed4537b3eabcd3102d48ea845717)
1; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
3
; Data layout for this test module. Read with the LLVM LangRef layout-string
; syntax: little-endian (e), ELF name mangling (m:e), i64 aligned to 64 bits,
; i128 aligned to 128 bits, native integer widths of 32 and 64 bits (n32:64),
; and a 128-bit natural stack alignment (S128).
4target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5
6; Verify that subsets of load/store chains are vectorized in the presence of
7; interleaved loads/stores.
8
; Test case: two scalar i32 loads (ptr[1], ptr[0]), then two scalar stores of 0
; to those same two elements, then two more scalar loads (ptr[1], ptr[2]).
; The expectations below require the pass output to contain, in this order,
; a <2 x i32> load, a <2 x i32> store and a second <2 x i32> load.
9; CHECK-LABEL: @interleave_2L_2S(
10; CHECK: load <2 x i32>
11; CHECK: store <2 x i32>
12; CHECK: load <2 x i32>
13define void @interleave_2L_2S(ptr noalias %ptr) {
14  %next.gep1 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
15  %next.gep2 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
16
; First pair of loads: elements 1 and 0, issued in descending address order.
17  %l1 = load i32, ptr %next.gep1, align 4
18  %l2 = load i32, ptr %ptr, align 4
; Stores to the same two elements sit between the two groups of loads.
19  store i32 0, ptr %next.gep1, align 4
20  store i32 0, ptr %ptr, align 4
; Second pair of loads: elements 1 and 2.
21  %l3 = load i32, ptr %next.gep1, align 4
22  %l4 = load i32, ptr %next.gep2, align 4
23
24  ret void
25}
26
; Test case: scalar i32 loads of ptr[0] and ptr[1], stores of 0 to ptr[1] and
; ptr[0], then scalar loads of ptr[1] and ptr[2].
; The expectations below require, in this order, a <2 x i32> load, a
; <2 x i32> store and a second <2 x i32> load in the pass output.
; NOTE(review): the function name suggests 3 loads / 2 stores / 1 load, but the
; body visible here contains 2 loads / 2 stores / 2 loads -- confirm that the
; name and the body still match the intended scenario.
27; CHECK-LABEL: @interleave_3L_2S_1L(
28; CHECK: load <2 x i32>
29; CHECK: store <2 x i32>
30; CHECK: load <2 x i32>
31
32define void @interleave_3L_2S_1L(ptr noalias %ptr) {
33  %next.gep1 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
34  %next.gep2 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
35
; Loads before the stores: elements 0 and 1, in ascending address order.
36  %l2 = load i32, ptr %ptr, align 4
37  %l1 = load i32, ptr %next.gep1, align 4
; Stores to elements 1 and 0.
38  store i32 0, ptr %next.gep1, align 4
39  store i32 0, ptr %ptr, align 4
; Loads after the stores: elements 1 and 2.
40  %l3 = load i32, ptr %next.gep1, align 4
41  %l4 = load i32, ptr %next.gep2, align 4
42
43  ret void
44}
45
; Test case: a single scalar i32 load of ptr[0], then stores of 0 to ptr[1]
; and ptr[0], then scalar loads of ptr[1] and ptr[2].
; The expectations below require the first load to remain a scalar i32 load,
; followed by a <2 x i32> store and a <2 x i32> load.
46; CHECK-LABEL: @chain_suffix(
47; CHECK: load i32
48; CHECK: store <2 x i32>
49; CHECK: load <2 x i32>
50define void @chain_suffix(ptr noalias %ptr) {
51  %next.gep1 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
52  %next.gep2 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
53
; The only load that precedes the stores.
54  %l2 = load i32, ptr %ptr, align 4
; Stores to elements 1 and 0.
55  store i32 0, ptr %next.gep1, align 4
56  store i32 0, ptr %ptr, align 4
; Loads after the stores: elements 1 and 2.
57  %l3 = load i32, ptr %next.gep1, align 4
58  %l4 = load i32, ptr %next.gep2, align 4
59
60  ret void
61}
62
63
; Test case: scalar i32 loads of ptr[0] and ptr[1], stores of 0 to ptr[1] and
; ptr[2], then scalar loads of ptr[1], ptr[2] and ptr[3].
; The expectations below require, in this order, a <2 x i32> load, a
; <2 x i32> store and a <3 x i32> load in the pass output.
64; CHECK-LABEL: @chain_prefix_suffix(
65; CHECK: load <2 x i32>
66; CHECK: store <2 x i32>
67; CHECK: load <3 x i32>
68define void  @chain_prefix_suffix(ptr noalias %ptr) {
69  %next.gep1 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
70  %next.gep2 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
71  %next.gep3 = getelementptr i32, ptr %ptr, i64 3 ; address of ptr[3]
72
; Loads before the stores: elements 0 and 1.
73  %l1 = load i32, ptr %ptr, align 4
74  %l2 = load i32, ptr %next.gep1, align 4
; Stores to elements 1 and 2.
75  store i32 0, ptr %next.gep1, align 4
76  store i32 0, ptr %next.gep2, align 4
; Loads after the stores: elements 1, 2 and 3.
77  %l3 = load i32, ptr %next.gep1, align 4
78  %l4 = load i32, ptr %next.gep2, align 4
79  %l5 = load i32, ptr %next.gep3, align 4
80
81  ret void
82}
83
; Test case: scalar i32 loads of ptr[1] and ptr[0], stores of 0 to those two
; elements, then scalar loads of ptr[1], ptr[2], ptr[3] and two loads of ptr[4].
; All accesses are 4-byte aligned.
; The expectations below require, in this order, a <2 x i32> load, a
; <2 x i32> zeroinitializer store, a <3 x i32> load and two scalar i32 loads.
; The label pattern ends in '(' so that it matches only this function and not
; @interleave_get_longest_aligned, whose name starts with the same text.
84; CHECK-LABEL: @interleave_get_longest(
85; CHECK: load <2 x i32>
86; CHECK: store <2 x i32> zeroinitializer
87; CHECK: load <3 x i32>
88; CHECK: load i32
89; CHECK: load i32
90
91define void @interleave_get_longest(ptr noalias %ptr) {
92  %tmp2 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
93  %tmp3 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
94  %tmp4 = getelementptr i32, ptr %ptr, i64 3 ; address of ptr[3]
95  %tmp5 = getelementptr i32, ptr %ptr, i64 4 ; address of ptr[4]
96
; Loads before the stores: elements 1 and 0.
97  %l1 = load i32, ptr %tmp2, align 4
98  %l2 = load i32, ptr %ptr, align 4
; Stores to elements 1 and 0.
99  store i32 0, ptr %tmp2, align 4
100  store i32 0, ptr %ptr, align 4
; Loads after the stores: elements 1, 2, 3, 4; %l6 and %l7 read the same address.
101  %l3 = load i32, ptr %tmp2, align 4
102  %l4 = load i32, ptr %tmp3, align 4
103  %l5 = load i32, ptr %tmp4, align 4
104  %l6 = load i32, ptr %tmp5, align 4
105  %l7 = load i32, ptr %tmp5, align 4
106
107  ret void
108}
109
; Test case: same access sequence as a 2-load / 2-store / 5-load interleaving
; over ptr[0..4], but the post-store load of ptr[1] is marked align 16 and the
; load of ptr[3] is marked align 8; every other access is align 4.
; The expectations below require, in this order, a <2 x i32> load, a
; <2 x i32> zeroinitializer store and a <4 x i32> load in the pass output.
; The label pattern ends in '(' so that it anchors on the complete function
; name, in the same form as the labels of the earlier test cases in this file.
110; CHECK-LABEL: @interleave_get_longest_aligned(
111; CHECK: load <2 x i32>
112; CHECK: store <2 x i32> zeroinitializer
113; CHECK: load <4 x i32>
114
115define void @interleave_get_longest_aligned(ptr noalias %ptr) {
116  %tmp2 = getelementptr i32, ptr %ptr, i64 1 ; address of ptr[1]
117  %tmp3 = getelementptr i32, ptr %ptr, i64 2 ; address of ptr[2]
118  %tmp4 = getelementptr i32, ptr %ptr, i64 3 ; address of ptr[3]
119  %tmp5 = getelementptr i32, ptr %ptr, i64 4 ; address of ptr[4]
120
; Loads before the stores: elements 1 and 0.
121  %l1 = load i32, ptr %tmp2, align 4
122  %l2 = load i32, ptr %ptr, align 4
; Stores to elements 1 and 0.
123  store i32 0, ptr %tmp2, align 4
124  store i32 0, ptr %ptr, align 4
; Loads after the stores: elements 1, 2, 3, 4; %l6 and %l7 read the same address.
; Note the larger alignment annotations on %l3 (16) and %l5 (8).
125  %l3 = load i32, ptr %tmp2, align 16
126  %l4 = load i32, ptr %tmp3, align 4
127  %l5 = load i32, ptr %tmp4, align 8
128  %l6 = load i32, ptr %tmp5, align 4
129  %l7 = load i32, ptr %tmp5, align 4
130
131  ret void
132}