xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll (revision 1833d418a04123916c1dbeb0c41c8bc7d06b779b)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=alderlake < %s| FileCheck %s
3
; Four scalar chains of the form trunc(ashr(sext(a * b), 0)), each stored to one
; of four consecutive i32 elements (0..3) of the [4 x i32] array at index 8.
; Operand `a` is loaded from i32 indices 63, 62, 61, 60 (descending while the
; store element ascends); operand `b` is loaded from indices 1, 33, 7, 0.
; The expected output loads indices 60..63 as a single <4 x i32>, assembles the
; other operand from a <2 x i32> load at null plus two scalar insertelements,
; performs mul and ashr directly on <4 x i32> (no sext/trunc remain), and
; reverses the lanes with a shufflevector before one vector store.
4define void @test() {
5; CHECK-LABEL: define void @test(
6; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7; CHECK-NEXT:  entry:
8; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33
9; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
10; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7
11; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
12; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
13; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr null, align 4
14; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
15; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
16; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1
17; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 2
18; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]]
19; CHECK-NEXT:    [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer
20; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
21; CHECK-NEXT:    store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
22; CHECK-NEXT:    ret void
23;
24entry:
; Chain for store element 0: operands from i32 indices 1 and 63.
25  %arrayidx1 = getelementptr i32, ptr null, i64 1
26  %0 = load i32, ptr %arrayidx1, align 4
27  %arrayidx2 = getelementptr i32, ptr null, i64 63
28  %1 = load i32, ptr %arrayidx2, align 4
29  %mul = mul i32 %1, %0
30  %conv = sext i32 %mul to i64
31  %shr = ashr i64 %conv, 0
32  %conv3 = trunc i64 %shr to i32
33  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Chain for store element 1: operands from i32 indices 33 and 62.
34  %arrayidx5 = getelementptr i32, ptr null, i64 33
35  %2 = load i32, ptr %arrayidx5, align 4
36  %arrayidx6 = getelementptr i32, ptr null, i64 62
37  %3 = load i32, ptr %arrayidx6, align 4
38  %mul7 = mul i32 %3, %2
39  %conv8 = sext i32 %mul7 to i64
40  %shr10 = ashr i64 %conv8, 0
41  %conv11 = trunc i64 %shr10 to i32
42  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Chain for store element 2: operands from i32 indices 7 and 61.
43  %arrayidx13 = getelementptr i32, ptr null, i64 7
44  %4 = load i32, ptr %arrayidx13, align 4
45  %arrayidx14 = getelementptr i32, ptr null, i64 61
46  %5 = load i32, ptr %arrayidx14, align 4
47  %mul15 = mul i32 %5, %4
48  %conv16 = sext i32 %mul15 to i64
49  %shr18 = ashr i64 %conv16, 0
50  %conv19 = trunc i64 %shr18 to i32
51  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Chain for store element 3: operands from the null pointer itself (index 0) and index 60.
52  %6 = load i32, ptr null, align 4
53  %arrayidx22 = getelementptr i32, ptr null, i64 60
54  %7 = load i32, ptr %arrayidx22, align 4
55  %mul23 = mul i32 %7, %6
56  %conv24 = sext i32 %mul23 to i64
57  %shr26 = ashr i64 %conv24, 0
58  %conv27 = trunc i64 %shr26 to i32
59  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
60  ret void
61}
62
; Same load/mul/store layout as @test, but each chain uses lshr instead of ashr:
; trunc(lshr(sext(a * b), 0)).
; Unlike @test, the expected output keeps the widening: the <4 x i32> product is
; sign-extended to <4 x i64>, shifted with lshr, and truncated back to
; <4 x i32> before the lane-reversing shufflevector and the single vector store.
63define void @test1() {
64; CHECK-LABEL: define void @test1(
65; CHECK-SAME: ) #[[ATTR0]] {
66; CHECK-NEXT:  entry:
67; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33
68; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
69; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7
70; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
71; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
72; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr null, align 4
73; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
74; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
75; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1
76; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 2
77; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]]
78; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64>
79; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer
80; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
81; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
82; CHECK-NEXT:    store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
83; CHECK-NEXT:    ret void
84;
85entry:
; Chain for store element 0: operands from i32 indices 1 and 63.
86  %arrayidx1 = getelementptr i32, ptr null, i64 1
87  %0 = load i32, ptr %arrayidx1, align 4
88  %arrayidx2 = getelementptr i32, ptr null, i64 63
89  %1 = load i32, ptr %arrayidx2, align 4
90  %mul = mul i32 %1, %0
91  %conv = sext i32 %mul to i64
92  %shr = lshr i64 %conv, 0
93  %conv3 = trunc i64 %shr to i32
94  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Chain for store element 1: operands from i32 indices 33 and 62.
95  %arrayidx5 = getelementptr i32, ptr null, i64 33
96  %2 = load i32, ptr %arrayidx5, align 4
97  %arrayidx6 = getelementptr i32, ptr null, i64 62
98  %3 = load i32, ptr %arrayidx6, align 4
99  %mul7 = mul i32 %3, %2
100  %conv8 = sext i32 %mul7 to i64
101  %shr10 = lshr i64 %conv8, 0
102  %conv11 = trunc i64 %shr10 to i32
103  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Chain for store element 2: operands from i32 indices 7 and 61.
104  %arrayidx13 = getelementptr i32, ptr null, i64 7
105  %4 = load i32, ptr %arrayidx13, align 4
106  %arrayidx14 = getelementptr i32, ptr null, i64 61
107  %5 = load i32, ptr %arrayidx14, align 4
108  %mul15 = mul i32 %5, %4
109  %conv16 = sext i32 %mul15 to i64
110  %shr18 = lshr i64 %conv16, 0
111  %conv19 = trunc i64 %shr18 to i32
112  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Chain for store element 3: operands from the null pointer itself (index 0) and index 60.
113  %6 = load i32, ptr null, align 4
114  %arrayidx22 = getelementptr i32, ptr null, i64 60
115  %7 = load i32, ptr %arrayidx22, align 4
116  %mul23 = mul i32 %7, %6
117  %conv24 = sext i32 %mul23 to i64
118  %shr26 = lshr i64 %conv24, 0
119  %conv27 = trunc i64 %shr26 to i32
120  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
121  ret void
122}
123
; Same load/mul/store layout as @test, but each chain is
; trunc(udiv(zext(a * b), C)) with per-chain constant divisors C = 1, 2, 1, 2
; for store elements 0, 1, 2, 3 respectively.
; The expected output performs the udiv directly on <4 x i32> (no zext/trunc
; remain) with divisor vector <2, 1, 2, 1>, i.e. the divisors in reversed store
; order, followed by the lane-reversing shufflevector and a single vector store.
; The %shr* value names hold udiv results in this function.
124define void @test_div() {
125; CHECK-LABEL: define void @test_div(
126; CHECK-SAME: ) #[[ATTR0]] {
127; CHECK-NEXT:  entry:
128; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33
129; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
130; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7
131; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
132; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
133; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr null, align 4
134; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
135; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
136; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1
137; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP4]], i32 2
138; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]]
139; CHECK-NEXT:    [[TMP10:%.*]] = udiv <4 x i32> [[TMP9]], <i32 2, i32 1, i32 2, i32 1>
140; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
141; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
142; CHECK-NEXT:    ret void
143;
144entry:
; Chain for store element 0: operands from i32 indices 1 and 63, divisor 1.
145  %arrayidx1 = getelementptr i32, ptr null, i64 1
146  %0 = load i32, ptr %arrayidx1, align 4
147  %arrayidx2 = getelementptr i32, ptr null, i64 63
148  %1 = load i32, ptr %arrayidx2, align 4
149  %mul = mul i32 %1, %0
150  %conv = zext i32 %mul to i64
151  %shr = udiv i64 %conv, 1
152  %conv3 = trunc i64 %shr to i32
153  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Chain for store element 1: operands from i32 indices 33 and 62, divisor 2.
154  %arrayidx5 = getelementptr i32, ptr null, i64 33
155  %2 = load i32, ptr %arrayidx5, align 4
156  %arrayidx6 = getelementptr i32, ptr null, i64 62
157  %3 = load i32, ptr %arrayidx6, align 4
158  %mul7 = mul i32 %3, %2
159  %conv8 = zext i32 %mul7 to i64
160  %shr10 = udiv i64 %conv8, 2
161  %conv11 = trunc i64 %shr10 to i32
162  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Chain for store element 2: operands from i32 indices 7 and 61, divisor 1.
163  %arrayidx13 = getelementptr i32, ptr null, i64 7
164  %4 = load i32, ptr %arrayidx13, align 4
165  %arrayidx14 = getelementptr i32, ptr null, i64 61
166  %5 = load i32, ptr %arrayidx14, align 4
167  %mul15 = mul i32 %5, %4
168  %conv16 = zext i32 %mul15 to i64
169  %shr18 = udiv i64 %conv16, 1
170  %conv19 = trunc i64 %shr18 to i32
171  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Chain for store element 3: operands from the null pointer itself (index 0) and index 60, divisor 2.
172  %6 = load i32, ptr null, align 4
173  %arrayidx22 = getelementptr i32, ptr null, i64 60
174  %7 = load i32, ptr %arrayidx22, align 4
175  %mul23 = mul i32 %7, %6
176  %conv24 = zext i32 %mul23 to i64
177  %shr26 = udiv i64 %conv24, 2
178  %conv27 = trunc i64 %shr26 to i32
179  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
180  ret void
181}
182
; Same load/mul/store layout as @test, but each chain is
; trunc(urem(zext(a * b), C)) with per-chain constant divisors C = 1, 2, 1, 1
; for store elements 0, 1, 2, 3 respectively.
; The expected output performs the urem directly on <4 x i32> (no zext/trunc
; remain) with divisor vector <1, 1, 2, 1>, i.e. the divisors in reversed store
; order, followed by the lane-reversing shufflevector and a single vector store.
; The %shr* value names hold urem results in this function.
183define void @test_rem() {
184; CHECK-LABEL: define void @test_rem(
185; CHECK-SAME: ) #[[ATTR0]] {
186; CHECK-NEXT:  entry:
187; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33
188; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
189; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7
190; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
191; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
192; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr null, align 4
193; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
194; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
195; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1
196; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP4]], i32 2
197; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]]
198; CHECK-NEXT:    [[TMP10:%.*]] = urem <4 x i32> [[TMP9]], <i32 1, i32 1, i32 2, i32 1>
199; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
200; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
201; CHECK-NEXT:    ret void
202;
203entry:
; Chain for store element 0: operands from i32 indices 1 and 63, divisor 1.
204  %arrayidx1 = getelementptr i32, ptr null, i64 1
205  %0 = load i32, ptr %arrayidx1, align 4
206  %arrayidx2 = getelementptr i32, ptr null, i64 63
207  %1 = load i32, ptr %arrayidx2, align 4
208  %mul = mul i32 %1, %0
209  %conv = zext i32 %mul to i64
210  %shr = urem i64 %conv, 1
211  %conv3 = trunc i64 %shr to i32
212  store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Chain for store element 1: operands from i32 indices 33 and 62, divisor 2.
213  %arrayidx5 = getelementptr i32, ptr null, i64 33
214  %2 = load i32, ptr %arrayidx5, align 4
215  %arrayidx6 = getelementptr i32, ptr null, i64 62
216  %3 = load i32, ptr %arrayidx6, align 4
217  %mul7 = mul i32 %3, %2
218  %conv8 = zext i32 %mul7 to i64
219  %shr10 = urem i64 %conv8, 2
220  %conv11 = trunc i64 %shr10 to i32
221  store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Chain for store element 2: operands from i32 indices 7 and 61, divisor 1.
222  %arrayidx13 = getelementptr i32, ptr null, i64 7
223  %4 = load i32, ptr %arrayidx13, align 4
224  %arrayidx14 = getelementptr i32, ptr null, i64 61
225  %5 = load i32, ptr %arrayidx14, align 4
226  %mul15 = mul i32 %5, %4
227  %conv16 = zext i32 %mul15 to i64
228  %shr18 = urem i64 %conv16, 1
229  %conv19 = trunc i64 %shr18 to i32
230  store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Chain for store element 3: operands from the null pointer itself (index 0) and index 60, divisor 1.
231  %6 = load i32, ptr null, align 4
232  %arrayidx22 = getelementptr i32, ptr null, i64 60
233  %7 = load i32, ptr %arrayidx22, align 4
234  %mul23 = mul i32 %7, %6
235  %conv24 = zext i32 %mul23 to i64
236  %shr26 = urem i64 %conv24, 1
237  %conv27 = trunc i64 %shr26 to i32
238  store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
239  ret void
240}
241