xref: /llvm-project/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll (revision d2234ca16310a9e9bd595561353556ea6ba0176f)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
3
; Simplest case: two scalar float loads from %ptr[0] and %ptr[1] are stored
; straight back to the same two addresses. The expected output contains a
; single <2 x float> load feeding a single <2 x float> store, and only the
; index-0 GEP.
4define void @store_load(ptr %ptr) {
5; CHECK-LABEL: define void @store_load(
6; CHECK-SAME: ptr [[PTR:%.*]]) {
7; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
8; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
9; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
10; CHECK-NEXT:    ret void
11;
12  %ptr0 = getelementptr float, ptr %ptr, i32 0
13  %ptr1 = getelementptr float, ptr %ptr, i32 1
14  %ld0 = load float, ptr %ptr0
15  %ld1 = load float, ptr %ptr1
16  store float %ld0, ptr %ptr0
17  store float %ld1, ptr %ptr1
18  ret void
19}
20
21
; Cast in the chain: two float loads are each fpext'ed to double and stored
; through double-typed GEPs at indices 0 and 1. The expected output is a
; <2 x float> load, one fpext to <2 x double>, and one <2 x double> store.
22define void @store_fpext_load(ptr %ptr) {
23; CHECK-LABEL: define void @store_fpext_load(
24; CHECK-SAME: ptr [[PTR:%.*]]) {
25; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
26; CHECK-NEXT:    [[PTRD0:%.*]] = getelementptr double, ptr [[PTR]], i32 0
27; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
28; CHECK-NEXT:    [[VCAST:%.*]] = fpext <2 x float> [[VECL]] to <2 x double>
29; CHECK-NEXT:    store <2 x double> [[VCAST]], ptr [[PTRD0]], align 8
30; CHECK-NEXT:    ret void
31;
32  %ptr0 = getelementptr float, ptr %ptr, i32 0
33  %ptr1 = getelementptr float, ptr %ptr, i32 1
34  %ptrd0 = getelementptr double, ptr %ptr, i32 0
35  %ptrd1 = getelementptr double, ptr %ptr, i32 1
36  %ld0 = load float, ptr %ptr0
37  %ld1 = load float, ptr %ptr1
38  %fpext0 = fpext float %ld0 to double
39  %fpext1 = fpext float %ld1 to double
40  store double %fpext0, ptr %ptrd0
41  store double %fpext1, ptr %ptrd1
42  ret void
43}
44
; Compare plus cast in the chain: two pairs of float loads (the B pair is
; loaded first, then the A pair, both from %ptr[0] and %ptr[1]) feed
; `fcmp ogt A, B`, whose i1 results are zext'ed to i32 and stored through
; i32-typed GEPs. The expected output has two <2 x float> loads, a vector
; fcmp, a vector zext to <2 x i32>, and one <2 x i32> store.
45define void @store_fcmp_zext_load(ptr %ptr) {
46; CHECK-LABEL: define void @store_fcmp_zext_load(
47; CHECK-SAME: ptr [[PTR:%.*]]) {
48; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
49; CHECK-NEXT:    [[PTRB0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
50; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
51; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
52; CHECK-NEXT:    [[VCMP:%.*]] = fcmp ogt <2 x float> [[VECL]], [[VECL1]]
53; CHECK-NEXT:    [[VCAST:%.*]] = zext <2 x i1> [[VCMP]] to <2 x i32>
54; CHECK-NEXT:    store <2 x i32> [[VCAST]], ptr [[PTRB0]], align 4
55; CHECK-NEXT:    ret void
56;
57  %ptr0 = getelementptr float, ptr %ptr, i32 0
58  %ptr1 = getelementptr float, ptr %ptr, i32 1
59  %ptrb0 = getelementptr i32, ptr %ptr, i32 0
60  %ptrb1 = getelementptr i32, ptr %ptr, i32 1
61  %ldB0 = load float, ptr %ptr0
62  %ldB1 = load float, ptr %ptr1
63  %ldA0 = load float, ptr %ptr0
64  %ldA1 = load float, ptr %ptr1
65  %fcmp0 = fcmp ogt float %ldA0, %ldB0
66  %fcmp1 = fcmp ogt float %ldA1, %ldB1
67  %zext0 = zext i1 %fcmp0 to i32
68  %zext1 = zext i1 %fcmp1 to i32
69  store i32 %zext0, ptr %ptrb0
70  store i32 %zext1, ptr %ptrb1
71  ret void
72}
73
; Binary operator in the chain: two pairs of float loads from %ptr[0] and
; %ptr[1] are added lane-wise and the sums stored back to the same addresses.
; The expected output has two <2 x float> loads, one vector fadd, and one
; <2 x float> store.
74define void @store_fadd_load(ptr %ptr) {
75; CHECK-LABEL: define void @store_fadd_load(
76; CHECK-SAME: ptr [[PTR:%.*]]) {
77; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
78; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
79; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
80; CHECK-NEXT:    [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
81; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
82; CHECK-NEXT:    ret void
83;
84  %ptr0 = getelementptr float, ptr %ptr, i32 0
85  %ptr1 = getelementptr float, ptr %ptr, i32 1
86  %ldA0 = load float, ptr %ptr0
87  %ldA1 = load float, ptr %ptr1
88  %ldB0 = load float, ptr %ptr0
89  %ldB1 = load float, ptr %ptr1
90  %fadd0 = fadd float %ldA0, %ldB0
91  %fadd1 = fadd float %ldA1, %ldB1
92  store float %fadd0, ptr %ptr0
93  store float %fadd1, ptr %ptr1
94  ret void
95}
96
; Unary operator in the chain: two float loads are each negated with fneg and
; stored back to %ptr[0] and %ptr[1]. The expected output is a <2 x float>
; load, one vector fneg, and one <2 x float> store.
97define void @store_fneg_load(ptr %ptr) {
98; CHECK-LABEL: define void @store_fneg_load(
99; CHECK-SAME: ptr [[PTR:%.*]]) {
100; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
101; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
102; CHECK-NEXT:    [[VEC:%.*]] = fneg <2 x float> [[VECL]]
103; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
104; CHECK-NEXT:    ret void
105;
106  %ptr0 = getelementptr float, ptr %ptr, i32 0
107  %ptr1 = getelementptr float, ptr %ptr, i32 1
108  %ld0 = load float, ptr %ptr0
109  %ld1 = load float, ptr %ptr1
110  %fneg0 = fneg float %ld0
111  %fneg1 = fneg float %ld1
112  store float %fneg0, ptr %ptr0
113  store float %fneg1, ptr %ptr1
114  ret void
115}
116
; Scalars that have users besides the stores: %ld0 is also the return value
; and %ld1 also feeds %user (an fneg whose result is unused). The expected
; output keeps both scalar loads and the index-1 GEP next to the new
; <2 x float> load/store pair, and the fneg and ret still use the scalars.
117define float @scalars_with_external_uses_not_dead(ptr %ptr) {
118; CHECK-LABEL: define float @scalars_with_external_uses_not_dead(
119; CHECK-SAME: ptr [[PTR:%.*]]) {
120; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
121; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
122; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
123; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
124; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
125; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
126; CHECK-NEXT:    [[USER:%.*]] = fneg float [[LD1]]
127; CHECK-NEXT:    ret float [[LD0]]
128;
129  %ptr0 = getelementptr float, ptr %ptr, i32 0
130  %ptr1 = getelementptr float, ptr %ptr, i32 1
131  %ld0 = load float, ptr %ptr0
132  %ld1 = load float, ptr %ptr1
133  store float %ld0, ptr %ptr0
134  store float %ld1, ptr %ptr1
135  %user = fneg float %ld1
136  ret float %ld0
137}
138
; Stores are adjacent (%ptr[0], %ptr[1]) but their operands are not: %ld0 is
; loaded from %ptr0 while %ld1 is loaded from the second argument %ptr2. The
; expected output keeps both scalar loads, packs them into a <2 x float> with
; two insertelement instructions, and emits one <2 x float> store.
139define void @pack_scalars(ptr %ptr, ptr %ptr2) {
140; CHECK-LABEL: define void @pack_scalars(
141; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
142; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
143; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
144; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
145; CHECK-NEXT:    [[PACK:%.*]] = insertelement <2 x float> poison, float [[LD0]], i32 0
146; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LD1]], i32 1
147; CHECK-NEXT:    store <2 x float> [[PACK1]], ptr [[PTR0]], align 4
148; CHECK-NEXT:    ret void
149;
150  %ptr0 = getelementptr float, ptr %ptr, i32 0
151  %ptr1 = getelementptr float, ptr %ptr, i32 1
152  %ld0 = load float, ptr %ptr0
153  %ld1 = load float, ptr %ptr2
154  store float %ld0, ptr %ptr0
155  store float %ld1, ptr %ptr1
156  ret void
157}
158
; External function with no body in this file; it is called between the two
; stores of the test below.
159declare void @foo()
; Negative test: a call to @foo sits between the two scalar stores. The
; expected output is the input unchanged (scalar loads, scalar stores, and the
; call in the same order), i.e. nothing is vectorized.
; NOTE(review): both stores write %ld1 to %ptr1, and %ld0 has no users --
; confirm this is intended rather than the first store being meant to write
; %ld0 to %ptr0.
160define void @cant_vectorize_seeds(ptr %ptr) {
161; CHECK-LABEL: define void @cant_vectorize_seeds(
162; CHECK-SAME: ptr [[PTR:%.*]]) {
163; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
164; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
165; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
166; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
167; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4
168; CHECK-NEXT:    call void @foo()
169; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4
170; CHECK-NEXT:    ret void
171;
172  %ptr0 = getelementptr float, ptr %ptr, i32 0
173  %ptr1 = getelementptr float, ptr %ptr, i32 1
174  %ld0 = load float, ptr %ptr0
175  %ld1 = load float, ptr %ptr1
176  store float %ld1, ptr %ptr1
177  call void @foo() ; This call blocks scheduling of the store seeds.
178  store float %ld1, ptr %ptr1
179  ret void
180}
181
; Mixed vector/scalar seeds: a <2 x float> store at %ptr followed by a scalar
; float store at float index 2, with the scalar value loaded from the
; unrelated pointer %ptr2. The expected output unpacks the <2 x float> load
; with extractelement, rebuilds a <3 x float> with insertelement (lanes 0-1
; from the vector, lane 2 from the scalar), and emits one <3 x float> store.
; The RUN line passes -sbvec-allow-non-pow2.
182define void @pack_vectors(ptr %ptr, ptr %ptr2) {
183; CHECK-LABEL: define void @pack_vectors(
184; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
185; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr <2 x float>, ptr [[PTR]], i32 0
186; CHECK-NEXT:    [[LD0:%.*]] = load <2 x float>, ptr [[PTR0]], align 8
187; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
188; CHECK-NEXT:    [[VPACK:%.*]] = extractelement <2 x float> [[LD0]], i32 0
189; CHECK-NEXT:    [[VPACK1:%.*]] = insertelement <3 x float> poison, float [[VPACK]], i32 0
190; CHECK-NEXT:    [[VPACK2:%.*]] = extractelement <2 x float> [[LD0]], i32 1
191; CHECK-NEXT:    [[VPACK3:%.*]] = insertelement <3 x float> [[VPACK1]], float [[VPACK2]], i32 1
192; CHECK-NEXT:    [[PACK:%.*]] = insertelement <3 x float> [[VPACK3]], float [[LD1]], i32 2
193; CHECK-NEXT:    store <3 x float> [[PACK]], ptr [[PTR0]], align 8
194; CHECK-NEXT:    ret void
195;
196  %ptr0 = getelementptr <2 x float>, ptr %ptr, i32 0
197  %ptr1 = getelementptr float, ptr %ptr, i32 2
198  %ld0 = load <2 x float>, ptr %ptr0
199  %ld1 = load float, ptr %ptr2
200  store <2 x float> %ld0, ptr %ptr0
201  store float %ld1, ptr %ptr1
202  ret void
203}
204
; Diamond-shaped use: each fsub uses the same load for both operands
; (%ld0 - %ld0, %ld1 - %ld1). The expected output has a single <2 x float>
; load that is used as both operands of one vector fsub, followed by one
; <2 x float> store.
205define void @diamond(ptr %ptr) {
206; CHECK-LABEL: define void @diamond(
207; CHECK-SAME: ptr [[PTR:%.*]]) {
208; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
209; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
210; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VECL]]
211; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
212; CHECK-NEXT:    ret void
213;
214  %ptr0 = getelementptr float, ptr %ptr, i32 0
215  %ptr1 = getelementptr float, ptr %ptr, i32 1
216  %ld0 = load float, ptr %ptr0
217  %ld1 = load float, ptr %ptr1
218  %sub0 = fsub float %ld0, %ld0
219  %sub1 = fsub float %ld1, %ld1
220  store float %sub0, ptr %ptr0
221  store float %sub1, ptr %ptr1
222  ret void
223}
224
; Diamond with lanes crossed: the second fsub operands are the same two loads
; in swapped order (%ld0 - %ld1, %ld1 - %ld0). The expected output loads one
; <2 x float>, builds the second operand with a shufflevector using mask
; <1, 0>, and feeds both into a single vector fsub and <2 x float> store.
225define void @diamondWithShuffle(ptr %ptr) {
226; CHECK-LABEL: define void @diamondWithShuffle(
227; CHECK-SAME: ptr [[PTR:%.*]]) {
228; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
229; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
230; CHECK-NEXT:    [[VSHUF:%.*]] = shufflevector <2 x float> [[VECL]], <2 x float> [[VECL]], <2 x i32> <i32 1, i32 0>
231; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VSHUF]]
232; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
233; CHECK-NEXT:    ret void
234;
235  %ptr0 = getelementptr float, ptr %ptr, i32 0
236  %ptr1 = getelementptr float, ptr %ptr, i32 1
237  %ld0 = load float, ptr %ptr0
238  %ld1 = load float, ptr %ptr1
239  %sub0 = fsub float %ld0, %ld1
240  %sub1 = fsub float %ld1, %ld0
241  store float %sub0, ptr %ptr0
242  store float %sub1, ptr %ptr1
243  ret void
244}
245
; Diamond whose second operand mixes sources: lane 0 subtracts %ldX (loaded
; from the separate pointer %ptrX) and lane 1 subtracts %ld0. The expected
; output loads one <2 x float>, builds the second operand by inserting LDX in
; lane 0 and lane 0 of the vector load (via extractelement) in lane 1, then
; emits one vector fsub and one <2 x float> store.
246define void @diamondMultiInput(ptr %ptr, ptr %ptrX) {
247; CHECK-LABEL: define void @diamondMultiInput(
248; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) {
249; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
250; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
251; CHECK-NEXT:    [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
252; CHECK-NEXT:    [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0
253; CHECK-NEXT:    [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0
254; CHECK-NEXT:    [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1
255; CHECK-NEXT:    [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VINS1]]
256; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
257; CHECK-NEXT:    ret void
258;
259  %ptr0 = getelementptr float, ptr %ptr, i32 0
260  %ptr1 = getelementptr float, ptr %ptr, i32 1
261  %ld0 = load float, ptr %ptr0
262  %ld1 = load float, ptr %ptr1
263
264  %ldX = load float, ptr %ptrX
265
266  %sub0 = fsub float %ld0, %ldX
267  %sub1 = fsub float %ld1, %ld0
268  %sub0 = fsub float %ld0, %ldX
269  %sub1 = fsub float %ld1, %ld0
270  ret void
271}
272
; Diamond over constant operands: %zext0/%zext1 are both `zext i16 0` and are
; used twice -- stored directly to %ptr[0..1] (the A stores) and, after an
; `or i32 0, ...`, stored to %ptr[10..11] (the B stores). The expected output
; contains only the two index-0/index-10 GEPs and two
; `store <2 x i32> zeroinitializer` instructions; no zext or or remains.
273define void @diamondWithConstantVector(ptr %ptr) {
274; CHECK-LABEL: define void @diamondWithConstantVector(
275; CHECK-SAME: ptr [[PTR:%.*]]) {
276; CHECK-NEXT:    [[GEPA0:%.*]] = getelementptr i32, ptr [[PTR]], i64 0
277; CHECK-NEXT:    [[GEPB0:%.*]] = getelementptr i32, ptr [[PTR]], i64 10
278; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[GEPA0]], align 4
279; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[GEPB0]], align 4
280; CHECK-NEXT:    ret void
281;
282  %gepA0 = getelementptr i32, ptr %ptr, i64 0
283  %gepA1 = getelementptr i32, ptr %ptr, i64 1
284
285  %gepB0 = getelementptr i32, ptr %ptr, i64 10
286  %gepB1 = getelementptr i32, ptr %ptr, i64 11
287
288  %zext0 = zext i16 0 to i32
289  %zext1 = zext i16 0 to i32
290
291  store i32 %zext0, ptr %gepA0
292  store i32 %zext1, ptr %gepA1
293
294  %orB0 = or i32 0, %zext0
295  %orB1 = or i32 0, %zext1
296  store i32 %orB0, ptr %gepB0
297  store i32 %orB1, ptr %gepB1
298  ret void
299}
300