xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll (revision 3133acf1fbd1cc57ea8e74288ee9a0acd027d749)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
3; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
4
; Loads three adjacent i32 elements (indices 0, 1, 2) from %src, multiplies
; each by the constant 10 with nsw, and stores the products to the three
; adjacent i32 slots at %dst.
; The assertions use the check prefix shared by both opt invocations at the
; top of the file, and they list the same scalar load/mul/store sequence as
; the input, so no vector instructions are expected in either configuration.
; The input stores carry no explicit alignment; the expected output prints
; them with "align 4".
5define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
6; CHECK-LABEL: @v3_load_i32_mul_by_constant_store(
7; CHECK-NEXT:  entry:
8; CHECK-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
9; CHECK-NEXT:    [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
10; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
11; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
12; CHECK-NEXT:    [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
13; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
14; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
15; CHECK-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
16; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
17; CHECK-NEXT:    store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
18; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
19; CHECK-NEXT:    store i32 [[MUL_1]], ptr [[DST_1]], align 4
20; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
21; CHECK-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
22; CHECK-NEXT:    ret void
23;
24entry:
25  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
26  %l.src.0 = load i32, ptr %gep.src.0, align 4
27  %mul.0 = mul nsw i32 %l.src.0, 10
28
29  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
30  %l.src.1 = load i32, ptr %gep.src.1, align 4
31  %mul.1 = mul nsw i32 %l.src.1, 10
32
33  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
34  %l.src.2 = load i32, ptr %gep.src.2, align 4
35  %mul.2 = mul nsw i32 %l.src.2, 10
36
37  store i32 %mul.0, ptr %dst
38
39  %dst.1 = getelementptr i32, ptr %dst, i32 1
40  store i32 %mul.1, ptr %dst.1
41
42  %dst.2 = getelementptr i32, ptr %dst, i32 2
43  store i32 %mul.2, ptr %dst.2
44
45  ret void
46}
47
; Element-wise product of two 3-element i32 sequences: for each index 0..2 an
; i32 is loaded from %src.1 and from %src.2, the two are multiplied with nsw,
; and the result is stored to the matching i32 slot of %dst.
; The assertions use the check prefix shared by both opt invocations at the
; top of the file and repeat the scalar input sequence, so no vector
; instructions are expected in either configuration.
48define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
49; CHECK-LABEL: @v3_load_i32_mul_store(
50; CHECK-NEXT:  entry:
51; CHECK-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
52; CHECK-NEXT:    [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
53; CHECK-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
54; CHECK-NEXT:    [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
55; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
56; CHECK-NEXT:    [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
57; CHECK-NEXT:    [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
58; CHECK-NEXT:    [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
59; CHECK-NEXT:    [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
60; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
61; CHECK-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
62; CHECK-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
63; CHECK-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
64; CHECK-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
65; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
66; CHECK-NEXT:    store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
67; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
68; CHECK-NEXT:    store i32 [[MUL_1]], ptr [[DST_1]], align 4
69; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
70; CHECK-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
71; CHECK-NEXT:    ret void
72;
73entry:
74  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
75  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
76  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
77  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
78  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
79
80  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
81  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
82  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
83  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
84  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
85
86  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
87  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
88  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
89  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
90  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
91
92  store i32 %mul.0, ptr %dst
93
94  %dst.1 = getelementptr i32, ptr %dst, i32 1
95  store i32 %mul.1, ptr %dst.1
96
97  %dst.2 = getelementptr i32, ptr %dst, i32 2
98  store i32 %mul.2, ptr %dst.2
99
100  ret void
101}
102
; Same shape as an element-wise i32 product of %src.1 and %src.2 over indices
; 0..2, with one extra step per lane: the constant 9 is added to each product
; (plain add, no wrap flags) before it is stored to the matching slot of %dst.
; The assertions use the check prefix shared by both opt invocations at the
; top of the file and repeat the scalar load/mul/add/store sequence, so no
; vector instructions are expected in either configuration.
103define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
104; CHECK-LABEL: @v3_load_i32_mul_add_const_store(
105; CHECK-NEXT:  entry:
106; CHECK-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
107; CHECK-NEXT:    [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
108; CHECK-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
109; CHECK-NEXT:    [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
110; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
111; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_0]], 9
112; CHECK-NEXT:    [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
113; CHECK-NEXT:    [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
114; CHECK-NEXT:    [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
115; CHECK-NEXT:    [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
116; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
117; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_1]], 9
118; CHECK-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
119; CHECK-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
120; CHECK-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
121; CHECK-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
122; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
123; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
124; CHECK-NEXT:    store i32 [[ADD_0]], ptr [[DST:%.*]], align 4
125; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
126; CHECK-NEXT:    store i32 [[ADD_1]], ptr [[DST_1]], align 4
127; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
128; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[DST_2]], align 4
129; CHECK-NEXT:    ret void
130;
131entry:
132  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
133  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
134  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
135  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
136  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
137  %add.0 = add i32 %mul.0, 9
138
139  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
140  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
141  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
142  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
143  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
144  %add.1 = add i32 %mul.1, 9
145
146  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
147  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
148  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
149  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
150  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
151  %add.2 = add i32 %mul.2, 9
152
153  store i32 %add.0, ptr %dst
154
155  %dst.1 = getelementptr i32, ptr %dst, i32 1
156  store i32 %add.1, ptr %dst.1
157
158  %dst.2 = getelementptr i32, ptr %dst, i32 2
159  store i32 %add.2, ptr %dst.2
160
161  ret void
162}
163
; Loads three adjacent floats (indices 0, 1, 2) from %src, adds the constant
; 10.0 to each, and stores the sums to the three adjacent float slots at %dst.
; Despite the doubled "fadd" in the name, the body has a single fadd per lane.
; This is the first function in the file whose expectations differ between the
; two opt invocations:
;  - with -slp-vectorize-non-power-of-2, the expected output is one
;    <3 x float> load, one <3 x float> fadd against a splat of 10.0, and one
;    <3 x float> store;
;  - with -slp-vectorize-non-power-of-2=false, elements 0 and 1 are expected
;    as a <2 x float> load/fadd/store while element 2 stays scalar.
164define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
165; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
166; NON-POW2-NEXT:  entry:
167; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
168; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
169; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
170; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
171; NON-POW2-NEXT:    ret void
172;
173; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
174; POW2-ONLY-NEXT:  entry:
175; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
176; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
177; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
178; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
179; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
180; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
181; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
182; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
183; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
184; POW2-ONLY-NEXT:    ret void
185;
186entry:
187  %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
188  %l.src.0 = load float , ptr %gep.src.0, align 4
189  %fadd.0 = fadd float %l.src.0, 10.0
190
191  %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1
192  %l.src.1 = load float, ptr %gep.src.1, align 4
193  %fadd.1 = fadd float %l.src.1, 10.0
194
195  %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
196  %l.src.2 = load float, ptr %gep.src.2, align 4
197  %fadd.2 = fadd float %l.src.2, 10.0
198
199  store float %fadd.0, ptr %dst
200
201  %dst.1 = getelementptr float, ptr %dst, i32 1
202  store float %fadd.1, ptr %dst.1
203
204  %dst.2 = getelementptr float, ptr %dst, i32 2
205  store float %fadd.2, ptr %dst.2
206
207  ret void
208}
209
; Stores three i32 phi values to the adjacent slots %dst[0], %dst[1], %dst[2].
; Each phi in %exit takes a distinct constant (1, 2, 3) from %entry and 0 from
; %invoke.cont8.loopexit, a block that nothing branches to (see the
; "No predecessors!" remark on its label).
; The assertions use the check prefix shared by both opt invocations at the
; top of the file. They expect the first two phis to be combined into a
; <2 x i32> phi (<1, 2> from entry, poison from the predecessor-less block)
; that is written with one vector store to %dst, while the third phi remains
; a scalar i32 stored to %dst + 2.
210define void @phi_store3(ptr %dst) {
211; CHECK-LABEL: @phi_store3(
212; CHECK-NEXT:  entry:
213; CHECK-NEXT:    br label [[EXIT:%.*]]
214; CHECK:       invoke.cont8.loopexit:
215; CHECK-NEXT:    br label [[EXIT]]
216; CHECK:       exit:
217; CHECK-NEXT:    [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
218; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
219; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
220; CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[DST]], align 4
221; CHECK-NEXT:    store i32 [[P_2]], ptr [[DST_2]], align 4
222; CHECK-NEXT:    ret void
223;
224entry:
225  br label %exit
226
227invoke.cont8.loopexit:                            ; No predecessors!
228  br label %exit
229
230exit:
231  %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
232  %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
233  %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]
234
235  %dst.1 = getelementptr i32, ptr %dst, i32 1
236  %dst.2 = getelementptr i32, ptr %dst, i32 2
237
238  store i32 %p.0, ptr %dst, align 4
239  store i32 %p.1, ptr %dst.1, align 4
240  store i32 %p.2, ptr %dst.2, align 4
241  ret void
242}
243
; Stores three i32 values to %dst[0], %dst[1], %dst[2]. Element 0 comes from
; "add i32 0, 0"; elements 1 and 2 each come from "sub i32 0, 0" (the value
; names %add207 / %add216 say "add", but the opcode is sub).
; The assertions use the check prefix shared by both opt invocations at the
; top of the file. They expect the scalar add and its store to %dst to remain,
; and the two sub-fed stores to become a single
; "store <2 x i32> zeroinitializer" at %dst + 1.
244define void @store_try_reorder(ptr %dst) {
245; CHECK-LABEL: @store_try_reorder(
246; CHECK-NEXT:  entry:
247; CHECK-NEXT:    [[ADD:%.*]] = add i32 0, 0
248; CHECK-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
249; CHECK-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
250; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
251; CHECK-NEXT:    ret void
252;
253entry:
254  %add = add i32 0, 0
255  store i32 %add, ptr %dst, align 4
256  %add207 = sub i32 0, 0
257  %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
258  store i32 %add207, ptr %arrayidx.i1887, align 4
259  %add216 = sub i32 0, 0
260  %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
261  store i32 %add216, ptr %arrayidx.i1891, align 4
262  ret void
263}
264
; Writes three floats to %Colour[0], %Colour[1], %Colour[2]. Every lane runs
; the same chain on the same scalar argument %0: fpext float -> double,
; llvm.fmuladd.f64(x, 0.0, 0.0), fptrunc double -> float, then store.
; The assertions use the check prefix shared by both opt invocations at the
; top of the file. They expect lanes 0 and 1 to be done as a 2-wide chain
; (%0 inserted and broadcast into <2 x float>, then fpext, llvm.fmuladd.v2f64,
; fptrunc, and one <2 x float> store to %Colour), while lane 2 keeps its
; scalar fpext/fmuladd/fptrunc/store sequence.
265define void @vec3_fpext_cost(ptr %Colour, float %0) {
266; CHECK-LABEL: @vec3_fpext_cost(
267; CHECK-NEXT:  entry:
268; CHECK-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
269; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
270; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
271; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
272; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
273; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
274; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
275; CHECK-NEXT:    [[CONV78:%.*]] = fpext float [[TMP0]] to double
276; CHECK-NEXT:    [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
277; CHECK-NEXT:    [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
278; CHECK-NEXT:    store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
279; CHECK-NEXT:    ret void
280;
281entry:
282  %arrayidx72 = getelementptr float, ptr %Colour, i64 1
283  %arrayidx80 = getelementptr float, ptr %Colour, i64 2
284  %conv62 = fpext float %0 to double
285  %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
286  %conv66 = fptrunc double %1 to float
287  store float %conv66, ptr %Colour, align 4
288  %conv70 = fpext float %0 to double
289  %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
290  %conv74 = fptrunc double %2 to float
291  store float %conv74, ptr %arrayidx72, align 4
292  %conv78 = fpext float %0 to double
293  %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
294  %conv82 = fptrunc double %3 to float
295  store float %conv82, ptr %arrayidx80, align 4
296  ret void
297}
298
; Truncates the double argument %conv to float and stores it to %dst[0],
; %dst[1] and %dst[2]. One fptrunc result (%conv25) feeds the first two
; stores; a second, separate fptrunc of the same argument (%conv34) feeds the
; third. The name mentions fpext, but the only conversion in the body is
; fptrunc.
; The assertions use the check prefix shared by both opt invocations at the
; top of the file. They expect %conv to be broadcast into a <2 x double>,
; truncated once to <2 x float>, with element 0 extracted for the scalar store
; to %dst[0] and the whole vector stored at %dst + 1 (covering elements 1
; and 2).
299define void @fpext_gather(ptr %dst, double %conv) {
300; CHECK-LABEL: @fpext_gather(
301; CHECK-NEXT:  entry:
302; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0
303; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
304; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
305; CHECK-NEXT:    [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
306; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
307; CHECK-NEXT:    store float [[TMP3]], ptr [[LENGTHS]], align 4
308; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
309; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4
310; CHECK-NEXT:    ret void
311;
312entry:
313  %conv25 = fptrunc double %conv to float
314  %Lengths = getelementptr float, ptr %dst, i64 0
315  store float %conv25, ptr %Lengths, align 4
316  %arrayidx32 = getelementptr float, ptr %dst, i64 1
317  store float %conv25, ptr %arrayidx32, align 4
318  %conv34 = fptrunc double %conv to float
319  %arrayidx37 = getelementptr float, ptr %dst, i64 2
320  store float %conv34, ptr %arrayidx37, align 4
321  ret void
322}
323
; Declarations of the scalar llvm.fmuladd intrinsics.
; The f64 form is called by @vec3_fpext_cost; no call to the f32 form appears
; in the functions visible in this file.
324declare float @llvm.fmuladd.f32(float, float, float)
325
326declare double @llvm.fmuladd.f64(double, double, double)
327