; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)

declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)

declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)

declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)

; Integer add reduction expands to a lane shuffle, a vector add, and an
; extract of element 0.
define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Integer mul reduction expands to a lane shuffle, a vector mul, and an
; extract of element 0.
define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Bitwise and reduction expands to a lane shuffle, a vector and, and an
; extract of element 0.
define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Bitwise or reduction expands to a lane shuffle, a vector or, and an
; extract of element 0.
define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Bitwise xor reduction expands to a lane shuffle, a vector xor, and an
; extract of element 0.
define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; With the fast flag, fadd reduction expands to a log2 shuffle reduction;
; the scalar start value (0.0) is folded in with a final scalar fadd.
define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
  ret float %r
}

; Same as @fadd_f32 but with a non-constant accumulator argument, which is
; folded in with the final scalar fadd.
define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Without fast-math flags the expansion must preserve evaluation order, so
; the reduction becomes a sequential per-element fadd chain.
define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Same as @fadd_f32_strict but seeding the sequential chain with a
; non-constant accumulator.
define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; With the fast flag, fmul reduction expands to a log2 shuffle reduction;
; the scalar start value (1.0) is folded in with a final scalar fmul.
define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
  ret float %r
}

; Same as @fmul_f32 but with a non-constant accumulator argument, which is
; folded in with the final scalar fmul.
define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Without fast-math flags the expansion must preserve evaluation order, so
; the reduction becomes a sequential per-element fmul chain.
define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

; Same as @fmul_f32_strict but seeding the sequential chain with a
; non-constant accumulator.
define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

; Min/max reductions expand via the corresponding scalable min/max intrinsic
; (here llvm.smax) on the shuffled halves, then extract element 0.
define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Signed-min reduction expands via llvm.smin on the shuffled halves.
define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Unsigned-max reduction expands via llvm.umax on the shuffled halves.
define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; Unsigned-min reduction expands via llvm.umin on the shuffled halves.
define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
  ret i64 %r
}

; FIXME: Expand using maxnum intrinsic?

; fmax reduction is currently left unexpanded (the CHECK lines show the
; intrinsic call surviving the pass).
define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Expand using minnum intrinsic?

; fmin reduction is currently left unexpanded (the CHECK lines show the
; intrinsic call surviving the pass).
define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT:    ret double [[R]]
;
entry:
  %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
  ret double %r
}

; FIXME: Why is this not expanded?

; Test when the vector size is not power of two.
define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: @test_v3i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT:    ret i8 [[B]]
;
entry:
  %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
  ret i8 %b
}
