xref: /llvm-project/llvm/test/Transforms/InstCombine/vector-reductions.ll (revision 0ad275c1588065c9f4ef45a6a88f062182ad446b)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
; Floating-point reduction intrinsics used by the tests in this file.
; fadd/fmul reductions take a scalar start value followed by the vector
; operand; fmin/fmax reductions take only the vector operand.
; The fadd/fmul names below carry an extra ".f32" component that does not
; appear in the expected output of the tests that call them.
4declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
5declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
6declare float @llvm.vector.reduce.fmul.f32.nxv4f32(float, <vscale x 4 x float>)
; fmin is called on <vscale x 4 x float> and fmax on <4 x float> in this file;
; the declarations match those calls and take no start value.
7declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
8declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
; External function with no body here; the extra-use tests call it to give a
; float reduction result a second user.
9declare void @use_f32(float)
10
; Integer add reductions over <4 x i32>, <vscale x 4 x i32> and <8 x i32>.
11declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
12declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
13declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
; External function with no body here; the extra-use tests call it to give an
; i32 reduction result a second user.
14declare void @use_i32(i32)
15
; Integer mul reductions over a fixed-width and a scalable <N x 4 x i32> vector.
16declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
17declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
18
; Signed and unsigned integer min/max reductions. The min variants are
; declared for <4 x i32>, the max variants for <vscale x 4 x i32>.
19declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
20declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
21declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
22declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
23
; Vector reverse intrinsics (scalable and fixed-width, i32 and float elements)
; that feed the reductions in the "*_of_reverse_*" tests.
24declare <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32>)
25declare <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float>)
26declare <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32>)
27declare <4 x float> @llvm.vector.reverse.v4f32(<4 x float>)
28
; Positive test: two fadd reductions over <4 x float> are subtracted with a
; 'reassoc nsz' fsub. The expected output performs the subtraction on the
; vectors first, runs a single reduction seeded with %a0, and then subtracts
; %a1 as a scalar.
29define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
30; CHECK-LABEL: @diff_of_sums_v4f32(
31; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
32; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])
33; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]
34; CHECK-NEXT:    ret float [[R]]
35;
36  %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
37  %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
38  %r = fsub reassoc nsz float %r0, %r1
39  ret float %r
40}
41
; Positive test: a 'reassoc' fadd reduction of a reversed <4 x float> vector.
; The expected output reduces %v0 directly; the reverse call is gone.
42define float @reassoc_sum_of_reverse_v4f32(<4 x float> %v0) {
43; CHECK-LABEL: @reassoc_sum_of_reverse_v4f32(
44; CHECK-NEXT:    [[RED:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V0:%.*]])
45; CHECK-NEXT:    ret float [[RED]]
46;
47  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
48  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float zeroinitializer, <4 x float> %rev)
49  ret float %red
50}
51
; Positive test: a 'reassoc' fmul reduction (start value 1.0) of a reversed
; scalable float vector. The expected output reduces %v0 directly; the reverse
; call is gone.
52define float @reassoc_mul_reduction_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
53; CHECK-LABEL: @reassoc_mul_reduction_of_reverse_nxv4f32(
54; CHECK-NEXT:    [[RED:%.*]] = call reassoc float @llvm.vector.reduce.fmul.nxv4f32(float 1.000000e+00, <vscale x 4 x float> [[V0:%.*]])
55; CHECK-NEXT:    ret float [[RED]]
56;
57  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
58  %red = call reassoc float @llvm.vector.reduce.fmul.nxv4f32(float 1.0, <vscale x 4 x float> %rev)
59  ret float %red
60}
61
; Positive test: an fmax reduction of a reversed <4 x float> vector, with no
; fast-math flags on the call. The expected output reduces %v0 directly.
62define float @fmax_of_reverse_v4f32(<4 x float> %v0) {
63; CHECK-LABEL: @fmax_of_reverse_v4f32(
64; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[V0:%.*]])
65; CHECK-NEXT:    ret float [[RED]]
66;
67  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
68  %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %rev)
69  ret float %red
70}
71
; Positive test: an fmin reduction of a reversed scalable float vector, with
; no fast-math flags on the call. The expected output reduces %v0 directly.
72define float @fmin_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
73; CHECK-LABEL: @fmin_of_reverse_nxv4f32(
74; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[V0:%.*]])
75; CHECK-NEXT:    ret float [[RED]]
76;
77  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
78  %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %rev)
79  ret float %red
80}
81
82; negative test - fadd cannot be folded with reverse due to lack of reassoc
; Same input as @reassoc_sum_of_reverse_v4f32 except that the reduction call
; carries no 'reassoc' flag. The expected output keeps both the reverse and
; the reduction of the reversed vector.
83define float @sum_of_reverse_v4f32(<4 x float> %v0) {
84; CHECK-LABEL: @sum_of_reverse_v4f32(
85; CHECK-NEXT:    [[REV:%.*]] = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[V0:%.*]])
86; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[REV]])
87; CHECK-NEXT:    ret float [[RED]]
88;
89  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
90  %red = call float @llvm.vector.reduce.fadd.v4f32(float zeroinitializer, <4 x float> %rev)
91  ret float %red
92}
93
94; negative test - fmul cannot be folded with reverse due to lack of reassoc
; The fmul reduction call carries no 'reassoc' flag. The expected output keeps
; both the reverse and the reduction of the reversed vector.
; NOTE(review): the start value here is 0.0 (zeroinitializer), whereas
; @reassoc_mul_reduction_of_reverse_nxv4f32 uses 1.0 - confirm the difference
; is intentional.
95define float @mul_reduction_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
96; CHECK-LABEL: @mul_reduction_of_reverse_nxv4f32(
97; CHECK-NEXT:    [[REV:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[V0:%.*]])
98; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[REV]])
99; CHECK-NEXT:    ret float [[RED]]
100;
101  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
102  %red = call float @llvm.vector.reduce.fmul.nxv4f32(float zeroinitializer, <vscale x 4 x float> %rev)
103  ret float %red
104}
105
106
107; negative test - fsub must allow reassociation
108
; Both reductions are 'fast', but the fsub carries only ninf/nnan/nsz and no
; 'reassoc'. The expected output keeps both reductions and the scalar fsub;
; the fsub flags appear there in the order nnan ninf nsz.
109define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
110; CHECK-LABEL: @diff_of_sums_v4f32_fmf(
111; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
112; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
113; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]]
114; CHECK-NEXT:    ret float [[R]]
115;
116  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
117  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
118  %r = fsub ninf nnan nsz float %r0, %r1
119  ret float %r
120}
121
122; negative test - extra uses could create extra instructions
123
; The first reduction result (%r0) is also passed to @use_f32, so it has a
; user besides the fsub. The expected output keeps both reductions, the call
; to @use_f32 and the scalar 'fast' fsub.
124define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
125; CHECK-LABEL: @diff_of_sums_extra_use1(
126; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
127; CHECK-NEXT:    call void @use_f32(float [[R0]])
128; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
129; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
130; CHECK-NEXT:    ret float [[R]]
131;
132  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
133  call void @use_f32(float %r0)
134  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
135  %r = fsub fast float %r0, %r1
136  ret float %r
137}
138
139; negative test - extra uses could create extra instructions
140
; The second reduction result (%r1) is also passed to @use_f32, so it has a
; user besides the fsub. The expected output keeps both reductions, the call
; to @use_f32 and the scalar 'fast' fsub.
141define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
142; CHECK-LABEL: @diff_of_sums_extra_use2(
143; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
144; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
145; CHECK-NEXT:    call void @use_f32(float [[R1]])
146; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
147; CHECK-NEXT:    ret float [[R]]
148;
149  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
150  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
151  call void @use_f32(float %r1)
152  %r = fsub fast float %r0, %r1
153  ret float %r
154}
155
156; negative test - can't reassociate different vector types
157
; The two 'fast' fadd reductions operate on different vector types
; (<4 x float> and <8 x float>). The expected output keeps both reductions
; and the scalar fsub.
158define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {
159; CHECK-LABEL: @diff_of_sums_type_mismatch(
160; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
161; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]])
162; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
163; CHECK-NEXT:    ret float [[R]]
164;
165  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
166  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1)
167  %r = fsub fast float %r0, %r1
168  ret float %r
169}
170
; Positive test (integer): the difference of two add reductions over
; <4 x i32>. The expected output subtracts the vectors first and performs a
; single add reduction on the result.
171define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
172; CHECK-LABEL: @diff_of_sums_v4i32(
173; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
174; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
175; CHECK-NEXT:    ret i32 [[R]]
176;
177  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
178  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
179  %r = sub i32 %r0, %r1
180  ret i32 %r
181}
182
; Positive test: an integer add reduction of a reversed <4 x i32> vector.
; The expected output reduces %v0 directly; the reverse call is gone.
183define i32 @sum_of_reverse_v4i32(<4 x i32> %v0) {
184; CHECK-LABEL: @sum_of_reverse_v4i32(
185; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
186; CHECK-NEXT:    ret i32 [[RED]]
187;
188  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
189  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %rev)
190  ret i32 %red
191}
192
; Positive test: an integer add reduction of a reversed scalable i32 vector.
; The expected output reduces %v0 directly; the reverse call is gone.
193define i32 @sum_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
194; CHECK-LABEL: @sum_of_reverse_nxv4i32(
195; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
196; CHECK-NEXT:    ret i32 [[RED]]
197;
198  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
199  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %rev)
200  ret i32 %red
201}
202
; Positive test: an integer mul reduction of a reversed <4 x i32> vector.
; The expected output reduces %v0 directly; the reverse call is gone.
203define i32 @mul_reduce_of_reverse_v4i32(<4 x i32> %v0) {
204; CHECK-LABEL: @mul_reduce_of_reverse_v4i32(
205; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[V0:%.*]])
206; CHECK-NEXT:    ret i32 [[RED]]
207;
208  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
209  %red = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %rev)
210  ret i32 %red
211}
212
; Positive test: an integer mul reduction of a reversed scalable i32 vector.
; The expected output reduces %v0 directly; the reverse call is gone.
213define i32 @mul_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
214; CHECK-LABEL: @mul_reduce_of_reverse_nxv4i32(
215; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
216; CHECK-NEXT:    ret i32 [[RED]]
217;
218  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
219  %red = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %rev)
220  ret i32 %red
221}
222
; Positive test: a signed-min reduction of a reversed <4 x i32> vector.
; The expected output reduces %v0 directly; the reverse call is gone.
223define i32 @smin_reduce_of_reverse_v4i32(<4 x i32> %v0) {
224; CHECK-LABEL: @smin_reduce_of_reverse_v4i32(
225; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[V0:%.*]])
226; CHECK-NEXT:    ret i32 [[RED]]
227;
228  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
229  %red = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %rev)
230  ret i32 %red
231}
232
; Positive test: a signed-max reduction of a reversed scalable i32 vector.
; The expected output reduces %v0 directly; the reverse call is gone.
233define i32 @smax_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
234; CHECK-LABEL: @smax_reduce_of_reverse_nxv4i32(
235; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
236; CHECK-NEXT:    ret i32 [[RED]]
237;
238  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
239  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %rev)
240  ret i32 %red
241}
242
; Positive test: an unsigned-min reduction of a reversed <4 x i32> vector.
; The expected output reduces %v0 directly; the reverse call is gone.
243define i32 @umin_reduce_of_reverse_v4i32(<4 x i32> %v0) {
244; CHECK-LABEL: @umin_reduce_of_reverse_v4i32(
245; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[V0:%.*]])
246; CHECK-NEXT:    ret i32 [[RED]]
247;
248  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
249  %red = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %rev)
250  ret i32 %red
251}
252
; Positive test: an unsigned-max reduction of a reversed scalable i32 vector.
; The expected output reduces %v0 directly; the reverse call is gone.
253define i32 @umax_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
254; CHECK-LABEL: @umax_reduce_of_reverse_nxv4i32(
255; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
256; CHECK-NEXT:    ret i32 [[RED]]
257;
258  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
259  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %rev)
260  ret i32 %red
261}
262
263; negative test - extra uses could create extra instructions
264
; The first add reduction result (%r0) is also passed to @use_i32, so it has
; a user besides the sub. The expected output keeps both reductions, the call
; to @use_i32 and the scalar sub.
265define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
266; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
267; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
268; CHECK-NEXT:    call void @use_i32(i32 [[R0]])
269; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
270; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
271; CHECK-NEXT:    ret i32 [[R]]
272;
273  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
274  call void @use_i32(i32 %r0)
275  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
276  %r = sub i32 %r0, %r1
277  ret i32 %r
278}
279
280; negative test - extra uses could create extra instructions
281
; The second add reduction result (%r1) is also passed to @use_i32, so it has
; a user besides the sub. The expected output keeps both reductions, the call
; to @use_i32 and the scalar sub.
282define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
283; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
284; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
285; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
286; CHECK-NEXT:    call void @use_i32(i32 [[R1]])
287; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
288; CHECK-NEXT:    ret i32 [[R]]
289;
290  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
291  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
292  call void @use_i32(i32 %r1)
293  %r = sub i32 %r0, %r1
294  ret i32 %r
295}
296
297; negative test - can't reassociate different vector types
298
; The two add reductions operate on different vector types (<8 x i32> and
; <4 x i32>). The expected output keeps both reductions and the scalar sub.
299define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
300; CHECK-LABEL: @diff_of_sums_type_mismatch2(
301; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
302; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
303; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
304; CHECK-NEXT:    ret i32 [[R]]
305;
306  %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0)
307  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
308  %r = sub i32 %r0, %r1
309  ret i32 %r
310}
311