xref: /llvm-project/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll (revision 8b56da5e9f3ba737a5ff4bf5dee654416849042f)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
; Positive test: the mask <0, 1, 2, 3> selects every lane of %x exactly once
; (identity order). The expected output drops the shufflevector and feeds %x
; straight into the add reduction.
4define i32 @reduce_add(<4 x i32> %x) {
5; CHECK-LABEL: @reduce_add(
6; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
7; CHECK-NEXT:    ret i32 [[RES]]
8;
9  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
11  ret i32 %res
12}
13
; Positive test: %x is the SECOND shuffle operand and the mask <7, 6, 5, 4>
; selects all four of its lanes in reverse order. The expected output applies
; the or reduction directly to %x with no shuffle.
14define i32 @reduce_or(<4 x i32> %x) {
15; CHECK-LABEL: @reduce_or(
16; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
17; CHECK-NEXT:    ret i32 [[RES]]
18;
19  %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
20  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
21  ret i32 %res
22}
23
; Positive test: the mask <0, 2, 1, 3> is a permutation of the four lanes of
; %x (lanes 1 and 2 swapped). The expected output applies the and reduction
; directly to %x with no shuffle.
24define i32 @reduce_and(<4 x i32> %x) {
25; CHECK-LABEL: @reduce_and(
26; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
27; CHECK-NEXT:    ret i32 [[RES]]
28;
29  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
30  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
31  ret i32 %res
32}
33
; Positive test: %x is the second shuffle operand and the mask <5, 6, 7, 4>
; selects each of its lanes exactly once (rotated by one). The expected output
; applies the xor reduction directly to %x with no shuffle.
34define i32 @reduce_xor(<4 x i32> %x) {
35; CHECK-LABEL: @reduce_xor(
36; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
37; CHECK-NEXT:    ret i32 [[RES]]
38;
39  %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
40  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
41  ret i32 %res
42}
43
; Positive test: the mask <2, 1, 3, 0> uses every lane of %x exactly once.
; The expected output applies the umax reduction directly to %x with no
; shuffle.
44define i32 @reduce_umax(<4 x i32> %x) {
45; CHECK-LABEL: @reduce_umax(
46; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
47; CHECK-NEXT:    ret i32 [[RES]]
48;
49  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
50  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
51  ret i32 %res
52}
53
; Positive test: the mask <2, 3, 0, 1> swaps the two halves of %x, using every
; lane exactly once. The expected output applies the umin reduction directly
; to %x with no shuffle.
54define i32 @reduce_umin(<4 x i32> %x) {
55; CHECK-LABEL: @reduce_umin(
56; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
57; CHECK-NEXT:    ret i32 [[RES]]
58;
59  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
60  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
61  ret i32 %res
62}
63
; Positive test: the mask <2, 0, 3, 1> uses every lane of %x exactly once.
; The expected output applies the smax reduction directly to %x with no
; shuffle.
64define i32 @reduce_smax(<4 x i32> %x) {
65; CHECK-LABEL: @reduce_smax(
66; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
67; CHECK-NEXT:    ret i32 [[RES]]
68;
69  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
70  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
71  ret i32 %res
72}
73
; Positive test: the mask <0, 3, 1, 2> uses every lane of %x exactly once.
; The expected output applies the smin reduction directly to %x with no
; shuffle.
74define i32 @reduce_smin(<4 x i32> %x) {
75; CHECK-LABEL: @reduce_smin(
76; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
77; CHECK-NEXT:    ret i32 [[RES]]
78;
79  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
80  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
81  ret i32 %res
82}
83
; Positive test (floating point): the mask <2, 0, 3, 1> uses every lane of %x
; exactly once. The expected output applies the fmax reduction directly to %x
; and keeps both fast-math flags from the original call (written "nsz nnan" in
; the input, printed as "nnan nsz" in the output).
84define float @reduce_fmax(<4 x float> %x) {
85; CHECK-LABEL: @reduce_fmax(
86; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
87; CHECK-NEXT:    ret float [[RES]]
88;
89  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
90  %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
91  ret float %res
92}
93
; Positive test (floating point): the mask <0, 3, 1, 2> uses every lane of %x
; exactly once and the call carries no fast-math flags. The expected output
; still applies the fmin reduction directly to %x with no shuffle.
94define float @reduce_fmin(<4 x float> %x) {
95; CHECK-LABEL: @reduce_fmin(
96; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
97; CHECK-NEXT:    ret float [[RES]]
98;
99  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
100  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
101  ret float %res
102}
103
; Positive test (floating point): both shuffle operands are %x, and the mask
; <0, 3, 1, 2> only references lanes 0-3 (the first operand), each exactly once.
; The fadd reduction call carries the reassoc flag and a start value %a. The
; expected output keeps the flag and the start value and reduces %x directly.
104define float @reduce_fadd(float %a, <4 x float> %x) {
105; CHECK-LABEL: @reduce_fadd(
106; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
107; CHECK-NEXT:    ret float [[RES]]
108;
109  %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
110  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
111  ret float %res
112}
113
; Positive test (floating point): the second shuffle operand is
; zeroinitializer, but the mask <0, 3, 1, 2> never references it (all indices
; are below 4), so every lane of %x is used exactly once. The fmul reduction
; call carries the reassoc flag and a start value %a. The expected output keeps
; both and reduces %x directly.
114define float @reduce_fmul(float %a, <4 x float> %x) {
115; CHECK-LABEL: @reduce_fmul(
116; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
117; CHECK-NEXT:    ret float [[RES]]
118;
119  %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
120  %res = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
121  ret float %res
122}
123
124; Negative tests: the fold must not fire here, so the shufflevector is expected to remain in the output.
125; TODO: simplify the reductions for shuffles resulting in undef/poison elements.
126
; Negative test: both shuffle operands are %x and the mask is <0, 1, 2, 4>.
; Index 4 is lane 0 of the second operand, so lane 0 of %x is used twice and
; lane 3 is never used. The expected output keeps the shuffle, rewritten as a
; single-source shuffle with mask <0, 1, 2, 0>.
127define i32 @reduce_add_failed(<4 x i32> %x) {
128; CHECK-LABEL: @reduce_add_failed(
129; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
130; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUF]])
131; CHECK-NEXT:    ret i32 [[RES]]
132;
133  %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
134  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
135  ret i32 %res
136}
137
; Negative test: the mask <3, 2, 1, 4> takes lanes 3, 2, 1 from %x but its last
; element (index 4) comes from lane 0 of the zeroinitializer operand, so lane 0
; of %x is never used. The expected output keeps the shuffle; only the second
; operand changes, to <0, poison, poison, poison>.
138define i32 @reduce_or_failed(<4 x i32> %x) {
139; CHECK-LABEL: @reduce_or_failed(
140; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
141; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
142; CHECK-NEXT:    ret i32 [[RES]]
143;
144  %shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
145  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
146  ret i32 %res
147}
148
; Negative test: the mask <0, 2, 1, 0> uses lane 0 of %x twice and never uses
; lane 3, so it is not a permutation of %x. The expected output keeps the
; shuffle unchanged.
149define i32 @reduce_and_failed(<4 x i32> %x) {
150; CHECK-LABEL: @reduce_and_failed(
151; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
152; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
153; CHECK-NEXT:    ret i32 [[RES]]
154;
155  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
156  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
157  ret i32 %res
158}
159
; Negative test: the mask <1, 2, 3, undef> has an undefined last element and
; never uses lane 0 of %x. The expected output keeps the shuffle, with the
; undef mask element printed as poison.
160define i32 @reduce_xor_failed(<4 x i32> %x) {
161; CHECK-LABEL: @reduce_xor_failed(
162; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
163; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
164; CHECK-NEXT:    ret i32 [[RES]]
165;
166  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
167  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
168  ret i32 %res
169}
170
; Negative test: the shuffle combines two different <2 x i32> inputs, %x and
; %y, into one <4 x i32> value, so the reduction input is not a permutation of
; a single source vector. The expected output keeps the shuffle unchanged.
171define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) {
172; CHECK-LABEL: @reduce_umax_failed(
173; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <4 x i32> <i32 2, i32 1, i32 3, i32 0>
174; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
175; CHECK-NEXT:    ret i32 [[RES]]
176;
177  %shuf = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
178  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
179  ret i32 %res
180}
181
; Negative test: the source %x has only two lanes but the shuffle result has
; four. Mask indices 2 and 3 select from the poison second operand, so two
; result lanes are poison. The expected output keeps the shuffle, with those
; two mask elements printed as poison.
182define i32 @reduce_umin_failed(<2 x i32> %x) {
183; CHECK-LABEL: @reduce_umin_failed(
184; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
185; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
186; CHECK-NEXT:    ret i32 [[RES]]
187;
188  %shuf = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
189  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
190  ret i32 %res
191}
192
; Negative test: the source %x has eight lanes and the shuffle extracts only
; four of them (lanes 2, 0, 3, 1), so the reduction does not cover all of %x.
; The expected output keeps the shuffle unchanged.
193define i32 @reduce_smax_failed(<8 x i32> %x) {
194; CHECK-LABEL: @reduce_smax_failed(
195; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
196; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
197; CHECK-NEXT:    ret i32 [[RES]]
198;
199  %shuf = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
200  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
201  ret i32 %res
202}
203
; Negative test: the eight-lane source %x is passed as both shuffle operands,
; and the mask <0, 3, 1, 2> extracts only four of its lanes. The expected
; output keeps the shuffle, with the unused second operand replaced by poison.
204define i32 @reduce_smin_failed(<8 x i32> %x) {
205; CHECK-LABEL: @reduce_smin_failed(
206; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
207; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
208; CHECK-NEXT:    ret i32 [[RES]]
209;
210  %shuf = shufflevector <8 x i32> %x, <8 x i32> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
211  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
212  ret i32 %res
213}
214
; Negative test (floating point): the mask <2, 2, 3, 1> uses lane 2 of %x
; twice and never uses lane 0, so it is not a permutation of %x. The expected
; output keeps the shuffle unchanged.
215define float @reduce_fmax_failed(<4 x float> %x) {
216; CHECK-LABEL: @reduce_fmax_failed(
217; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
218; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
219; CHECK-NEXT:    ret float [[RES]]
220;
221  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
222  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
223  ret float %res
224}
225
; Negative test (floating point): the mask <poison, 3, 1, 2> has a poison
; first element and never uses lane 0 of %x. The expected output keeps the
; shuffle unchanged.
226define float @reduce_fmin_failed(<4 x float> %x) {
227; CHECK-LABEL: @reduce_fmin_failed(
228; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2>
229; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
230; CHECK-NEXT:    ret float [[RES]]
231;
232  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2>
233  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
234  ret float %res
235}
236
; Negative test (floating point): the mask <0, 3, 1, 2> is a full permutation
; of %x, the same mask as in @reduce_fadd, but this call has no reassoc flag.
; The expected output keeps the shuffle unchanged.
; NOTE(review): presumably the shuffle must stay because an fadd reduction
; without reassoc is evaluated in lane order - confirm against the LangRef.
237define float @reduce_fadd_failed(float %a, <4 x float> %x) {
238; CHECK-LABEL: @reduce_fadd_failed(
239; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
240; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
241; CHECK-NEXT:    ret float [[RES]]
242;
243  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
244  %res = call float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
245  ret float %res
246}
247
; Negative test (floating point): the source %x has only two lanes. Mask
; indices 3 and 2 select from the poison second operand, so two result lanes
; are poison. The fmul reduction call also has no reassoc flag. The expected
; output keeps the shuffle, with those two mask elements printed as poison.
248define float @reduce_fmul_failed(float %a, <2 x float> %x) {
249; CHECK-LABEL: @reduce_fmul_failed(
250; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
251; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
252; CHECK-NEXT:    ret float [[RES]]
253;
254  %shuf = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
255  %res = call float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
256  ret float %res
257}
258
; Declarations of the vector-reduction intrinsics called by the tests above.
; The integer and fmax/fmin reductions take a single vector operand. The fadd
; and fmul reductions take a scalar start value followed by the vector operand.
259declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
260declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
261declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
262declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
263declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
264declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
265declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
266declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
267declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
268declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
269declare float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %b)
270declare float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %b)
271