; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+zbb -riscv-v-vector-bits-min=128 -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

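; These tests check that a scalar binary op applied to the result of a vector
; reduction is folded into the reduction by using the op's other operand as
; the reduction's start value. Here the add of %x becomes the vmv.s.x start
; value of vredsum.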
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

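; Same fold with a constant operand: the immediate 8 is materialized as the
; vredsum start value.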
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

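; The and of %x folds into vredand as the start value.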
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

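; and with a constant operand.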
define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

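; The or of %x folds into vredor as the start value.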
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

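; or with a constant operand.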
define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

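; The xor of %x folds into vredxor as the start value.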
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

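; Negative test: the trailing op is an and, which does not match the xor
; reduction, so it is not folded and remains a scalar andi.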
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

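; umax of %x folds into vredmaxu as the start value.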
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

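; umax with a constant operand.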
define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

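; umin of %x folds into vredminu as the start value.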
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

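; umin with a constant operand.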
define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

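; smax of %x folds into vredmax as the start value.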
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

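; smax with a constant operand.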
define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

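; smin of %x folds into vredmin as the start value.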
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

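; smin with a constant operand.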
define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

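; %x is already the start operand of the fadd reduction intrinsic, so it is
; moved in directly with vfmv.s.f.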
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

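; A fast fadd of %x after a zero-started fast reduction folds in as the start
; value, giving the same code as reduce_fadd.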
define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

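; Negative test: %rdx has a second use (the store), so the fadd is not folded
; and remains a scalar fadd.s.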
define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
; CHECK-LABEL: reduce_fadd3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  store float %rdx, ptr %rdxptr
  ret float %res
}

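; maxnum of %x with the fmax reduction folds in as the vfredmax start value.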
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredmax.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

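; minnum of %x with the fmin reduction folds in as the vfredmin start value.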
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredmin.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)