; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+zbb -riscv-v-vector-bits-min=128 -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
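
; These tests check that a scalar binary operation applied to the result of a
; vector reduction is folded into the reduction's start value: the scalar
; operand is moved into a vector register (vmv.s.x / vmv.v.i / vfmv.s.f) and
; passed as the start value of the vred*/vfred* instruction instead of being
; applied to the scalar result afterwards.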

define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

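; In the *2 variants the scalar operand is a constant; it is materialized with
; vmv.v.i and still used as the reduction's start value.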
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredand.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

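; Here the scalar operation (and) does not match the reduction (xor), so it is
; not folded into the start value: the reduction starts from zero and the and
; is emitted as a scalar andi after vmv.x.s.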
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

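; The min/max cases express the scalar operation through the llvm.umax/umin/
; smax/smin intrinsics; the scalar argument is still folded into the start
; value of the corresponding vredmaxu/vredminu/vredmax/vredmin.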
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmaxu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredminu.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmax.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT:    vredmin.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

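; For the floating-point tests the scalar operand becomes the start value of
; the reduction via vfmv.s.f. In reduce_fadd2 the separate fadd of %x is
; folded into the start value of the fast fadd reduction, and in
; reduce_fmax/reduce_fmin the maxnum/minnum of %x is folded into
; vfredmax/vfredmin.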
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredmax.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vfredmin.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)