; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
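; Test lowering of the @llvm.vector.reduce.{add,umax,smax,umin,smin,and,or,xor}
; intrinsics (plus sign- and zero-extending widening-add variants) on scalable
; integer vectors for both RV32 and RV64 with the V extension.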

declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)

define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)

define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)

define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
  ret i8 %red
}

declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)

define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)

define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
  ret i16 %red
}

define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)

define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
  ret i16 %red
}

declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)

define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)

define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwredsum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)

define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredsum.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_smax_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmax.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smax_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmax.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredminu.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredminu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_smin_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredmin.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smin_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredmin.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredand.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredand.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vredor.vs v8, v8, v8
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vredor.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)

define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vredsum.vs v8, v8, v10
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v9
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_smax_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmax.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smax_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmax.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredminu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredminu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_smin_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredmin.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smin_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredmin.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredand.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredand.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vredor.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vredor.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)

define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vredxor.vs v8, v8, v10
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vmv.s.x v12, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredsum.vs v8, v8, v12
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64:       # %bb.0:
1662; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
1663; RV64-NEXT:    vmv.s.x v12, zero
1664; RV64-NEXT:    vredsum.vs v8, v8, v12
1665; RV64-NEXT:    vmv.x.s a0, v8
1666; RV64-NEXT:    ret
1667  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
1668  ret i64 %red
1669}
1670
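; The two tests below check that a reduce.add of a sign- or zero-extended
; nxv4i32 input is selected as a widening sum reduction
; (vwredsum.vs / vwredsumu.vs).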
define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vwredsum.vs v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vwredsum.vs v8, v8, v10
; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
  ret i64 %red
}

define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v10, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vwredsumu.vs v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v10, zero
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vwredsumu.vs v8, v8, v10
; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_smax_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredmax.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smax_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredmax.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredminu.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_umin_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredminu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_smin_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredmin.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_smin_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredmin.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredand.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_and_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredand.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vredor.vs v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vredor.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}

declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)

define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vmv.s.x v12, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v12
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vmv.s.x v12, zero
; RV64-NEXT:    vredxor.vs v8, v8, v12
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
  ret i64 %red
}
