; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

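; Lowering of llvm.vector.reduce.fadd/fmin/fmax over fixed-length vectors of
; half, float and double. Reassociable (reassoc/fast) fadd reductions select
; vfredusum and strictly ordered ones vfredosum; reductions of an fpext'ed
; operand use the widening forms vfwredusum/vfwredosum.
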
declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)

define half @vreduce_fadd_v1f16(<1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.h fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v1f16(<1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v)
  ret half %red
}
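; For <1 x half>, the reassociable reduction above folds to a scalar extract
; (vfmv.f.s) plus fadd.h; the strictly ordered form still emits vfredosum.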

declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)

define half @vreduce_fadd_v2f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v2f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)

define half @vreduce_fadd_v4f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v4f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v7f16(half, <7 x half>)

define half @vreduce_fadd_v7f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v7f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v7f16(half %s, <7 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)

define half @vreduce_fadd_v8f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v8f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)

define half @vreduce_fadd_v16f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v16f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)

define half @vreduce_fadd_v32f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v32f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)

define half @vreduce_fadd_v64f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v64f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)

define half @vreduce_fadd_v128f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call reassoc half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v)
  ret half %red
}
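; <128 x half> exceeds the largest register group (LMUL=8), so the
; reassociable form above folds the two halves with vfadd.vv and reduces
; once, while the ordered form chains two vfredosum ops.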

declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>)

define float @vreduce_fadd_v1f32(<1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v1f32(<1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v)
  ret float %red
}

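; Widening reductions: an fpext from half feeding the fadd reduction is
; folded into vfwredusum/vfwredosum at the source element width. The
; reassociable single-element case is instead scalarized through
; vfwcvt.f.f.v and fadd.s.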
define float @vreduce_fwadd_v1f32(<1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v9
; CHECK-NEXT:    fadd.s fa0, fa0, fa5
; CHECK-NEXT:    ret
  %e = fpext <1 x half> %v to <1 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v1f32(<1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <1 x half> %v to <1 x float>
  %red = call float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)

define float @vreduce_fadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %e = fpext <2 x half> %v to <2 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v2f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %e = fpext <2 x half> %v to <2 x float>
  %red = call float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)

define float @vreduce_fadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %e = fpext <4 x half> %v to <4 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v4f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %e = fpext <4 x half> %v to <4 x float>
  %red = call float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v7f32(float, <7 x float>)

define float @vreduce_fadd_v7f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v7f32(float %s, <7 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v7f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v7f32(float %s, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart_nsz(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call nsz float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}

define float @vreduce_fadd_v7f32_neutralstart_fast(ptr %x) {
; CHECK-LABEL: vreduce_fadd_v7f32_neutralstart_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call fast float @llvm.vector.reduce.fadd.v7f32(float -0.0, <7 x float> %v)
  ret float %red
}
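; In the neutral-start tests above, the start value -0.0 (the identity of
; fadd) is built with lui a0, 524288, i.e. 0x80000000. nsz alone does not
; permit reassociation, so that form keeps the ordered vfredosum; the fast
; form can seed the accumulator with integer zero (vmv.s.x).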


declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)

define float @vreduce_fadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %e = fpext <8 x half> %v to <8 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v8f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x half>, ptr %x
  %e = fpext <8 x half> %v to <8 x float>
  %red = call float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)

define float @vreduce_fadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %e = fpext <16 x half> %v to <16 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v16f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x half>, ptr %x
  %e = fpext <16 x half> %v to <16 x float>
  %red = call float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)

define float @vreduce_fadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %e = fpext <32 x half> %v to <32 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v32f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x half>, ptr %x
  %e = fpext <32 x half> %v to <32 x float>
  %red = call float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)

define float @vreduce_fadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x float>, ptr %x
  %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_fwadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vfredusum.vs v8, v24, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %e = fpext <64 x half> %v to <64 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v24, v8, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
; CHECK-NEXT:    vfwredosum.vs v8, v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x half>, ptr %x
  %e = fpext <64 x half> %v to <64 x float>
  %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %e)
  ret float %red
}
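; A <64 x half> source fits in one m8 group, but its <64 x float> extension
; would not. The reassociable form splits the source with vslidedown.vx,
; combines the halves with vfwadd.vv and reduces once; the ordered form
; chains two vfwredosum ops. The <32 x float>-to-<32 x double> cases below
; are split the same way.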

declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)

define double @vreduce_fadd_v1f64(<1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.d fa0, fa0, fa5
; CHECK-NEXT:    ret
  %red = call reassoc double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v1f64(<1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v1f64(<1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v9
; CHECK-NEXT:    fadd.d fa0, fa0, fa5
; CHECK-NEXT:    ret
  %e = fpext <1 x float> %v to <1 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v1f64(<1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <1 x float> %v to <1 x double>
  %red = call double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)

define double @vreduce_fadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %e = fpext <2 x float> %v to <2 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v2f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %e = fpext <2 x float> %v to <2 x double>
  %red = call double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)

define double @vreduce_fadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %e = fpext <4 x float> %v to <4 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v4f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %e = fpext <4 x float> %v to <4 x double>
  %red = call double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)

define double @vreduce_fadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %e = fpext <8 x float> %v to <8 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v8f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %x
  %e = fpext <8 x float> %v to <8 x double>
  %red = call double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)

define double @vreduce_fadd_v16f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v16f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v16f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %e = fpext <16 x float> %v to <16 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v16f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <16 x float>, ptr %x
  %e = fpext <16 x float> %v to <16 x double>
  %red = call double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)

define double @vreduce_fadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v16
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v16
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    vfmv.s.f v24, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v24
; CHECK-NEXT:    vfredosum.vs v8, v16, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_fwadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwadd.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vfredusum.vs v8, v24, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %e = fpext <32 x float> %v to <32 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
; CHECK-NEXT:    vfwredosum.vs v8, v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x float>, ptr %x
  %e = fpext <32 x float> %v to <32 x double>
  %red = call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %e)
  ret double %red
}

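; fmin/fmax reductions lower to vfredmin/vfredmax. For these tests the
; source vector also serves as the scalar accumulator (e.g. vfredmin.vs
; v8, v8, v8), and the nnan/ninf variants generate identical code.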
declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)

define half @vreduce_fmin_v2f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)

define half @vreduce_fmin_v4f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmin_v4f16_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

define half @vreduce_fmin_v4f16_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %x
  %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)

define half @vreduce_fmin_v128f16(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <128 x half>, ptr %x
  %red = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)

define float @vreduce_fmin_v2f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <2 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

define float @vreduce_fmin_v4f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmin_v4f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

define float @vreduce_fmin_v4f32_nonans_noinfs(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <4 x float>, ptr %x
  %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.v7f32(<7 x float>)

define float @vreduce_fmin_v7f32(ptr %x) {
; CHECK-LABEL: vreduce_fmin_v7f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vfredmin.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <7 x float>, ptr %x
  %red = call float @llvm.vector.reduce.fmin.v7f32(<7 x float> %v)
  ret float %red
}
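; The non-power-of-two <7 x float> case seeds the accumulator explicitly:
; lui a0, 523264 materializes 0x7fc00000, the canonical quiet NaN, which is
; the identity for vfredmin since fmin returns the non-NaN operand.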
1340
1341declare float @llvm.vector.reduce.fmin.v128f32(<128 x float>)
1342
1343define float @vreduce_fmin_v128f32(ptr %x) {
1344; CHECK-LABEL: vreduce_fmin_v128f32:
1345; CHECK:       # %bb.0:
1346; CHECK-NEXT:    li a1, 32
1347; CHECK-NEXT:    addi a2, a0, 384
1348; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1349; CHECK-NEXT:    vle32.v v8, (a2)
1350; CHECK-NEXT:    addi a1, a0, 256
1351; CHECK-NEXT:    vle32.v v16, (a0)
1352; CHECK-NEXT:    addi a0, a0, 128
1353; CHECK-NEXT:    vle32.v v24, (a0)
1354; CHECK-NEXT:    vle32.v v0, (a1)
1355; CHECK-NEXT:    vfmin.vv v8, v24, v8
1356; CHECK-NEXT:    vfmin.vv v16, v16, v0
1357; CHECK-NEXT:    vfmin.vv v8, v16, v8
1358; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1359; CHECK-NEXT:    vfmv.f.s fa0, v8
1360; CHECK-NEXT:    ret
1361  %v = load <128 x float>, ptr %x
1362  %red = call float @llvm.vector.reduce.fmin.v128f32(<128 x float> %v)
1363  ret float %red
1364}
1365
1366declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
1367
1368define double @vreduce_fmin_v2f64(ptr %x) {
1369; CHECK-LABEL: vreduce_fmin_v2f64:
1370; CHECK:       # %bb.0:
1371; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1372; CHECK-NEXT:    vle64.v v8, (a0)
1373; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1374; CHECK-NEXT:    vfmv.f.s fa0, v8
1375; CHECK-NEXT:    ret
1376  %v = load <2 x double>, ptr %x
1377  %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v)
1378  ret double %red
1379}
1380
1381declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
1382
1383define double @vreduce_fmin_v4f64(ptr %x) {
1384; CHECK-LABEL: vreduce_fmin_v4f64:
1385; CHECK:       # %bb.0:
1386; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1387; CHECK-NEXT:    vle64.v v8, (a0)
1388; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1389; CHECK-NEXT:    vfmv.f.s fa0, v8
1390; CHECK-NEXT:    ret
1391  %v = load <4 x double>, ptr %x
1392  %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
1393  ret double %red
1394}
1395
1396define double @vreduce_fmin_v4f64_nonans(ptr %x) {
1397; CHECK-LABEL: vreduce_fmin_v4f64_nonans:
1398; CHECK:       # %bb.0:
1399; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1400; CHECK-NEXT:    vle64.v v8, (a0)
1401; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1402; CHECK-NEXT:    vfmv.f.s fa0, v8
1403; CHECK-NEXT:    ret
1404  %v = load <4 x double>, ptr %x
1405  %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
1406  ret double %red
1407}
1408
1409define double @vreduce_fmin_v4f64_nonans_noinfs(ptr %x) {
1410; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
1411; CHECK:       # %bb.0:
1412; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1413; CHECK-NEXT:    vle64.v v8, (a0)
1414; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1415; CHECK-NEXT:    vfmv.f.s fa0, v8
1416; CHECK-NEXT:    ret
1417  %v = load <4 x double>, ptr %x
1418  %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
1419  ret double %red
1420}
1421
1422declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
1423
1424define double @vreduce_fmin_v32f64(ptr %x) {
1425; CHECK-LABEL: vreduce_fmin_v32f64:
1426; CHECK:       # %bb.0:
1427; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1428; CHECK-NEXT:    vle64.v v8, (a0)
1429; CHECK-NEXT:    addi a0, a0, 128
1430; CHECK-NEXT:    vle64.v v16, (a0)
1431; CHECK-NEXT:    vfmin.vv v8, v8, v16
1432; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1433; CHECK-NEXT:    vfmv.f.s fa0, v8
1434; CHECK-NEXT:    ret
1435  %v = load <32 x double>, ptr %x
1436  %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v)
1437  ret double %red
1438}
1439
1440declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
1441
1442define half @vreduce_fmax_v2f16(ptr %x) {
1443; CHECK-LABEL: vreduce_fmax_v2f16:
1444; CHECK:       # %bb.0:
1445; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
1446; CHECK-NEXT:    vle16.v v8, (a0)
1447; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1448; CHECK-NEXT:    vfmv.f.s fa0, v8
1449; CHECK-NEXT:    ret
1450  %v = load <2 x half>, ptr %x
1451  %red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v)
1452  ret half %red
1453}
1454
1455declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
1456
1457define half @vreduce_fmax_v4f16(ptr %x) {
1458; CHECK-LABEL: vreduce_fmax_v4f16:
1459; CHECK:       # %bb.0:
1460; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1461; CHECK-NEXT:    vle16.v v8, (a0)
1462; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1463; CHECK-NEXT:    vfmv.f.s fa0, v8
1464; CHECK-NEXT:    ret
1465  %v = load <4 x half>, ptr %x
1466  %red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
1467  ret half %red
1468}
1469
1470define half @vreduce_fmax_v4f16_nonans(ptr %x) {
1471; CHECK-LABEL: vreduce_fmax_v4f16_nonans:
1472; CHECK:       # %bb.0:
1473; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1474; CHECK-NEXT:    vle16.v v8, (a0)
1475; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1476; CHECK-NEXT:    vfmv.f.s fa0, v8
1477; CHECK-NEXT:    ret
1478  %v = load <4 x half>, ptr %x
1479  %red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
1480  ret half %red
1481}
1482
1483define half @vreduce_fmax_v4f16_nonans_noinfs(ptr %x) {
1484; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
1485; CHECK:       # %bb.0:
1486; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1487; CHECK-NEXT:    vle16.v v8, (a0)
1488; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1489; CHECK-NEXT:    vfmv.f.s fa0, v8
1490; CHECK-NEXT:    ret
1491  %v = load <4 x half>, ptr %x
1492  %red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
1493  ret half %red
1494}
1495
1496declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)
1497
1498define half @vreduce_fmax_v128f16(ptr %x) {
1499; CHECK-LABEL: vreduce_fmax_v128f16:
1500; CHECK:       # %bb.0:
1501; CHECK-NEXT:    li a1, 64
1502; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
1503; CHECK-NEXT:    vle16.v v8, (a0)
1504; CHECK-NEXT:    addi a0, a0, 128
1505; CHECK-NEXT:    vle16.v v16, (a0)
1506; CHECK-NEXT:    vfmax.vv v8, v8, v16
1507; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1508; CHECK-NEXT:    vfmv.f.s fa0, v8
1509; CHECK-NEXT:    ret
1510  %v = load <128 x half>, ptr %x
1511  %red = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %v)
1512  ret half %red
1513}
1514
1515declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
1516
1517define float @vreduce_fmax_v2f32(ptr %x) {
1518; CHECK-LABEL: vreduce_fmax_v2f32:
1519; CHECK:       # %bb.0:
1520; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
1521; CHECK-NEXT:    vle32.v v8, (a0)
1522; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1523; CHECK-NEXT:    vfmv.f.s fa0, v8
1524; CHECK-NEXT:    ret
1525  %v = load <2 x float>, ptr %x
1526  %red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v)
1527  ret float %red
1528}
1529
1530declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
1531
1532define float @vreduce_fmax_v4f32(ptr %x) {
1533; CHECK-LABEL: vreduce_fmax_v4f32:
1534; CHECK:       # %bb.0:
1535; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1536; CHECK-NEXT:    vle32.v v8, (a0)
1537; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1538; CHECK-NEXT:    vfmv.f.s fa0, v8
1539; CHECK-NEXT:    ret
1540  %v = load <4 x float>, ptr %x
1541  %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
1542  ret float %red
1543}
1544
1545define float @vreduce_fmax_v4f32_nonans(ptr %x) {
1546; CHECK-LABEL: vreduce_fmax_v4f32_nonans:
1547; CHECK:       # %bb.0:
1548; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1549; CHECK-NEXT:    vle32.v v8, (a0)
1550; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1551; CHECK-NEXT:    vfmv.f.s fa0, v8
1552; CHECK-NEXT:    ret
1553  %v = load <4 x float>, ptr %x
1554  %red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
1555  ret float %red
1556}
1557
1558define float @vreduce_fmax_v4f32_nonans_noinfs(ptr %x) {
1559; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
1560; CHECK:       # %bb.0:
1561; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1562; CHECK-NEXT:    vle32.v v8, (a0)
1563; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1564; CHECK-NEXT:    vfmv.f.s fa0, v8
1565; CHECK-NEXT:    ret
1566  %v = load <4 x float>, ptr %x
1567  %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
1568  ret float %red
1569}
1570
1571declare float @llvm.vector.reduce.fmax.v7f32(<7 x float>)
1572
1573define float @vreduce_fmax_v7f32(ptr %x) {
1574; CHECK-LABEL: vreduce_fmax_v7f32:
1575; CHECK:       # %bb.0:
1576; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
1577; CHECK-NEXT:    vle32.v v8, (a0)
1578; CHECK-NEXT:    lui a0, 1047552
1579; CHECK-NEXT:    vmv.s.x v10, a0
1580; CHECK-NEXT:    vfredmax.vs v8, v8, v10
1581; CHECK-NEXT:    vfmv.f.s fa0, v8
1582; CHECK-NEXT:    ret
1583  %v = load <7 x float>, ptr %x
1584  %red = call float @llvm.vector.reduce.fmax.v7f32(<7 x float> %v)
1585  ret float %red
1586}
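; Note: for the non-power-of-two <7 x float>, an explicit start value is
; materialized: lui a0, 1047552 builds 0xFFC00000, a quiet NaN, which
; vfredmax ignores and which therefore acts as the reduction identity.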
1587
1588declare float @llvm.vector.reduce.fmax.v128f32(<128 x float>)
1589
1590define float @vreduce_fmax_v128f32(ptr %x) {
1591; CHECK-LABEL: vreduce_fmax_v128f32:
1592; CHECK:       # %bb.0:
1593; CHECK-NEXT:    li a1, 32
1594; CHECK-NEXT:    addi a2, a0, 384
1595; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1596; CHECK-NEXT:    vle32.v v8, (a2)
1597; CHECK-NEXT:    addi a1, a0, 256
1598; CHECK-NEXT:    vle32.v v16, (a0)
1599; CHECK-NEXT:    addi a0, a0, 128
1600; CHECK-NEXT:    vle32.v v24, (a0)
1601; CHECK-NEXT:    vle32.v v0, (a1)
1602; CHECK-NEXT:    vfmax.vv v8, v24, v8
1603; CHECK-NEXT:    vfmax.vv v16, v16, v0
1604; CHECK-NEXT:    vfmax.vv v8, v16, v8
1605; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1606; CHECK-NEXT:    vfmv.f.s fa0, v8
1607; CHECK-NEXT:    ret
1608  %v = load <128 x float>, ptr %x
1609  %red = call float @llvm.vector.reduce.fmax.v128f32(<128 x float> %v)
1610  ret float %red
1611}
1612
1613declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
1614
1615define double @vreduce_fmax_v2f64(ptr %x) {
1616; CHECK-LABEL: vreduce_fmax_v2f64:
1617; CHECK:       # %bb.0:
1618; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1619; CHECK-NEXT:    vle64.v v8, (a0)
1620; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1621; CHECK-NEXT:    vfmv.f.s fa0, v8
1622; CHECK-NEXT:    ret
1623  %v = load <2 x double>, ptr %x
1624  %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v)
1625  ret double %red
1626}
1627
1628declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
1629
1630define double @vreduce_fmax_v4f64(ptr %x) {
1631; CHECK-LABEL: vreduce_fmax_v4f64:
1632; CHECK:       # %bb.0:
1633; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1634; CHECK-NEXT:    vle64.v v8, (a0)
1635; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1636; CHECK-NEXT:    vfmv.f.s fa0, v8
1637; CHECK-NEXT:    ret
1638  %v = load <4 x double>, ptr %x
1639  %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
1640  ret double %red
1641}
1642
1643define double @vreduce_fmax_v4f64_nonans(ptr %x) {
1644; CHECK-LABEL: vreduce_fmax_v4f64_nonans:
1645; CHECK:       # %bb.0:
1646; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1647; CHECK-NEXT:    vle64.v v8, (a0)
1648; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1649; CHECK-NEXT:    vfmv.f.s fa0, v8
1650; CHECK-NEXT:    ret
1651  %v = load <4 x double>, ptr %x
1652  %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
1653  ret double %red
1654}
1655
1656define double @vreduce_fmax_v4f64_nonans_noinfs(ptr %x) {
1657; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
1658; CHECK:       # %bb.0:
1659; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
1660; CHECK-NEXT:    vle64.v v8, (a0)
1661; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1662; CHECK-NEXT:    vfmv.f.s fa0, v8
1663; CHECK-NEXT:    ret
1664  %v = load <4 x double>, ptr %x
1665  %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
1666  ret double %red
1667}
1668
1669declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
1670
1671define double @vreduce_fmax_v32f64(ptr %x) {
1672; CHECK-LABEL: vreduce_fmax_v32f64:
1673; CHECK:       # %bb.0:
1674; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1675; CHECK-NEXT:    vle64.v v8, (a0)
1676; CHECK-NEXT:    addi a0, a0, 128
1677; CHECK-NEXT:    vle64.v v16, (a0)
1678; CHECK-NEXT:    vfmax.vv v8, v8, v16
1679; CHECK-NEXT:    vfredmax.vs v8, v8, v8
1680; CHECK-NEXT:    vfmv.f.s fa0, v8
1681; CHECK-NEXT:    ret
1682  %v = load <32 x double>, ptr %x
1683  %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v)
1684  ret double %red
1685}
1686
1687define float @vreduce_nsz_fadd_v4f32(ptr %x, float %s) {
1688; CHECK-LABEL: vreduce_nsz_fadd_v4f32:
1689; CHECK:       # %bb.0:
1690; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1691; CHECK-NEXT:    vle32.v v8, (a0)
1692; CHECK-NEXT:    vfmv.s.f v9, fa0
1693; CHECK-NEXT:    vfredusum.vs v8, v8, v9
1694; CHECK-NEXT:    vfmv.f.s fa0, v8
1695; CHECK-NEXT:    ret
1696  %v = load <4 x float>, ptr %x
1697  %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
1698  ret float %red
1699}
1700
1701declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
1702
1703define float @vreduce_fminimum_v2f32(ptr %x) {
1704; CHECK-LABEL: vreduce_fminimum_v2f32:
1705; CHECK:       # %bb.0:
1706; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
1707; CHECK-NEXT:    vle32.v v8, (a0)
1708; CHECK-NEXT:    vmfne.vv v9, v8, v8
1709; CHECK-NEXT:    vcpop.m a0, v9
1710; CHECK-NEXT:    beqz a0, .LBB107_2
1711; CHECK-NEXT:  # %bb.1:
1712; CHECK-NEXT:    lui a0, 523264
1713; CHECK-NEXT:    fmv.w.x fa0, a0
1714; CHECK-NEXT:    ret
1715; CHECK-NEXT:  .LBB107_2:
1716; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1717; CHECK-NEXT:    vfmv.f.s fa0, v8
1718; CHECK-NEXT:    ret
1719  %v = load <2 x float>, ptr %x
1720  %red = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %v)
1721  ret float %red
1722}
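; Note: fminimum must propagate NaNs, unlike fmin. The vmfne.vv self-compare
; sets mask bits exactly at NaN lanes and vcpop.m counts them; if any are set,
; the canonical NaN 0x7FC00000 (lui a0, 523264) is returned directly,
; otherwise a plain vfredmin.vs suffices.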
1723
1724define float @vreduce_fminimum_v2f32_nonans(ptr %x) {
1725; CHECK-LABEL: vreduce_fminimum_v2f32_nonans:
1726; CHECK:       # %bb.0:
1727; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
1728; CHECK-NEXT:    vle32.v v8, (a0)
1729; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1730; CHECK-NEXT:    vfmv.f.s fa0, v8
1731; CHECK-NEXT:    ret
1732  %v = load <2 x float>, ptr %x
1733  %red = call nnan float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %v)
1734  ret float %red
1735}
1736
1737declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
1738
1739define float @vreduce_fminimum_v4f32(ptr %x) {
1740; CHECK-LABEL: vreduce_fminimum_v4f32:
1741; CHECK:       # %bb.0:
1742; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1743; CHECK-NEXT:    vle32.v v8, (a0)
1744; CHECK-NEXT:    vmfne.vv v9, v8, v8
1745; CHECK-NEXT:    vcpop.m a0, v9
1746; CHECK-NEXT:    beqz a0, .LBB109_2
1747; CHECK-NEXT:  # %bb.1:
1748; CHECK-NEXT:    lui a0, 523264
1749; CHECK-NEXT:    fmv.w.x fa0, a0
1750; CHECK-NEXT:    ret
1751; CHECK-NEXT:  .LBB109_2:
1752; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1753; CHECK-NEXT:    vfmv.f.s fa0, v8
1754; CHECK-NEXT:    ret
1755  %v = load <4 x float>, ptr %x
1756  %red = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %v)
1757  ret float %red
1758}
1759
1760define float @vreduce_fminimum_v4f32_nonans(ptr %x) {
1761; CHECK-LABEL: vreduce_fminimum_v4f32_nonans:
1762; CHECK:       # %bb.0:
1763; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1764; CHECK-NEXT:    vle32.v v8, (a0)
1765; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1766; CHECK-NEXT:    vfmv.f.s fa0, v8
1767; CHECK-NEXT:    ret
1768  %v = load <4 x float>, ptr %x
1769  %red = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %v)
1770  ret float %red
1771}
1772
1773declare float @llvm.vector.reduce.fminimum.v7f32(<7 x float>)
1774
1775define float @vreduce_fminimum_v7f32(ptr %x) {
1776; CHECK-LABEL: vreduce_fminimum_v7f32:
1777; CHECK:       # %bb.0:
1778; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
1779; CHECK-NEXT:    vle32.v v8, (a0)
1780; CHECK-NEXT:    vmfne.vv v10, v8, v8
1781; CHECK-NEXT:    vcpop.m a0, v10
1782; CHECK-NEXT:    beqz a0, .LBB111_2
1783; CHECK-NEXT:  # %bb.1:
1784; CHECK-NEXT:    lui a0, 523264
1785; CHECK-NEXT:    fmv.w.x fa0, a0
1786; CHECK-NEXT:    ret
1787; CHECK-NEXT:  .LBB111_2:
1788; CHECK-NEXT:    lui a0, 522240
1789; CHECK-NEXT:    vmv.s.x v10, a0
1790; CHECK-NEXT:    vfredmin.vs v8, v8, v10
1791; CHECK-NEXT:    vfmv.f.s fa0, v8
1792; CHECK-NEXT:    ret
1793  %v = load <7 x float>, ptr %x
1794  %red = call float @llvm.vector.reduce.fminimum.v7f32(<7 x float> %v)
1795  ret float %red
1796}
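; Note: on the no-NaN path of the <7 x float> case, +infinity (0x7F800000
; from lui a0, 522240) is used as the start value, the identity for fminimum.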
1797
1798define float @vreduce_fminimum_v7f32_nonans(ptr %x) {
1799; CHECK-LABEL: vreduce_fminimum_v7f32_nonans:
1800; CHECK:       # %bb.0:
1801; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
1802; CHECK-NEXT:    vle32.v v8, (a0)
1803; CHECK-NEXT:    lui a0, 522240
1804; CHECK-NEXT:    vmv.s.x v10, a0
1805; CHECK-NEXT:    vfredmin.vs v8, v8, v10
1806; CHECK-NEXT:    vfmv.f.s fa0, v8
1807; CHECK-NEXT:    ret
1808  %v = load <7 x float>, ptr %x
1809  %red = call nnan float @llvm.vector.reduce.fminimum.v7f32(<7 x float> %v)
1810  ret float %red
1811}
1812
1813declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
1814
1815define float @vreduce_fminimum_v8f32(ptr %x) {
1816; CHECK-LABEL: vreduce_fminimum_v8f32:
1817; CHECK:       # %bb.0:
1818; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1819; CHECK-NEXT:    vle32.v v8, (a0)
1820; CHECK-NEXT:    vmfne.vv v10, v8, v8
1821; CHECK-NEXT:    vcpop.m a0, v10
1822; CHECK-NEXT:    beqz a0, .LBB113_2
1823; CHECK-NEXT:  # %bb.1:
1824; CHECK-NEXT:    lui a0, 523264
1825; CHECK-NEXT:    fmv.w.x fa0, a0
1826; CHECK-NEXT:    ret
1827; CHECK-NEXT:  .LBB113_2:
1828; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1829; CHECK-NEXT:    vfmv.f.s fa0, v8
1830; CHECK-NEXT:    ret
1831  %v = load <8 x float>, ptr %x
1832  %red = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %v)
1833  ret float %red
1834}
1835
1836define float @vreduce_fminimum_v8f32_nonans(ptr %x) {
1837; CHECK-LABEL: vreduce_fminimum_v8f32_nonans:
1838; CHECK:       # %bb.0:
1839; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1840; CHECK-NEXT:    vle32.v v8, (a0)
1841; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1842; CHECK-NEXT:    vfmv.f.s fa0, v8
1843; CHECK-NEXT:    ret
1844  %v = load <8 x float>, ptr %x
1845  %red = call nnan float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %v)
1846  ret float %red
1847}
1848
1849declare float @llvm.vector.reduce.fminimum.v16f32(<16 x float>)
1850
1851define float @vreduce_fminimum_v16f32(ptr %x) {
1852; CHECK-LABEL: vreduce_fminimum_v16f32:
1853; CHECK:       # %bb.0:
1854; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1855; CHECK-NEXT:    vle32.v v8, (a0)
1856; CHECK-NEXT:    vmfne.vv v12, v8, v8
1857; CHECK-NEXT:    vcpop.m a0, v12
1858; CHECK-NEXT:    beqz a0, .LBB115_2
1859; CHECK-NEXT:  # %bb.1:
1860; CHECK-NEXT:    lui a0, 523264
1861; CHECK-NEXT:    fmv.w.x fa0, a0
1862; CHECK-NEXT:    ret
1863; CHECK-NEXT:  .LBB115_2:
1864; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1865; CHECK-NEXT:    vfmv.f.s fa0, v8
1866; CHECK-NEXT:    ret
1867  %v = load <16 x float>, ptr %x
1868  %red = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %v)
1869  ret float %red
1870}
1871
1872define float @vreduce_fminimum_v16f32_nonans(ptr %x) {
1873; CHECK-LABEL: vreduce_fminimum_v16f32_nonans:
1874; CHECK:       # %bb.0:
1875; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1876; CHECK-NEXT:    vle32.v v8, (a0)
1877; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1878; CHECK-NEXT:    vfmv.f.s fa0, v8
1879; CHECK-NEXT:    ret
1880  %v = load <16 x float>, ptr %x
1881  %red = call nnan float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %v)
1882  ret float %red
1883}
1884
1885declare float @llvm.vector.reduce.fminimum.v32f32(<32 x float>)
1886
1887define float @vreduce_fminimum_v32f32(ptr %x) {
1888; CHECK-LABEL: vreduce_fminimum_v32f32:
1889; CHECK:       # %bb.0:
1890; CHECK-NEXT:    li a1, 32
1891; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1892; CHECK-NEXT:    vle32.v v8, (a0)
1893; CHECK-NEXT:    vmfne.vv v16, v8, v8
1894; CHECK-NEXT:    vcpop.m a0, v16
1895; CHECK-NEXT:    beqz a0, .LBB117_2
1896; CHECK-NEXT:  # %bb.1:
1897; CHECK-NEXT:    lui a0, 523264
1898; CHECK-NEXT:    fmv.w.x fa0, a0
1899; CHECK-NEXT:    ret
1900; CHECK-NEXT:  .LBB117_2:
1901; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1902; CHECK-NEXT:    vfmv.f.s fa0, v8
1903; CHECK-NEXT:    ret
1904  %v = load <32 x float>, ptr %x
1905  %red = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %v)
1906  ret float %red
1907}
1908
1909define float @vreduce_fminimum_v32f32_nonans(ptr %x) {
1910; CHECK-LABEL: vreduce_fminimum_v32f32_nonans:
1911; CHECK:       # %bb.0:
1912; CHECK-NEXT:    li a1, 32
1913; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1914; CHECK-NEXT:    vle32.v v8, (a0)
1915; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1916; CHECK-NEXT:    vfmv.f.s fa0, v8
1917; CHECK-NEXT:    ret
1918  %v = load <32 x float>, ptr %x
1919  %red = call nnan float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %v)
1920  ret float %red
1921}
1922
1923declare float @llvm.vector.reduce.fminimum.v64f32(<64 x float>)
1924
1925define float @vreduce_fminimum_v64f32(ptr %x) {
1926; CHECK-LABEL: vreduce_fminimum_v64f32:
1927; CHECK:       # %bb.0:
1928; CHECK-NEXT:    addi sp, sp, -16
1929; CHECK-NEXT:    .cfi_def_cfa_offset 16
1930; CHECK-NEXT:    csrr a1, vlenb
1931; CHECK-NEXT:    slli a1, a1, 3
1932; CHECK-NEXT:    sub sp, sp, a1
1933; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1934; CHECK-NEXT:    addi a1, a0, 128
1935; CHECK-NEXT:    li a2, 32
1936; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
1937; CHECK-NEXT:    vle32.v v16, (a0)
1938; CHECK-NEXT:    vle32.v v24, (a1)
1939; CHECK-NEXT:    vmfeq.vv v0, v16, v16
1940; CHECK-NEXT:    vmfeq.vv v7, v24, v24
1941; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
1942; CHECK-NEXT:    addi a0, sp, 16
1943; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
1944; CHECK-NEXT:    vmv1r.v v0, v7
1945; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
1946; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1947; CHECK-NEXT:    vfmin.vv v8, v8, v16
1948; CHECK-NEXT:    vmfne.vv v16, v8, v8
1949; CHECK-NEXT:    vcpop.m a0, v16
1950; CHECK-NEXT:    beqz a0, .LBB119_2
1951; CHECK-NEXT:  # %bb.1:
1952; CHECK-NEXT:    lui a0, 523264
1953; CHECK-NEXT:    fmv.w.x fa0, a0
1954; CHECK-NEXT:    j .LBB119_3
1955; CHECK-NEXT:  .LBB119_2:
1956; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1957; CHECK-NEXT:    vfmv.f.s fa0, v8
1958; CHECK-NEXT:  .LBB119_3:
1959; CHECK-NEXT:    csrr a0, vlenb
1960; CHECK-NEXT:    slli a0, a0, 3
1961; CHECK-NEXT:    add sp, sp, a0
1962; CHECK-NEXT:    .cfi_def_cfa sp, 16
1963; CHECK-NEXT:    addi sp, sp, 16
1964; CHECK-NEXT:    .cfi_def_cfa_offset 0
1965; CHECK-NEXT:    ret
1966  %v = load <64 x float>, ptr %x
1967  %red = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %v)
1968  ret float %red
1969}
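; Note: when the source splits into two LMUL=8 halves, NaNs must survive the
; vfmin.vv combine, which would otherwise ignore them: the vmfeq.vv
; self-compares and vmerge.vvm pairs force both vfmin operands to the NaN
; wherever either half holds one. One m8 temporary is spilled to an 8*vlenb
; stack slot in the process.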
1970
1971define float @vreduce_fminimum_v64f32_nonans(ptr %x) {
1972; CHECK-LABEL: vreduce_fminimum_v64f32_nonans:
1973; CHECK:       # %bb.0:
1974; CHECK-NEXT:    li a1, 32
1975; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1976; CHECK-NEXT:    vle32.v v8, (a0)
1977; CHECK-NEXT:    addi a0, a0, 128
1978; CHECK-NEXT:    vle32.v v16, (a0)
1979; CHECK-NEXT:    vfmin.vv v8, v8, v16
1980; CHECK-NEXT:    vfredmin.vs v8, v8, v8
1981; CHECK-NEXT:    vfmv.f.s fa0, v8
1982; CHECK-NEXT:    ret
1983  %v = load <64 x float>, ptr %x
1984  %red = call nnan float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %v)
1985  ret float %red
1986}
1987
1988declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
1989
1990define float @vreduce_fminimum_v128f32(ptr %x) {
1991; CHECK-LABEL: vreduce_fminimum_v128f32:
1992; CHECK:       # %bb.0:
1993; CHECK-NEXT:    addi sp, sp, -16
1994; CHECK-NEXT:    .cfi_def_cfa_offset 16
1995; CHECK-NEXT:    csrr a1, vlenb
1996; CHECK-NEXT:    slli a1, a1, 4
1997; CHECK-NEXT:    sub sp, sp, a1
1998; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1999; CHECK-NEXT:    li a1, 32
2000; CHECK-NEXT:    addi a2, a0, 128
2001; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2002; CHECK-NEXT:    vle32.v v8, (a2)
2003; CHECK-NEXT:    addi a1, a0, 384
2004; CHECK-NEXT:    vle32.v v16, (a1)
2005; CHECK-NEXT:    addi a1, a0, 256
2006; CHECK-NEXT:    vle32.v v24, (a0)
2007; CHECK-NEXT:    addi a0, sp, 16
2008; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2009; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2010; CHECK-NEXT:    vmfeq.vv v7, v16, v16
2011; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
2012; CHECK-NEXT:    csrr a0, vlenb
2013; CHECK-NEXT:    slli a0, a0, 3
2014; CHECK-NEXT:    add a0, sp, a0
2015; CHECK-NEXT:    addi a0, a0, 16
2016; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2017; CHECK-NEXT:    vle32.v v24, (a1)
2018; CHECK-NEXT:    vmv1r.v v0, v7
2019; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
2020; CHECK-NEXT:    csrr a0, vlenb
2021; CHECK-NEXT:    slli a0, a0, 3
2022; CHECK-NEXT:    add a0, sp, a0
2023; CHECK-NEXT:    addi a0, a0, 16
2024; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2025; CHECK-NEXT:    vfmin.vv v8, v16, v8
2026; CHECK-NEXT:    csrr a0, vlenb
2027; CHECK-NEXT:    slli a0, a0, 3
2028; CHECK-NEXT:    add a0, sp, a0
2029; CHECK-NEXT:    addi a0, a0, 16
2030; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2031; CHECK-NEXT:    addi a0, sp, 16
2032; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2033; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2034; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2035; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
2036; CHECK-NEXT:    vmv1r.v v0, v7
2037; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
2038; CHECK-NEXT:    vfmin.vv v16, v8, v16
2039; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2040; CHECK-NEXT:    csrr a0, vlenb
2041; CHECK-NEXT:    slli a0, a0, 3
2042; CHECK-NEXT:    add a0, sp, a0
2043; CHECK-NEXT:    addi a0, a0, 16
2044; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
2045; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2046; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2047; CHECK-NEXT:    addi a0, sp, 16
2048; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2049; CHECK-NEXT:    vmv1r.v v0, v7
2050; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2051; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2052; CHECK-NEXT:    vfmin.vv v8, v8, v16
2053; CHECK-NEXT:    vmfne.vv v16, v8, v8
2054; CHECK-NEXT:    vcpop.m a0, v16
2055; CHECK-NEXT:    beqz a0, .LBB121_2
2056; CHECK-NEXT:  # %bb.1:
2057; CHECK-NEXT:    lui a0, 523264
2058; CHECK-NEXT:    fmv.w.x fa0, a0
2059; CHECK-NEXT:    j .LBB121_3
2060; CHECK-NEXT:  .LBB121_2:
2061; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2062; CHECK-NEXT:    vfmv.f.s fa0, v8
2063; CHECK-NEXT:  .LBB121_3:
2064; CHECK-NEXT:    csrr a0, vlenb
2065; CHECK-NEXT:    slli a0, a0, 4
2066; CHECK-NEXT:    add sp, sp, a0
2067; CHECK-NEXT:    .cfi_def_cfa sp, 16
2068; CHECK-NEXT:    addi sp, sp, 16
2069; CHECK-NEXT:    .cfi_def_cfa_offset 0
2070; CHECK-NEXT:    ret
2071  %v = load <128 x float>, ptr %x
2072  %red = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> %v)
2073  ret float %red
2074}
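; Note: the <128 x float> case applies the same NaN-propagating merge sequence
; across four 32-element chunks, reserving 16*vlenb bytes of stack for two m8
; spill slots.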
2075
2076define float @vreduce_fminimum_v128f32_nonans(ptr %x) {
2077; CHECK-LABEL: vreduce_fminimum_v128f32_nonans:
2078; CHECK:       # %bb.0:
2079; CHECK-NEXT:    li a1, 32
2080; CHECK-NEXT:    addi a2, a0, 384
2081; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2082; CHECK-NEXT:    vle32.v v8, (a2)
2083; CHECK-NEXT:    addi a1, a0, 256
2084; CHECK-NEXT:    vle32.v v16, (a0)
2085; CHECK-NEXT:    addi a0, a0, 128
2086; CHECK-NEXT:    vle32.v v24, (a0)
2087; CHECK-NEXT:    vle32.v v0, (a1)
2088; CHECK-NEXT:    vfmin.vv v8, v24, v8
2089; CHECK-NEXT:    vfmin.vv v16, v16, v0
2090; CHECK-NEXT:    vfmin.vv v8, v16, v8
2091; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2092; CHECK-NEXT:    vfmv.f.s fa0, v8
2093; CHECK-NEXT:    ret
2094  %v = load <128 x float>, ptr %x
2095  %red = call nnan float @llvm.vector.reduce.fminimum.v128f32(<128 x float> %v)
2096  ret float %red
2097}
2098
2099declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
2100
2101define double @vreduce_fminimum_v2f64(ptr %x) {
2102; CHECK-LABEL: vreduce_fminimum_v2f64:
2103; CHECK:       # %bb.0:
2104; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2105; CHECK-NEXT:    vle64.v v8, (a0)
2106; CHECK-NEXT:    vmfne.vv v9, v8, v8
2107; CHECK-NEXT:    vcpop.m a0, v9
2108; CHECK-NEXT:    beqz a0, .LBB123_2
2109; CHECK-NEXT:  # %bb.1:
2110; CHECK-NEXT:    lui a0, %hi(.LCPI123_0)
2111; CHECK-NEXT:    fld fa0, %lo(.LCPI123_0)(a0)
2112; CHECK-NEXT:    ret
2113; CHECK-NEXT:  .LBB123_2:
2114; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2115; CHECK-NEXT:    vfmv.f.s fa0, v8
2116; CHECK-NEXT:    ret
2117  %v = load <2 x double>, ptr %x
2118  %red = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %v)
2119  ret double %red
2120}
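; Note: for f64 the canonical NaN result cannot be built with a single lui, so
; it is loaded from a constant pool entry (the .LCPI labels) instead of being
; materialized in an integer register.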
2121
2122define double @vreduce_fminimum_v2f64_nonans(ptr %x) {
2123; CHECK-LABEL: vreduce_fminimum_v2f64_nonans:
2124; CHECK:       # %bb.0:
2125; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2126; CHECK-NEXT:    vle64.v v8, (a0)
2127; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2128; CHECK-NEXT:    vfmv.f.s fa0, v8
2129; CHECK-NEXT:    ret
2130  %v = load <2 x double>, ptr %x
2131  %red = call nnan double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %v)
2132  ret double %red
2133}
2134
2135declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
2136
2137define double @vreduce_fminimum_v4f64(ptr %x) {
2138; CHECK-LABEL: vreduce_fminimum_v4f64:
2139; CHECK:       # %bb.0:
2140; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2141; CHECK-NEXT:    vle64.v v8, (a0)
2142; CHECK-NEXT:    vmfne.vv v10, v8, v8
2143; CHECK-NEXT:    vcpop.m a0, v10
2144; CHECK-NEXT:    beqz a0, .LBB125_2
2145; CHECK-NEXT:  # %bb.1:
2146; CHECK-NEXT:    lui a0, %hi(.LCPI125_0)
2147; CHECK-NEXT:    fld fa0, %lo(.LCPI125_0)(a0)
2148; CHECK-NEXT:    ret
2149; CHECK-NEXT:  .LBB125_2:
2150; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2151; CHECK-NEXT:    vfmv.f.s fa0, v8
2152; CHECK-NEXT:    ret
2153  %v = load <4 x double>, ptr %x
2154  %red = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %v)
2155  ret double %red
2156}
2157
2158define double @vreduce_fminimum_v4f64_nonans(ptr %x) {
2159; CHECK-LABEL: vreduce_fminimum_v4f64_nonans:
2160; CHECK:       # %bb.0:
2161; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2162; CHECK-NEXT:    vle64.v v8, (a0)
2163; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2164; CHECK-NEXT:    vfmv.f.s fa0, v8
2165; CHECK-NEXT:    ret
2166  %v = load <4 x double>, ptr %x
2167  %red = call nnan double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %v)
2168  ret double %red
2169}
2170
2171declare double @llvm.vector.reduce.fminimum.v8f64(<8 x double>)
2172
2173define double @vreduce_fminimum_v8f64(ptr %x) {
2174; CHECK-LABEL: vreduce_fminimum_v8f64:
2175; CHECK:       # %bb.0:
2176; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2177; CHECK-NEXT:    vle64.v v8, (a0)
2178; CHECK-NEXT:    vmfne.vv v12, v8, v8
2179; CHECK-NEXT:    vcpop.m a0, v12
2180; CHECK-NEXT:    beqz a0, .LBB127_2
2181; CHECK-NEXT:  # %bb.1:
2182; CHECK-NEXT:    lui a0, %hi(.LCPI127_0)
2183; CHECK-NEXT:    fld fa0, %lo(.LCPI127_0)(a0)
2184; CHECK-NEXT:    ret
2185; CHECK-NEXT:  .LBB127_2:
2186; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2187; CHECK-NEXT:    vfmv.f.s fa0, v8
2188; CHECK-NEXT:    ret
2189  %v = load <8 x double>, ptr %x
2190  %red = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %v)
2191  ret double %red
2192}
2193
2194define double @vreduce_fminimum_v8f64_nonans(ptr %x) {
2195; CHECK-LABEL: vreduce_fminimum_v8f64_nonans:
2196; CHECK:       # %bb.0:
2197; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2198; CHECK-NEXT:    vle64.v v8, (a0)
2199; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2200; CHECK-NEXT:    vfmv.f.s fa0, v8
2201; CHECK-NEXT:    ret
2202  %v = load <8 x double>, ptr %x
2203  %red = call nnan double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %v)
2204  ret double %red
2205}
2206
2207declare double @llvm.vector.reduce.fminimum.v16f64(<16 x double>)
2208
2209define double @vreduce_fminimum_v16f64(ptr %x) {
2210; CHECK-LABEL: vreduce_fminimum_v16f64:
2211; CHECK:       # %bb.0:
2212; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2213; CHECK-NEXT:    vle64.v v8, (a0)
2214; CHECK-NEXT:    vmfne.vv v16, v8, v8
2215; CHECK-NEXT:    vcpop.m a0, v16
2216; CHECK-NEXT:    beqz a0, .LBB129_2
2217; CHECK-NEXT:  # %bb.1:
2218; CHECK-NEXT:    lui a0, %hi(.LCPI129_0)
2219; CHECK-NEXT:    fld fa0, %lo(.LCPI129_0)(a0)
2220; CHECK-NEXT:    ret
2221; CHECK-NEXT:  .LBB129_2:
2222; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2223; CHECK-NEXT:    vfmv.f.s fa0, v8
2224; CHECK-NEXT:    ret
2225  %v = load <16 x double>, ptr %x
2226  %red = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %v)
2227  ret double %red
2228}
2229
2230define double @vreduce_fminimum_v16f64_nonans(ptr %x) {
2231; CHECK-LABEL: vreduce_fminimum_v16f64_nonans:
2232; CHECK:       # %bb.0:
2233; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2234; CHECK-NEXT:    vle64.v v8, (a0)
2235; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2236; CHECK-NEXT:    vfmv.f.s fa0, v8
2237; CHECK-NEXT:    ret
2238  %v = load <16 x double>, ptr %x
2239  %red = call nnan double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %v)
2240  ret double %red
2241}
2242
2243declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>)
2244
2245define double @vreduce_fminimum_v32f64(ptr %x) {
2246; CHECK-LABEL: vreduce_fminimum_v32f64:
2247; CHECK:       # %bb.0:
2248; CHECK-NEXT:    addi sp, sp, -16
2249; CHECK-NEXT:    .cfi_def_cfa_offset 16
2250; CHECK-NEXT:    csrr a1, vlenb
2251; CHECK-NEXT:    slli a1, a1, 3
2252; CHECK-NEXT:    sub sp, sp, a1
2253; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2254; CHECK-NEXT:    addi a1, a0, 128
2255; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2256; CHECK-NEXT:    vle64.v v16, (a0)
2257; CHECK-NEXT:    vle64.v v24, (a1)
2258; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2259; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2260; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2261; CHECK-NEXT:    addi a0, sp, 16
2262; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2263; CHECK-NEXT:    vmv1r.v v0, v7
2264; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2265; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2266; CHECK-NEXT:    vfmin.vv v8, v8, v16
2267; CHECK-NEXT:    vmfne.vv v16, v8, v8
2268; CHECK-NEXT:    vcpop.m a0, v16
2269; CHECK-NEXT:    beqz a0, .LBB131_2
2270; CHECK-NEXT:  # %bb.1:
2271; CHECK-NEXT:    lui a0, %hi(.LCPI131_0)
2272; CHECK-NEXT:    fld fa0, %lo(.LCPI131_0)(a0)
2273; CHECK-NEXT:    j .LBB131_3
2274; CHECK-NEXT:  .LBB131_2:
2275; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2276; CHECK-NEXT:    vfmv.f.s fa0, v8
2277; CHECK-NEXT:  .LBB131_3:
2278; CHECK-NEXT:    csrr a0, vlenb
2279; CHECK-NEXT:    slli a0, a0, 3
2280; CHECK-NEXT:    add sp, sp, a0
2281; CHECK-NEXT:    .cfi_def_cfa sp, 16
2282; CHECK-NEXT:    addi sp, sp, 16
2283; CHECK-NEXT:    .cfi_def_cfa_offset 0
2284; CHECK-NEXT:    ret
2285  %v = load <32 x double>, ptr %x
2286  %red = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v)
2287  ret double %red
2288}
2289
2290define double @vreduce_fminimum_v32f64_nonans(ptr %x) {
2291; CHECK-LABEL: vreduce_fminimum_v32f64_nonans:
2292; CHECK:       # %bb.0:
2293; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2294; CHECK-NEXT:    vle64.v v8, (a0)
2295; CHECK-NEXT:    addi a0, a0, 128
2296; CHECK-NEXT:    vle64.v v16, (a0)
2297; CHECK-NEXT:    vfmin.vv v8, v8, v16
2298; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2299; CHECK-NEXT:    vfmv.f.s fa0, v8
2300; CHECK-NEXT:    ret
2301  %v = load <32 x double>, ptr %x
2302  %red = call nnan double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v)
2303  ret double %red
2304}
2305
2306declare double @llvm.vector.reduce.fminimum.v64f64(<64 x double>)
2307
2308define double @vreduce_fminimum_v64f64(ptr %x) {
2309; CHECK-LABEL: vreduce_fminimum_v64f64:
2310; CHECK:       # %bb.0:
2311; CHECK-NEXT:    addi sp, sp, -16
2312; CHECK-NEXT:    .cfi_def_cfa_offset 16
2313; CHECK-NEXT:    csrr a1, vlenb
2314; CHECK-NEXT:    slli a1, a1, 4
2315; CHECK-NEXT:    sub sp, sp, a1
2316; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2317; CHECK-NEXT:    addi a1, a0, 128
2318; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2319; CHECK-NEXT:    vle64.v v8, (a1)
2320; CHECK-NEXT:    addi a1, a0, 384
2321; CHECK-NEXT:    vle64.v v16, (a1)
2322; CHECK-NEXT:    addi a1, a0, 256
2323; CHECK-NEXT:    vle64.v v24, (a0)
2324; CHECK-NEXT:    addi a0, sp, 16
2325; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2326; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2327; CHECK-NEXT:    vmfeq.vv v7, v16, v16
2328; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
2329; CHECK-NEXT:    csrr a0, vlenb
2330; CHECK-NEXT:    slli a0, a0, 3
2331; CHECK-NEXT:    add a0, sp, a0
2332; CHECK-NEXT:    addi a0, a0, 16
2333; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2334; CHECK-NEXT:    vle64.v v24, (a1)
2335; CHECK-NEXT:    vmv1r.v v0, v7
2336; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
2337; CHECK-NEXT:    csrr a0, vlenb
2338; CHECK-NEXT:    slli a0, a0, 3
2339; CHECK-NEXT:    add a0, sp, a0
2340; CHECK-NEXT:    addi a0, a0, 16
2341; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2342; CHECK-NEXT:    vfmin.vv v8, v16, v8
2343; CHECK-NEXT:    csrr a0, vlenb
2344; CHECK-NEXT:    slli a0, a0, 3
2345; CHECK-NEXT:    add a0, sp, a0
2346; CHECK-NEXT:    addi a0, a0, 16
2347; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2348; CHECK-NEXT:    addi a0, sp, 16
2349; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2350; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2351; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2352; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
2353; CHECK-NEXT:    vmv1r.v v0, v7
2354; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
2355; CHECK-NEXT:    vfmin.vv v16, v8, v16
2356; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2357; CHECK-NEXT:    csrr a0, vlenb
2358; CHECK-NEXT:    slli a0, a0, 3
2359; CHECK-NEXT:    add a0, sp, a0
2360; CHECK-NEXT:    addi a0, a0, 16
2361; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
2362; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2363; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2364; CHECK-NEXT:    addi a0, sp, 16
2365; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2366; CHECK-NEXT:    vmv1r.v v0, v7
2367; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2368; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2369; CHECK-NEXT:    vfmin.vv v8, v8, v16
2370; CHECK-NEXT:    vmfne.vv v16, v8, v8
2371; CHECK-NEXT:    vcpop.m a0, v16
2372; CHECK-NEXT:    beqz a0, .LBB133_2
2373; CHECK-NEXT:  # %bb.1:
2374; CHECK-NEXT:    lui a0, %hi(.LCPI133_0)
2375; CHECK-NEXT:    fld fa0, %lo(.LCPI133_0)(a0)
2376; CHECK-NEXT:    j .LBB133_3
2377; CHECK-NEXT:  .LBB133_2:
2378; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2379; CHECK-NEXT:    vfmv.f.s fa0, v8
2380; CHECK-NEXT:  .LBB133_3:
2381; CHECK-NEXT:    csrr a0, vlenb
2382; CHECK-NEXT:    slli a0, a0, 4
2383; CHECK-NEXT:    add sp, sp, a0
2384; CHECK-NEXT:    .cfi_def_cfa sp, 16
2385; CHECK-NEXT:    addi sp, sp, 16
2386; CHECK-NEXT:    .cfi_def_cfa_offset 0
2387; CHECK-NEXT:    ret
2388  %v = load <64 x double>, ptr %x
2389  %red = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> %v)
2390  ret double %red
2391}
2392
2393define double @vreduce_fminimum_v64f64_nonans(ptr %x) {
2394; CHECK-LABEL: vreduce_fminimum_v64f64_nonans:
2395; CHECK:       # %bb.0:
2396; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2397; CHECK-NEXT:    vle64.v v8, (a0)
2398; CHECK-NEXT:    addi a1, a0, 384
2399; CHECK-NEXT:    vle64.v v16, (a1)
2400; CHECK-NEXT:    addi a1, a0, 256
2401; CHECK-NEXT:    addi a0, a0, 128
2402; CHECK-NEXT:    vle64.v v24, (a0)
2403; CHECK-NEXT:    vle64.v v0, (a1)
2404; CHECK-NEXT:    vfmin.vv v16, v24, v16
2405; CHECK-NEXT:    vfmin.vv v8, v8, v0
2406; CHECK-NEXT:    vfmin.vv v8, v8, v16
2407; CHECK-NEXT:    vfredmin.vs v8, v8, v8
2408; CHECK-NEXT:    vfmv.f.s fa0, v8
2409; CHECK-NEXT:    ret
2410  %v = load <64 x double>, ptr %x
2411  %red = call nnan double @llvm.vector.reduce.fminimum.v64f64(<64 x double> %v)
2412  ret double %red
2413}
2414
2415declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
2416
2417define float @vreduce_fmaximum_v2f32(ptr %x) {
2418; CHECK-LABEL: vreduce_fmaximum_v2f32:
2419; CHECK:       # %bb.0:
2420; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
2421; CHECK-NEXT:    vle32.v v8, (a0)
2422; CHECK-NEXT:    vmfne.vv v9, v8, v8
2423; CHECK-NEXT:    vcpop.m a0, v9
2424; CHECK-NEXT:    beqz a0, .LBB135_2
2425; CHECK-NEXT:  # %bb.1:
2426; CHECK-NEXT:    lui a0, 523264
2427; CHECK-NEXT:    fmv.w.x fa0, a0
2428; CHECK-NEXT:    ret
2429; CHECK-NEXT:  .LBB135_2:
2430; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2431; CHECK-NEXT:    vfmv.f.s fa0, v8
2432; CHECK-NEXT:    ret
2433  %v = load <2 x float>, ptr %x
2434  %red = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %v)
2435  ret float %red
2436}
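; Note: the fmaximum tests mirror the fminimum ones: the same NaN detection and
; canonical-NaN early exit, with vfmax.vv/vfredmax.vs in place of the min forms
; and -infinity (0xFF800000) as the start value in the <7 x float> case.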
2437
2438define float @vreduce_fmaximum_v2f32_nonans(ptr %x) {
2439; CHECK-LABEL: vreduce_fmaximum_v2f32_nonans:
2440; CHECK:       # %bb.0:
2441; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
2442; CHECK-NEXT:    vle32.v v8, (a0)
2443; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2444; CHECK-NEXT:    vfmv.f.s fa0, v8
2445; CHECK-NEXT:    ret
2446  %v = load <2 x float>, ptr %x
2447  %red = call nnan float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %v)
2448  ret float %red
2449}
2450
2451declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
2452
2453define float @vreduce_fmaximum_v4f32(ptr %x) {
2454; CHECK-LABEL: vreduce_fmaximum_v4f32:
2455; CHECK:       # %bb.0:
2456; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2457; CHECK-NEXT:    vle32.v v8, (a0)
2458; CHECK-NEXT:    vmfne.vv v9, v8, v8
2459; CHECK-NEXT:    vcpop.m a0, v9
2460; CHECK-NEXT:    beqz a0, .LBB137_2
2461; CHECK-NEXT:  # %bb.1:
2462; CHECK-NEXT:    lui a0, 523264
2463; CHECK-NEXT:    fmv.w.x fa0, a0
2464; CHECK-NEXT:    ret
2465; CHECK-NEXT:  .LBB137_2:
2466; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2467; CHECK-NEXT:    vfmv.f.s fa0, v8
2468; CHECK-NEXT:    ret
2469  %v = load <4 x float>, ptr %x
2470  %red = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %v)
2471  ret float %red
2472}
2473
2474define float @vreduce_fmaximum_v4f32_nonans(ptr %x) {
2475; CHECK-LABEL: vreduce_fmaximum_v4f32_nonans:
2476; CHECK:       # %bb.0:
2477; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2478; CHECK-NEXT:    vle32.v v8, (a0)
2479; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2480; CHECK-NEXT:    vfmv.f.s fa0, v8
2481; CHECK-NEXT:    ret
2482  %v = load <4 x float>, ptr %x
2483  %red = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %v)
2484  ret float %red
2485}
2486
2487declare float @llvm.vector.reduce.fmaximum.v7f32(<7 x float>)
2488
2489define float @vreduce_fmaximum_v7f32(ptr %x) {
2490; CHECK-LABEL: vreduce_fmaximum_v7f32:
2491; CHECK:       # %bb.0:
2492; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
2493; CHECK-NEXT:    vle32.v v8, (a0)
2494; CHECK-NEXT:    vmfne.vv v10, v8, v8
2495; CHECK-NEXT:    vcpop.m a0, v10
2496; CHECK-NEXT:    beqz a0, .LBB139_2
2497; CHECK-NEXT:  # %bb.1:
2498; CHECK-NEXT:    lui a0, 523264
2499; CHECK-NEXT:    fmv.w.x fa0, a0
2500; CHECK-NEXT:    ret
2501; CHECK-NEXT:  .LBB139_2:
2502; CHECK-NEXT:    lui a0, 1046528
2503; CHECK-NEXT:    vmv.s.x v10, a0
2504; CHECK-NEXT:    vfredmax.vs v8, v8, v10
2505; CHECK-NEXT:    vfmv.f.s fa0, v8
2506; CHECK-NEXT:    ret
2507  %v = load <7 x float>, ptr %x
2508  %red = call float @llvm.vector.reduce.fmaximum.v7f32(<7 x float> %v)
2509  ret float %red
2510}
2511
2512define float @vreduce_fmaximum_v7f32_nonans(ptr %x) {
2513; CHECK-LABEL: vreduce_fmaximum_v7f32_nonans:
2514; CHECK:       # %bb.0:
2515; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
2516; CHECK-NEXT:    vle32.v v8, (a0)
2517; CHECK-NEXT:    lui a0, 1046528
2518; CHECK-NEXT:    vmv.s.x v10, a0
2519; CHECK-NEXT:    vfredmax.vs v8, v8, v10
2520; CHECK-NEXT:    vfmv.f.s fa0, v8
2521; CHECK-NEXT:    ret
2522  %v = load <7 x float>, ptr %x
2523  %red = call nnan float @llvm.vector.reduce.fmaximum.v7f32(<7 x float> %v)
2524  ret float %red
2525}
2526
2527declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
2528
2529define float @vreduce_fmaximum_v8f32(ptr %x) {
2530; CHECK-LABEL: vreduce_fmaximum_v8f32:
2531; CHECK:       # %bb.0:
2532; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2533; CHECK-NEXT:    vle32.v v8, (a0)
2534; CHECK-NEXT:    vmfne.vv v10, v8, v8
2535; CHECK-NEXT:    vcpop.m a0, v10
2536; CHECK-NEXT:    beqz a0, .LBB141_2
2537; CHECK-NEXT:  # %bb.1:
2538; CHECK-NEXT:    lui a0, 523264
2539; CHECK-NEXT:    fmv.w.x fa0, a0
2540; CHECK-NEXT:    ret
2541; CHECK-NEXT:  .LBB141_2:
2542; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2543; CHECK-NEXT:    vfmv.f.s fa0, v8
2544; CHECK-NEXT:    ret
2545  %v = load <8 x float>, ptr %x
2546  %red = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %v)
2547  ret float %red
2548}
2549
2550define float @vreduce_fmaximum_v8f32_nonans(ptr %x) {
2551; CHECK-LABEL: vreduce_fmaximum_v8f32_nonans:
2552; CHECK:       # %bb.0:
2553; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2554; CHECK-NEXT:    vle32.v v8, (a0)
2555; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2556; CHECK-NEXT:    vfmv.f.s fa0, v8
2557; CHECK-NEXT:    ret
2558  %v = load <8 x float>, ptr %x
2559  %red = call nnan float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %v)
2560  ret float %red
2561}
2562
2563declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float>)
2564
2565define float @vreduce_fmaximum_v16f32(ptr %x) {
2566; CHECK-LABEL: vreduce_fmaximum_v16f32:
2567; CHECK:       # %bb.0:
2568; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
2569; CHECK-NEXT:    vle32.v v8, (a0)
2570; CHECK-NEXT:    vmfne.vv v12, v8, v8
2571; CHECK-NEXT:    vcpop.m a0, v12
2572; CHECK-NEXT:    beqz a0, .LBB143_2
2573; CHECK-NEXT:  # %bb.1:
2574; CHECK-NEXT:    lui a0, 523264
2575; CHECK-NEXT:    fmv.w.x fa0, a0
2576; CHECK-NEXT:    ret
2577; CHECK-NEXT:  .LBB143_2:
2578; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2579; CHECK-NEXT:    vfmv.f.s fa0, v8
2580; CHECK-NEXT:    ret
2581  %v = load <16 x float>, ptr %x
2582  %red = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v)
2583  ret float %red
2584}
2585
2586define float @vreduce_fmaximum_v16f32_nonans(ptr %x) {
2587; CHECK-LABEL: vreduce_fmaximum_v16f32_nonans:
2588; CHECK:       # %bb.0:
2589; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
2590; CHECK-NEXT:    vle32.v v8, (a0)
2591; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2592; CHECK-NEXT:    vfmv.f.s fa0, v8
2593; CHECK-NEXT:    ret
2594  %v = load <16 x float>, ptr %x
2595  %red = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v)
2596  ret float %red
2597}
2598
2599declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>)
2600
2601define float @vreduce_fmaximum_v32f32(ptr %x) {
2602; CHECK-LABEL: vreduce_fmaximum_v32f32:
2603; CHECK:       # %bb.0:
2604; CHECK-NEXT:    li a1, 32
2605; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2606; CHECK-NEXT:    vle32.v v8, (a0)
2607; CHECK-NEXT:    vmfne.vv v16, v8, v8
2608; CHECK-NEXT:    vcpop.m a0, v16
2609; CHECK-NEXT:    beqz a0, .LBB145_2
2610; CHECK-NEXT:  # %bb.1:
2611; CHECK-NEXT:    lui a0, 523264
2612; CHECK-NEXT:    fmv.w.x fa0, a0
2613; CHECK-NEXT:    ret
2614; CHECK-NEXT:  .LBB145_2:
2615; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2616; CHECK-NEXT:    vfmv.f.s fa0, v8
2617; CHECK-NEXT:    ret
2618  %v = load <32 x float>, ptr %x
2619  %red = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v)
2620  ret float %red
2621}
2622
2623define float @vreduce_fmaximum_v32f32_nonans(ptr %x) {
2624; CHECK-LABEL: vreduce_fmaximum_v32f32_nonans:
2625; CHECK:       # %bb.0:
2626; CHECK-NEXT:    li a1, 32
2627; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2628; CHECK-NEXT:    vle32.v v8, (a0)
2629; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2630; CHECK-NEXT:    vfmv.f.s fa0, v8
2631; CHECK-NEXT:    ret
2632  %v = load <32 x float>, ptr %x
2633  %red = call nnan float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v)
2634  ret float %red
2635}
2636
2637declare float @llvm.vector.reduce.fmaximum.v64f32(<64 x float>)
2638
2639define float @vreduce_fmaximum_v64f32(ptr %x) {
2640; CHECK-LABEL: vreduce_fmaximum_v64f32:
2641; CHECK:       # %bb.0:
2642; CHECK-NEXT:    addi sp, sp, -16
2643; CHECK-NEXT:    .cfi_def_cfa_offset 16
2644; CHECK-NEXT:    csrr a1, vlenb
2645; CHECK-NEXT:    slli a1, a1, 3
2646; CHECK-NEXT:    sub sp, sp, a1
2647; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2648; CHECK-NEXT:    addi a1, a0, 128
2649; CHECK-NEXT:    li a2, 32
2650; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2651; CHECK-NEXT:    vle32.v v16, (a0)
2652; CHECK-NEXT:    vle32.v v24, (a1)
2653; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2654; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2655; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2656; CHECK-NEXT:    addi a0, sp, 16
2657; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2658; CHECK-NEXT:    vmv1r.v v0, v7
2659; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2660; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2661; CHECK-NEXT:    vfmax.vv v8, v8, v16
2662; CHECK-NEXT:    vmfne.vv v16, v8, v8
2663; CHECK-NEXT:    vcpop.m a0, v16
2664; CHECK-NEXT:    beqz a0, .LBB147_2
2665; CHECK-NEXT:  # %bb.1:
2666; CHECK-NEXT:    lui a0, 523264
2667; CHECK-NEXT:    fmv.w.x fa0, a0
2668; CHECK-NEXT:    j .LBB147_3
2669; CHECK-NEXT:  .LBB147_2:
2670; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2671; CHECK-NEXT:    vfmv.f.s fa0, v8
2672; CHECK-NEXT:  .LBB147_3:
2673; CHECK-NEXT:    csrr a0, vlenb
2674; CHECK-NEXT:    slli a0, a0, 3
2675; CHECK-NEXT:    add sp, sp, a0
2676; CHECK-NEXT:    .cfi_def_cfa sp, 16
2677; CHECK-NEXT:    addi sp, sp, 16
2678; CHECK-NEXT:    .cfi_def_cfa_offset 0
2679; CHECK-NEXT:    ret
2680  %v = load <64 x float>, ptr %x
2681  %red = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %v)
2682  ret float %red
2683}
2684
2685define float @vreduce_fmaximum_v64f32_nonans(ptr %x) {
2686; CHECK-LABEL: vreduce_fmaximum_v64f32_nonans:
2687; CHECK:       # %bb.0:
2688; CHECK-NEXT:    li a1, 32
2689; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2690; CHECK-NEXT:    vle32.v v8, (a0)
2691; CHECK-NEXT:    addi a0, a0, 128
2692; CHECK-NEXT:    vle32.v v16, (a0)
2693; CHECK-NEXT:    vfmax.vv v8, v8, v16
2694; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2695; CHECK-NEXT:    vfmv.f.s fa0, v8
2696; CHECK-NEXT:    ret
2697  %v = load <64 x float>, ptr %x
2698  %red = call nnan float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %v)
2699  ret float %red
2700}
2701
2702declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
2703
2704define float @vreduce_fmaximum_v128f32(ptr %x) {
2705; CHECK-LABEL: vreduce_fmaximum_v128f32:
2706; CHECK:       # %bb.0:
2707; CHECK-NEXT:    addi sp, sp, -16
2708; CHECK-NEXT:    .cfi_def_cfa_offset 16
2709; CHECK-NEXT:    csrr a1, vlenb
2710; CHECK-NEXT:    slli a1, a1, 4
2711; CHECK-NEXT:    sub sp, sp, a1
2712; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2713; CHECK-NEXT:    li a1, 32
2714; CHECK-NEXT:    addi a2, a0, 128
2715; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2716; CHECK-NEXT:    vle32.v v8, (a2)
2717; CHECK-NEXT:    addi a1, a0, 384
2718; CHECK-NEXT:    vle32.v v16, (a1)
2719; CHECK-NEXT:    addi a1, a0, 256
2720; CHECK-NEXT:    vle32.v v24, (a0)
2721; CHECK-NEXT:    addi a0, sp, 16
2722; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2723; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2724; CHECK-NEXT:    vmfeq.vv v7, v16, v16
2725; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
2726; CHECK-NEXT:    csrr a0, vlenb
2727; CHECK-NEXT:    slli a0, a0, 3
2728; CHECK-NEXT:    add a0, sp, a0
2729; CHECK-NEXT:    addi a0, a0, 16
2730; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
2731; CHECK-NEXT:    vle32.v v24, (a1)
2732; CHECK-NEXT:    vmv1r.v v0, v7
2733; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
2734; CHECK-NEXT:    csrr a0, vlenb
2735; CHECK-NEXT:    slli a0, a0, 3
2736; CHECK-NEXT:    add a0, sp, a0
2737; CHECK-NEXT:    addi a0, a0, 16
2738; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2739; CHECK-NEXT:    vfmax.vv v8, v16, v8
2740; CHECK-NEXT:    csrr a0, vlenb
2741; CHECK-NEXT:    slli a0, a0, 3
2742; CHECK-NEXT:    add a0, sp, a0
2743; CHECK-NEXT:    addi a0, a0, 16
2744; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2745; CHECK-NEXT:    addi a0, sp, 16
2746; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2747; CHECK-NEXT:    vmfeq.vv v0, v8, v8
2748; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2749; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
2750; CHECK-NEXT:    vmv1r.v v0, v7
2751; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
2752; CHECK-NEXT:    vfmax.vv v16, v8, v16
2753; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2754; CHECK-NEXT:    csrr a0, vlenb
2755; CHECK-NEXT:    slli a0, a0, 3
2756; CHECK-NEXT:    add a0, sp, a0
2757; CHECK-NEXT:    addi a0, a0, 16
2758; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
2759; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2760; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2761; CHECK-NEXT:    addi a0, sp, 16
2762; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2763; CHECK-NEXT:    vmv1r.v v0, v7
2764; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2765; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2766; CHECK-NEXT:    vfmax.vv v8, v8, v16
2767; CHECK-NEXT:    vmfne.vv v16, v8, v8
2768; CHECK-NEXT:    vcpop.m a0, v16
2769; CHECK-NEXT:    beqz a0, .LBB149_2
2770; CHECK-NEXT:  # %bb.1:
2771; CHECK-NEXT:    lui a0, 523264
2772; CHECK-NEXT:    fmv.w.x fa0, a0
2773; CHECK-NEXT:    j .LBB149_3
2774; CHECK-NEXT:  .LBB149_2:
2775; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2776; CHECK-NEXT:    vfmv.f.s fa0, v8
2777; CHECK-NEXT:  .LBB149_3:
2778; CHECK-NEXT:    csrr a0, vlenb
2779; CHECK-NEXT:    slli a0, a0, 4
2780; CHECK-NEXT:    add sp, sp, a0
2781; CHECK-NEXT:    .cfi_def_cfa sp, 16
2782; CHECK-NEXT:    addi sp, sp, 16
2783; CHECK-NEXT:    .cfi_def_cfa_offset 0
2784; CHECK-NEXT:    ret
2785  %v = load <128 x float>, ptr %x
2786  %red = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> %v)
2787  ret float %red
2788}
2789
2790define float @vreduce_fmaximum_v128f32_nonans(ptr %x) {
2791; CHECK-LABEL: vreduce_fmaximum_v128f32_nonans:
2792; CHECK:       # %bb.0:
2793; CHECK-NEXT:    li a1, 32
2794; CHECK-NEXT:    addi a2, a0, 384
2795; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
2796; CHECK-NEXT:    vle32.v v8, (a2)
2797; CHECK-NEXT:    addi a1, a0, 256
2798; CHECK-NEXT:    vle32.v v16, (a0)
2799; CHECK-NEXT:    addi a0, a0, 128
2800; CHECK-NEXT:    vle32.v v24, (a0)
2801; CHECK-NEXT:    vle32.v v0, (a1)
2802; CHECK-NEXT:    vfmax.vv v8, v24, v8
2803; CHECK-NEXT:    vfmax.vv v16, v16, v0
2804; CHECK-NEXT:    vfmax.vv v8, v16, v8
2805; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2806; CHECK-NEXT:    vfmv.f.s fa0, v8
2807; CHECK-NEXT:    ret
2808  %v = load <128 x float>, ptr %x
2809  %red = call nnan float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> %v)
2810  ret float %red
2811}
2812
2813declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
2814
2815define double @vreduce_fmaximum_v2f64(ptr %x) {
2816; CHECK-LABEL: vreduce_fmaximum_v2f64:
2817; CHECK:       # %bb.0:
2818; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2819; CHECK-NEXT:    vle64.v v8, (a0)
2820; CHECK-NEXT:    vmfne.vv v9, v8, v8
2821; CHECK-NEXT:    vcpop.m a0, v9
2822; CHECK-NEXT:    beqz a0, .LBB151_2
2823; CHECK-NEXT:  # %bb.1:
2824; CHECK-NEXT:    lui a0, %hi(.LCPI151_0)
2825; CHECK-NEXT:    fld fa0, %lo(.LCPI151_0)(a0)
2826; CHECK-NEXT:    ret
2827; CHECK-NEXT:  .LBB151_2:
2828; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2829; CHECK-NEXT:    vfmv.f.s fa0, v8
2830; CHECK-NEXT:    ret
2831  %v = load <2 x double>, ptr %x
2832  %red = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %v)
2833  ret double %red
2834}
2835
2836define double @vreduce_fmaximum_v2f64_nonans(ptr %x) {
2837; CHECK-LABEL: vreduce_fmaximum_v2f64_nonans:
2838; CHECK:       # %bb.0:
2839; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2840; CHECK-NEXT:    vle64.v v8, (a0)
2841; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2842; CHECK-NEXT:    vfmv.f.s fa0, v8
2843; CHECK-NEXT:    ret
2844  %v = load <2 x double>, ptr %x
2845  %red = call nnan double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %v)
2846  ret double %red
2847}
2848
2849declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
2850
2851define double @vreduce_fmaximum_v4f64(ptr %x) {
2852; CHECK-LABEL: vreduce_fmaximum_v4f64:
2853; CHECK:       # %bb.0:
2854; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2855; CHECK-NEXT:    vle64.v v8, (a0)
2856; CHECK-NEXT:    vmfne.vv v10, v8, v8
2857; CHECK-NEXT:    vcpop.m a0, v10
2858; CHECK-NEXT:    beqz a0, .LBB153_2
2859; CHECK-NEXT:  # %bb.1:
2860; CHECK-NEXT:    lui a0, %hi(.LCPI153_0)
2861; CHECK-NEXT:    fld fa0, %lo(.LCPI153_0)(a0)
2862; CHECK-NEXT:    ret
2863; CHECK-NEXT:  .LBB153_2:
2864; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2865; CHECK-NEXT:    vfmv.f.s fa0, v8
2866; CHECK-NEXT:    ret
2867  %v = load <4 x double>, ptr %x
2868  %red = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %v)
2869  ret double %red
2870}
2871
2872define double @vreduce_fmaximum_v4f64_nonans(ptr %x) {
2873; CHECK-LABEL: vreduce_fmaximum_v4f64_nonans:
2874; CHECK:       # %bb.0:
2875; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
2876; CHECK-NEXT:    vle64.v v8, (a0)
2877; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2878; CHECK-NEXT:    vfmv.f.s fa0, v8
2879; CHECK-NEXT:    ret
2880  %v = load <4 x double>, ptr %x
2881  %red = call nnan double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %v)
2882  ret double %red
2883}
2884
2885declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>)
2886
2887define double @vreduce_fmaximum_v8f64(ptr %x) {
2888; CHECK-LABEL: vreduce_fmaximum_v8f64:
2889; CHECK:       # %bb.0:
2890; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2891; CHECK-NEXT:    vle64.v v8, (a0)
2892; CHECK-NEXT:    vmfne.vv v12, v8, v8
2893; CHECK-NEXT:    vcpop.m a0, v12
2894; CHECK-NEXT:    beqz a0, .LBB155_2
2895; CHECK-NEXT:  # %bb.1:
2896; CHECK-NEXT:    lui a0, %hi(.LCPI155_0)
2897; CHECK-NEXT:    fld fa0, %lo(.LCPI155_0)(a0)
2898; CHECK-NEXT:    ret
2899; CHECK-NEXT:  .LBB155_2:
2900; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2901; CHECK-NEXT:    vfmv.f.s fa0, v8
2902; CHECK-NEXT:    ret
2903  %v = load <8 x double>, ptr %x
2904  %red = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %v)
2905  ret double %red
2906}
2907
2908define double @vreduce_fmaximum_v8f64_nonans(ptr %x) {
2909; CHECK-LABEL: vreduce_fmaximum_v8f64_nonans:
2910; CHECK:       # %bb.0:
2911; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2912; CHECK-NEXT:    vle64.v v8, (a0)
2913; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2914; CHECK-NEXT:    vfmv.f.s fa0, v8
2915; CHECK-NEXT:    ret
2916  %v = load <8 x double>, ptr %x
2917  %red = call nnan double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %v)
2918  ret double %red
2919}
2920
2921declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>)
2922
2923define double @vreduce_fmaximum_v16f64(ptr %x) {
2924; CHECK-LABEL: vreduce_fmaximum_v16f64:
2925; CHECK:       # %bb.0:
2926; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2927; CHECK-NEXT:    vle64.v v8, (a0)
2928; CHECK-NEXT:    vmfne.vv v16, v8, v8
2929; CHECK-NEXT:    vcpop.m a0, v16
2930; CHECK-NEXT:    beqz a0, .LBB157_2
2931; CHECK-NEXT:  # %bb.1:
2932; CHECK-NEXT:    lui a0, %hi(.LCPI157_0)
2933; CHECK-NEXT:    fld fa0, %lo(.LCPI157_0)(a0)
2934; CHECK-NEXT:    ret
2935; CHECK-NEXT:  .LBB157_2:
2936; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2937; CHECK-NEXT:    vfmv.f.s fa0, v8
2938; CHECK-NEXT:    ret
2939  %v = load <16 x double>, ptr %x
2940  %red = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %v)
2941  ret double %red
2942}
2943
2944define double @vreduce_fmaximum_v16f64_nonans(ptr %x) {
2945; CHECK-LABEL: vreduce_fmaximum_v16f64_nonans:
2946; CHECK:       # %bb.0:
2947; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2948; CHECK-NEXT:    vle64.v v8, (a0)
2949; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2950; CHECK-NEXT:    vfmv.f.s fa0, v8
2951; CHECK-NEXT:    ret
2952  %v = load <16 x double>, ptr %x
2953  %red = call nnan double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %v)
2954  ret double %red
2955}
2956
2957declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>)
2958
2959define double @vreduce_fmaximum_v32f64(ptr %x) {
2960; CHECK-LABEL: vreduce_fmaximum_v32f64:
2961; CHECK:       # %bb.0:
2962; CHECK-NEXT:    addi sp, sp, -16
2963; CHECK-NEXT:    .cfi_def_cfa_offset 16
2964; CHECK-NEXT:    csrr a1, vlenb
2965; CHECK-NEXT:    slli a1, a1, 3
2966; CHECK-NEXT:    sub sp, sp, a1
2967; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2968; CHECK-NEXT:    addi a1, a0, 128
2969; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2970; CHECK-NEXT:    vle64.v v16, (a0)
2971; CHECK-NEXT:    vle64.v v24, (a1)
2972; CHECK-NEXT:    vmfeq.vv v0, v16, v16
2973; CHECK-NEXT:    vmfeq.vv v7, v24, v24
2974; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
2975; CHECK-NEXT:    addi a0, sp, 16
2976; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2977; CHECK-NEXT:    vmv1r.v v0, v7
2978; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
2979; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2980; CHECK-NEXT:    vfmax.vv v8, v8, v16
2981; CHECK-NEXT:    vmfne.vv v16, v8, v8
2982; CHECK-NEXT:    vcpop.m a0, v16
2983; CHECK-NEXT:    beqz a0, .LBB159_2
2984; CHECK-NEXT:  # %bb.1:
2985; CHECK-NEXT:    lui a0, %hi(.LCPI159_0)
2986; CHECK-NEXT:    fld fa0, %lo(.LCPI159_0)(a0)
2987; CHECK-NEXT:    j .LBB159_3
2988; CHECK-NEXT:  .LBB159_2:
2989; CHECK-NEXT:    vfredmax.vs v8, v8, v8
2990; CHECK-NEXT:    vfmv.f.s fa0, v8
2991; CHECK-NEXT:  .LBB159_3:
2992; CHECK-NEXT:    csrr a0, vlenb
2993; CHECK-NEXT:    slli a0, a0, 3
2994; CHECK-NEXT:    add sp, sp, a0
2995; CHECK-NEXT:    .cfi_def_cfa sp, 16
2996; CHECK-NEXT:    addi sp, sp, 16
2997; CHECK-NEXT:    .cfi_def_cfa_offset 0
2998; CHECK-NEXT:    ret
2999  %v = load <32 x double>, ptr %x
3000  %red = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v)
3001  ret double %red
3002}
3003
define double @vreduce_fmaximum_v32f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v32f64_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <32 x double>, ptr %x
  %red = call nnan double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmaximum.v64f64(<64 x double>)

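; A <64 x double> vector takes four LMUL=8 loads; the NaN-propagating pairwise
; vfmax.vv tree needs two 8*vlenb spill slots (16*vlenb of stack) before the
; final NaN check and vfredmax.vs.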
define double @vreduce_fmaximum_v64f64(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v64f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, a0, 384
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v16, v16
; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a1)
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v0, v8, v8
; CHECK-NEXT:    vmfeq.vv v7, v24, v24
; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT:    vfmax.vv v16, v8, v16
; CHECK-NEXT:    vmfeq.vv v0, v16, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmfeq.vv v7, v24, v24
; CHECK-NEXT:    vmerge.vvm v8, v16, v24, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vmfne.vv v16, v8, v8
; CHECK-NEXT:    vcpop.m a0, v16
; CHECK-NEXT:    beqz a0, .LBB161_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    lui a0, %hi(.LCPI161_0)
; CHECK-NEXT:    fld fa0, %lo(.LCPI161_0)(a0)
; CHECK-NEXT:    j .LBB161_3
; CHECK-NEXT:  .LBB161_2:
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:  .LBB161_3:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = load <64 x double>, ptr %x
  %red = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> %v)
  ret double %red
}

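; With nnan, the four quarters reduce via a plain vfmax.vv tree with no
; spills.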
define double @vreduce_fmaximum_v64f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v64f64_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a1, a0, 384
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vle64.v v0, (a1)
; CHECK-NEXT:    vfmax.vv v16, v24, v16
; CHECK-NEXT:    vfmax.vv v8, v8, v0
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %v = load <64 x double>, ptr %x
  %red = call nnan double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> %v)
  ret double %red
}
