xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll (revision f6ace2bc15bfde4cc9bd140859fa92618568a006)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
3; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
4
; Every function in this file is compiled for the AArch64 Linux GNU target.
5target triple = "aarch64-unknown-linux-gnu"
6
7;
8; FADDA
9;
10
; fadd reduction of a <4 x half> argument seeded with %start; the call carries
; no fast-math flags.
; With +sve the expected code adds lane 0 directly, then extracts lanes 1..3
; with indexed mov and adds each one to the accumulator in lane order.
; Without +sve the vector is stored to the stack and every lane is reloaded,
; widened to single precision, added, and narrowed back to half.
11define half @fadda_v4f16(half %start, <4 x half> %a) {
12; CHECK-LABEL: fadda_v4f16:
13; CHECK:       // %bb.0:
14; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
15; CHECK-NEXT:    fadd h0, h0, h1
16; CHECK-NEXT:    mov z2.h, z1.h[1]
17; CHECK-NEXT:    fadd h0, h0, h2
18; CHECK-NEXT:    mov z2.h, z1.h[2]
19; CHECK-NEXT:    mov z1.h, z1.h[3]
20; CHECK-NEXT:    fadd h0, h0, h2
21; CHECK-NEXT:    fadd h0, h0, h1
22; CHECK-NEXT:    ret
23;
24; NONEON-NOSVE-LABEL: fadda_v4f16:
25; NONEON-NOSVE:       // %bb.0:
26; NONEON-NOSVE-NEXT:    sub sp, sp, #16
27; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
28; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
29; NONEON-NOSVE-NEXT:    fcvt s0, h0
30; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
31; NONEON-NOSVE-NEXT:    fcvt s1, h1
32; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
33; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
34; NONEON-NOSVE-NEXT:    fcvt s1, h1
35; NONEON-NOSVE-NEXT:    fcvt h0, s0
36; NONEON-NOSVE-NEXT:    fcvt s0, h0
37; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
38; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
39; NONEON-NOSVE-NEXT:    fcvt s1, h1
40; NONEON-NOSVE-NEXT:    fcvt h0, s0
41; NONEON-NOSVE-NEXT:    fcvt s0, h0
42; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
43; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
44; NONEON-NOSVE-NEXT:    fcvt s1, h1
45; NONEON-NOSVE-NEXT:    fcvt h0, s0
46; NONEON-NOSVE-NEXT:    fcvt s0, h0
47; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
48; NONEON-NOSVE-NEXT:    fcvt h0, s0
49; NONEON-NOSVE-NEXT:    add sp, sp, #16
50; NONEON-NOSVE-NEXT:    ret
51  %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
52  ret half %res
53}
54
; fadd reduction of a <8 x half> argument seeded with %start; the call carries
; no fast-math flags.
; With +sve the expected code adds lane 0 directly, then extracts lanes 1..7
; with indexed mov and adds each one to the accumulator in lane order.
; Without +sve the 16-byte vector is pushed onto the stack and every lane is
; reloaded, widened to single precision, added, and narrowed back to half.
55define half @fadda_v8f16(half %start, <8 x half> %a) {
56; CHECK-LABEL: fadda_v8f16:
57; CHECK:       // %bb.0:
58; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
59; CHECK-NEXT:    fadd h0, h0, h1
60; CHECK-NEXT:    mov z2.h, z1.h[1]
61; CHECK-NEXT:    fadd h0, h0, h2
62; CHECK-NEXT:    mov z2.h, z1.h[2]
63; CHECK-NEXT:    fadd h0, h0, h2
64; CHECK-NEXT:    mov z2.h, z1.h[3]
65; CHECK-NEXT:    fadd h0, h0, h2
66; CHECK-NEXT:    mov z2.h, z1.h[4]
67; CHECK-NEXT:    fadd h0, h0, h2
68; CHECK-NEXT:    mov z2.h, z1.h[5]
69; CHECK-NEXT:    fadd h0, h0, h2
70; CHECK-NEXT:    mov z2.h, z1.h[6]
71; CHECK-NEXT:    mov z1.h, z1.h[7]
72; CHECK-NEXT:    fadd h0, h0, h2
73; CHECK-NEXT:    fadd h0, h0, h1
74; CHECK-NEXT:    ret
75;
76; NONEON-NOSVE-LABEL: fadda_v8f16:
77; NONEON-NOSVE:       // %bb.0:
78; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
79; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
80; NONEON-NOSVE-NEXT:    ldr h1, [sp]
81; NONEON-NOSVE-NEXT:    fcvt s0, h0
82; NONEON-NOSVE-NEXT:    fcvt s1, h1
83; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
84; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
85; NONEON-NOSVE-NEXT:    fcvt s1, h1
86; NONEON-NOSVE-NEXT:    fcvt h0, s0
87; NONEON-NOSVE-NEXT:    fcvt s0, h0
88; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
89; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
90; NONEON-NOSVE-NEXT:    fcvt s1, h1
91; NONEON-NOSVE-NEXT:    fcvt h0, s0
92; NONEON-NOSVE-NEXT:    fcvt s0, h0
93; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
94; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
95; NONEON-NOSVE-NEXT:    fcvt s1, h1
96; NONEON-NOSVE-NEXT:    fcvt h0, s0
97; NONEON-NOSVE-NEXT:    fcvt s0, h0
98; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
99; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
100; NONEON-NOSVE-NEXT:    fcvt s1, h1
101; NONEON-NOSVE-NEXT:    fcvt h0, s0
102; NONEON-NOSVE-NEXT:    fcvt s0, h0
103; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
104; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
105; NONEON-NOSVE-NEXT:    fcvt s1, h1
106; NONEON-NOSVE-NEXT:    fcvt h0, s0
107; NONEON-NOSVE-NEXT:    fcvt s0, h0
108; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
109; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
110; NONEON-NOSVE-NEXT:    fcvt s1, h1
111; NONEON-NOSVE-NEXT:    fcvt h0, s0
112; NONEON-NOSVE-NEXT:    fcvt s0, h0
113; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
114; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
115; NONEON-NOSVE-NEXT:    fcvt s1, h1
116; NONEON-NOSVE-NEXT:    fcvt h0, s0
117; NONEON-NOSVE-NEXT:    fcvt s0, h0
118; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
119; NONEON-NOSVE-NEXT:    fcvt h0, s0
120; NONEON-NOSVE-NEXT:    add sp, sp, #16
121; NONEON-NOSVE-NEXT:    ret
122  %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
123  ret half %res
124}
125
; fadd reduction of a <16 x half> vector loaded from %a, seeded with %start;
; the call carries no fast-math flags.
; With +sve the expected code loads the vector as two 16-byte halves
; ([x0] and [x0, #16]) and, for each half, adds lanes 0..7 to the accumulator
; one at a time using indexed mov to extract lanes.
; Without +sve both halves are copied to a 32-byte stack area and each of the
; 16 lanes is reloaded, widened to single precision, added, and narrowed back.
126define half @fadda_v16f16(half %start, ptr %a) {
127; CHECK-LABEL: fadda_v16f16:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    ldr q1, [x0]
130; CHECK-NEXT:    fadd h0, h0, h1
131; CHECK-NEXT:    mov z2.h, z1.h[1]
132; CHECK-NEXT:    fadd h0, h0, h2
133; CHECK-NEXT:    mov z2.h, z1.h[2]
134; CHECK-NEXT:    fadd h0, h0, h2
135; CHECK-NEXT:    mov z2.h, z1.h[3]
136; CHECK-NEXT:    fadd h0, h0, h2
137; CHECK-NEXT:    mov z2.h, z1.h[4]
138; CHECK-NEXT:    fadd h0, h0, h2
139; CHECK-NEXT:    mov z2.h, z1.h[5]
140; CHECK-NEXT:    fadd h0, h0, h2
141; CHECK-NEXT:    mov z2.h, z1.h[6]
142; CHECK-NEXT:    mov z1.h, z1.h[7]
143; CHECK-NEXT:    fadd h0, h0, h2
144; CHECK-NEXT:    fadd h0, h0, h1
145; CHECK-NEXT:    ldr q1, [x0, #16]
146; CHECK-NEXT:    mov z2.h, z1.h[1]
147; CHECK-NEXT:    fadd h0, h0, h1
148; CHECK-NEXT:    fadd h0, h0, h2
149; CHECK-NEXT:    mov z2.h, z1.h[2]
150; CHECK-NEXT:    fadd h0, h0, h2
151; CHECK-NEXT:    mov z2.h, z1.h[3]
152; CHECK-NEXT:    fadd h0, h0, h2
153; CHECK-NEXT:    mov z2.h, z1.h[4]
154; CHECK-NEXT:    fadd h0, h0, h2
155; CHECK-NEXT:    mov z2.h, z1.h[5]
156; CHECK-NEXT:    fadd h0, h0, h2
157; CHECK-NEXT:    mov z2.h, z1.h[6]
158; CHECK-NEXT:    mov z1.h, z1.h[7]
159; CHECK-NEXT:    fadd h0, h0, h2
160; CHECK-NEXT:    fadd h0, h0, h1
161; CHECK-NEXT:    ret
162;
163; NONEON-NOSVE-LABEL: fadda_v16f16:
164; NONEON-NOSVE:       // %bb.0:
165; NONEON-NOSVE-NEXT:    sub sp, sp, #32
166; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
167; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
168; NONEON-NOSVE-NEXT:    fcvt s0, h0
169; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
170; NONEON-NOSVE-NEXT:    ldr q1, [x0]
171; NONEON-NOSVE-NEXT:    str q1, [sp]
172; NONEON-NOSVE-NEXT:    ldr h1, [sp]
173; NONEON-NOSVE-NEXT:    fcvt s1, h1
174; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
175; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
176; NONEON-NOSVE-NEXT:    fcvt s1, h1
177; NONEON-NOSVE-NEXT:    fcvt h0, s0
178; NONEON-NOSVE-NEXT:    fcvt s0, h0
179; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
180; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
181; NONEON-NOSVE-NEXT:    fcvt s1, h1
182; NONEON-NOSVE-NEXT:    fcvt h0, s0
183; NONEON-NOSVE-NEXT:    fcvt s0, h0
184; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
185; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
186; NONEON-NOSVE-NEXT:    fcvt s1, h1
187; NONEON-NOSVE-NEXT:    fcvt h0, s0
188; NONEON-NOSVE-NEXT:    fcvt s0, h0
189; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
190; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
191; NONEON-NOSVE-NEXT:    fcvt s1, h1
192; NONEON-NOSVE-NEXT:    fcvt h0, s0
193; NONEON-NOSVE-NEXT:    fcvt s0, h0
194; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
195; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
196; NONEON-NOSVE-NEXT:    fcvt s1, h1
197; NONEON-NOSVE-NEXT:    fcvt h0, s0
198; NONEON-NOSVE-NEXT:    fcvt s0, h0
199; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
200; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
201; NONEON-NOSVE-NEXT:    fcvt s1, h1
202; NONEON-NOSVE-NEXT:    fcvt h0, s0
203; NONEON-NOSVE-NEXT:    fcvt s0, h0
204; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
205; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
206; NONEON-NOSVE-NEXT:    fcvt s1, h1
207; NONEON-NOSVE-NEXT:    fcvt h0, s0
208; NONEON-NOSVE-NEXT:    fcvt s0, h0
209; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
210; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
211; NONEON-NOSVE-NEXT:    fcvt s1, h1
212; NONEON-NOSVE-NEXT:    fcvt h0, s0
213; NONEON-NOSVE-NEXT:    fcvt s0, h0
214; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
215; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
216; NONEON-NOSVE-NEXT:    fcvt s1, h1
217; NONEON-NOSVE-NEXT:    fcvt h0, s0
218; NONEON-NOSVE-NEXT:    fcvt s0, h0
219; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
220; NONEON-NOSVE-NEXT:    ldr h1, [sp, #20]
221; NONEON-NOSVE-NEXT:    fcvt s1, h1
222; NONEON-NOSVE-NEXT:    fcvt h0, s0
223; NONEON-NOSVE-NEXT:    fcvt s0, h0
224; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
225; NONEON-NOSVE-NEXT:    ldr h1, [sp, #22]
226; NONEON-NOSVE-NEXT:    fcvt s1, h1
227; NONEON-NOSVE-NEXT:    fcvt h0, s0
228; NONEON-NOSVE-NEXT:    fcvt s0, h0
229; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
230; NONEON-NOSVE-NEXT:    ldr h1, [sp, #24]
231; NONEON-NOSVE-NEXT:    fcvt s1, h1
232; NONEON-NOSVE-NEXT:    fcvt h0, s0
233; NONEON-NOSVE-NEXT:    fcvt s0, h0
234; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
235; NONEON-NOSVE-NEXT:    ldr h1, [sp, #26]
236; NONEON-NOSVE-NEXT:    fcvt s1, h1
237; NONEON-NOSVE-NEXT:    fcvt h0, s0
238; NONEON-NOSVE-NEXT:    fcvt s0, h0
239; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
240; NONEON-NOSVE-NEXT:    ldr h1, [sp, #28]
241; NONEON-NOSVE-NEXT:    fcvt s1, h1
242; NONEON-NOSVE-NEXT:    fcvt h0, s0
243; NONEON-NOSVE-NEXT:    fcvt s0, h0
244; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
245; NONEON-NOSVE-NEXT:    ldr h1, [sp, #30]
246; NONEON-NOSVE-NEXT:    fcvt s1, h1
247; NONEON-NOSVE-NEXT:    fcvt h0, s0
248; NONEON-NOSVE-NEXT:    fcvt s0, h0
249; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
250; NONEON-NOSVE-NEXT:    fcvt h0, s0
251; NONEON-NOSVE-NEXT:    add sp, sp, #32
252; NONEON-NOSVE-NEXT:    ret
253  %op = load <16 x half>, ptr %a
254  %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
255  ret half %res
256}
257
; fadd reduction of a <2 x float> argument seeded with %start; the call carries
; no fast-math flags.
; With +sve the expected code adds lane 0, extracts lane 1 with an indexed mov,
; and adds it. Without +sve the vector is stored to the stack, both lanes are
; reloaded with one ldp, and they are added to the accumulator in lane order.
258define float @fadda_v2f32(float %start, <2 x float> %a) {
259; CHECK-LABEL: fadda_v2f32:
260; CHECK:       // %bb.0:
261; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
262; CHECK-NEXT:    fadd s0, s0, s1
263; CHECK-NEXT:    mov z1.s, z1.s[1]
264; CHECK-NEXT:    fadd s0, s0, s1
265; CHECK-NEXT:    ret
266;
267; NONEON-NOSVE-LABEL: fadda_v2f32:
268; NONEON-NOSVE:       // %bb.0:
269; NONEON-NOSVE-NEXT:    sub sp, sp, #16
270; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
271; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
272; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
273; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
274; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
275; NONEON-NOSVE-NEXT:    add sp, sp, #16
276; NONEON-NOSVE-NEXT:    ret
277  %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
278  ret float %res
279}
280
; fadd reduction of a <4 x float> argument seeded with %start; the call carries
; no fast-math flags.
; With +sve the expected code adds lane 0, then extracts lanes 1..3 with
; indexed mov and adds them in lane order. Without +sve the vector is pushed
; onto the stack and the lanes are reloaded two at a time with ldp and added
; to the accumulator in lane order.
281define float @fadda_v4f32(float %start, <4 x float> %a) {
282; CHECK-LABEL: fadda_v4f32:
283; CHECK:       // %bb.0:
284; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
285; CHECK-NEXT:    fadd s0, s0, s1
286; CHECK-NEXT:    mov z2.s, z1.s[1]
287; CHECK-NEXT:    fadd s0, s0, s2
288; CHECK-NEXT:    mov z2.s, z1.s[2]
289; CHECK-NEXT:    mov z1.s, z1.s[3]
290; CHECK-NEXT:    fadd s0, s0, s2
291; CHECK-NEXT:    fadd s0, s0, s1
292; CHECK-NEXT:    ret
293;
294; NONEON-NOSVE-LABEL: fadda_v4f32:
295; NONEON-NOSVE:       // %bb.0:
296; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
297; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
298; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp]
299; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
300; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
301; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
302; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
303; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
304; NONEON-NOSVE-NEXT:    add sp, sp, #16
305; NONEON-NOSVE-NEXT:    ret
306  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
307  ret float %res
308}
309
; fadd reduction of a <8 x float> vector loaded from %a, seeded with %start;
; the call carries no fast-math flags.
; With +sve the expected code loads the vector as two 16-byte halves and adds
; the four lanes of each half to the accumulator in lane order. Without +sve
; both halves are copied to a 32-byte stack area, reloaded two lanes at a time
; with ldp, and added to the accumulator in lane order.
310define float @fadda_v8f32(float %start, ptr %a) {
311; CHECK-LABEL: fadda_v8f32:
312; CHECK:       // %bb.0:
313; CHECK-NEXT:    ldr q1, [x0]
314; CHECK-NEXT:    fadd s0, s0, s1
315; CHECK-NEXT:    mov z2.s, z1.s[1]
316; CHECK-NEXT:    fadd s0, s0, s2
317; CHECK-NEXT:    mov z2.s, z1.s[2]
318; CHECK-NEXT:    mov z1.s, z1.s[3]
319; CHECK-NEXT:    fadd s0, s0, s2
320; CHECK-NEXT:    fadd s0, s0, s1
321; CHECK-NEXT:    ldr q1, [x0, #16]
322; CHECK-NEXT:    mov z2.s, z1.s[1]
323; CHECK-NEXT:    fadd s0, s0, s1
324; CHECK-NEXT:    fadd s0, s0, s2
325; CHECK-NEXT:    mov z2.s, z1.s[2]
326; CHECK-NEXT:    mov z1.s, z1.s[3]
327; CHECK-NEXT:    fadd s0, s0, s2
328; CHECK-NEXT:    fadd s0, s0, s1
329; CHECK-NEXT:    ret
330;
331; NONEON-NOSVE-LABEL: fadda_v8f32:
332; NONEON-NOSVE:       // %bb.0:
333; NONEON-NOSVE-NEXT:    sub sp, sp, #32
334; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
335; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
336; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
337; NONEON-NOSVE-NEXT:    ldr q1, [x0]
338; NONEON-NOSVE-NEXT:    str q1, [sp]
339; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp]
340; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
341; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
342; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
343; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
344; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
345; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #16]
346; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
347; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
348; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #24]
349; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
350; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
351; NONEON-NOSVE-NEXT:    add sp, sp, #32
352; NONEON-NOSVE-NEXT:    ret
353  %op = load <8 x float>, ptr %a
354  %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
355  ret float %res
356}
357
; fadd reduction of a single-element <1 x double> argument seeded with %start;
; the call carries no fast-math flags.
; Both llc configurations are expected to emit one scalar fadd and return.
358define double @fadda_v1f64(double %start, <1 x double> %a) {
359; CHECK-LABEL: fadda_v1f64:
360; CHECK:       // %bb.0:
361; CHECK-NEXT:    fadd d0, d0, d1
362; CHECK-NEXT:    ret
363;
364; NONEON-NOSVE-LABEL: fadda_v1f64:
365; NONEON-NOSVE:       // %bb.0:
366; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
367; NONEON-NOSVE-NEXT:    ret
368  %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
369  ret double %res
370}
371
; fadd reduction of a <2 x double> argument seeded with %start; the call
; carries no fast-math flags.
; With +sve the expected code adds lane 0, extracts lane 1 with an indexed mov,
; and adds it. Without +sve the vector is pushed onto the stack, both lanes are
; popped with a post-indexed ldp, and they are added in lane order.
372define double @fadda_v2f64(double %start, <2 x double> %a) {
373; CHECK-LABEL: fadda_v2f64:
374; CHECK:       // %bb.0:
375; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
376; CHECK-NEXT:    fadd d0, d0, d1
377; CHECK-NEXT:    mov z1.d, z1.d[1]
378; CHECK-NEXT:    fadd d0, d0, d1
379; CHECK-NEXT:    ret
380;
381; NONEON-NOSVE-LABEL: fadda_v2f64:
382; NONEON-NOSVE:       // %bb.0:
383; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
384; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
385; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp], #16
386; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
387; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
388; NONEON-NOSVE-NEXT:    ret
389  %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
390  ret double %res
391}
392
; fadd reduction of a <4 x double> vector loaded from %a, seeded with %start;
; the call carries no fast-math flags.
; With +sve the expected code loads the vector as two 16-byte halves and adds
; the two lanes of each half to the accumulator in lane order. Without +sve
; both halves are copied to a 32-byte stack area, reloaded with ldp, and added
; to the accumulator in lane order.
393define double @fadda_v4f64(double %start, ptr %a) {
394; CHECK-LABEL: fadda_v4f64:
395; CHECK:       // %bb.0:
396; CHECK-NEXT:    ldr q1, [x0]
397; CHECK-NEXT:    fadd d0, d0, d1
398; CHECK-NEXT:    mov z1.d, z1.d[1]
399; CHECK-NEXT:    fadd d0, d0, d1
400; CHECK-NEXT:    ldr q1, [x0, #16]
401; CHECK-NEXT:    fadd d0, d0, d1
402; CHECK-NEXT:    mov z1.d, z1.d[1]
403; CHECK-NEXT:    fadd d0, d0, d1
404; CHECK-NEXT:    ret
405;
406; NONEON-NOSVE-LABEL: fadda_v4f64:
407; NONEON-NOSVE:       // %bb.0:
408; NONEON-NOSVE-NEXT:    sub sp, sp, #32
409; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
410; NONEON-NOSVE-NEXT:    ldr q1, [x0, #16]
411; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
412; NONEON-NOSVE-NEXT:    ldr q1, [x0]
413; NONEON-NOSVE-NEXT:    str q1, [sp]
414; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp]
415; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
416; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
417; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #16]
418; NONEON-NOSVE-NEXT:    fadd d0, d0, d2
419; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
420; NONEON-NOSVE-NEXT:    add sp, sp, #32
421; NONEON-NOSVE-NEXT:    ret
422  %op = load <4 x double>, ptr %a
423  %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
424  ret double %res
425}
426
427;
428; FADDV
429;
430
; fadd reduction of a <4 x half> argument seeded with %start; the call is
; marked `fast`.
; With +sve the expected code builds a 4-lane predicate (ptrue vl4), reduces
; the vector with a single faddv, and then adds the start value.
; Without +sve the vector is stored to the stack, the four lanes are summed
; with scalar single-precision adds (narrowing back to half after each step),
; and the start value is added last.
431define half @faddv_v4f16(half %start, <4 x half> %a) {
432; CHECK-LABEL: faddv_v4f16:
433; CHECK:       // %bb.0:
434; CHECK-NEXT:    ptrue p0.h, vl4
435; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
436; CHECK-NEXT:    faddv h1, p0, z1.h
437; CHECK-NEXT:    fadd h0, h0, h1
438; CHECK-NEXT:    ret
439;
440; NONEON-NOSVE-LABEL: faddv_v4f16:
441; NONEON-NOSVE:       // %bb.0:
442; NONEON-NOSVE-NEXT:    sub sp, sp, #16
443; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
444; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
445; NONEON-NOSVE-NEXT:    fcvt s0, h0
446; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
447; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
448; NONEON-NOSVE-NEXT:    fcvt s2, h2
449; NONEON-NOSVE-NEXT:    fcvt s1, h1
450; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
451; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
452; NONEON-NOSVE-NEXT:    fcvt s2, h2
453; NONEON-NOSVE-NEXT:    fcvt h1, s1
454; NONEON-NOSVE-NEXT:    fcvt s1, h1
455; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
456; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
457; NONEON-NOSVE-NEXT:    fcvt s2, h2
458; NONEON-NOSVE-NEXT:    fcvt h1, s1
459; NONEON-NOSVE-NEXT:    fcvt s1, h1
460; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
461; NONEON-NOSVE-NEXT:    fcvt h1, s1
462; NONEON-NOSVE-NEXT:    fcvt s1, h1
463; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
464; NONEON-NOSVE-NEXT:    fcvt h0, s0
465; NONEON-NOSVE-NEXT:    add sp, sp, #16
466; NONEON-NOSVE-NEXT:    ret
467  %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
468  ret half %res
469}
470
; fadd reduction of a <8 x half> argument seeded with %start; the call is
; marked `fast`.
; With +sve the expected code builds an 8-lane predicate (ptrue vl8), reduces
; the vector with a single faddv, and then adds the start value.
; Without +sve the vector is pushed onto the stack, the eight lanes are summed
; with scalar single-precision adds (narrowing back to half after each step),
; and the start value is added last.
471define half @faddv_v8f16(half %start, <8 x half> %a) {
472; CHECK-LABEL: faddv_v8f16:
473; CHECK:       // %bb.0:
474; CHECK-NEXT:    ptrue p0.h, vl8
475; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
476; CHECK-NEXT:    faddv h1, p0, z1.h
477; CHECK-NEXT:    fadd h0, h0, h1
478; CHECK-NEXT:    ret
479;
480; NONEON-NOSVE-LABEL: faddv_v8f16:
481; NONEON-NOSVE:       // %bb.0:
482; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
483; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
484; NONEON-NOSVE-NEXT:    ldr h1, [sp]
485; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
486; NONEON-NOSVE-NEXT:    fcvt s0, h0
487; NONEON-NOSVE-NEXT:    fcvt s2, h2
488; NONEON-NOSVE-NEXT:    fcvt s1, h1
489; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
490; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
491; NONEON-NOSVE-NEXT:    fcvt s2, h2
492; NONEON-NOSVE-NEXT:    fcvt h1, s1
493; NONEON-NOSVE-NEXT:    fcvt s1, h1
494; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
495; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
496; NONEON-NOSVE-NEXT:    fcvt s2, h2
497; NONEON-NOSVE-NEXT:    fcvt h1, s1
498; NONEON-NOSVE-NEXT:    fcvt s1, h1
499; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
500; NONEON-NOSVE-NEXT:    ldr h2, [sp, #8]
501; NONEON-NOSVE-NEXT:    fcvt s2, h2
502; NONEON-NOSVE-NEXT:    fcvt h1, s1
503; NONEON-NOSVE-NEXT:    fcvt s1, h1
504; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
505; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
506; NONEON-NOSVE-NEXT:    fcvt s2, h2
507; NONEON-NOSVE-NEXT:    fcvt h1, s1
508; NONEON-NOSVE-NEXT:    fcvt s1, h1
509; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
510; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
511; NONEON-NOSVE-NEXT:    fcvt s2, h2
512; NONEON-NOSVE-NEXT:    fcvt h1, s1
513; NONEON-NOSVE-NEXT:    fcvt s1, h1
514; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
515; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
516; NONEON-NOSVE-NEXT:    fcvt s2, h2
517; NONEON-NOSVE-NEXT:    fcvt h1, s1
518; NONEON-NOSVE-NEXT:    fcvt s1, h1
519; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
520; NONEON-NOSVE-NEXT:    fcvt h1, s1
521; NONEON-NOSVE-NEXT:    fcvt s1, h1
522; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
523; NONEON-NOSVE-NEXT:    fcvt h0, s0
524; NONEON-NOSVE-NEXT:    add sp, sp, #16
525; NONEON-NOSVE-NEXT:    ret
526  %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
527  ret half %res
528}
529
; fadd reduction of a <16 x half> vector loaded from %a, seeded with %start;
; the call is marked `fast`.
; With +sve the expected code loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd (ptrue vl8), reduces the result with a
; single faddv, and then adds the start value.
; Without +sve both halves are stored to a 32-byte stack area; the scalar code
; adds each lane of the low half to the matching lane of the high half
; (offsets n and n+16), accumulates those sums, and adds the start value last.
530define half @faddv_v16f16(half %start, ptr %a) {
531; CHECK-LABEL: faddv_v16f16:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    ldp q2, q1, [x0]
534; CHECK-NEXT:    ptrue p0.h, vl8
535; CHECK-NEXT:    fadd z1.h, p0/m, z1.h, z2.h
536; CHECK-NEXT:    faddv h1, p0, z1.h
537; CHECK-NEXT:    fadd h0, h0, h1
538; CHECK-NEXT:    ret
539;
540; NONEON-NOSVE-LABEL: faddv_v16f16:
541; NONEON-NOSVE:       // %bb.0:
542; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
543; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
544; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
545; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
546; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
547; NONEON-NOSVE-NEXT:    fcvt s0, h0
548; NONEON-NOSVE-NEXT:    ldr h3, [sp, #16]
549; NONEON-NOSVE-NEXT:    ldr h4, [sp]
550; NONEON-NOSVE-NEXT:    fcvt s1, h1
551; NONEON-NOSVE-NEXT:    fcvt s2, h2
552; NONEON-NOSVE-NEXT:    fcvt s3, h3
553; NONEON-NOSVE-NEXT:    fcvt s4, h4
554; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
555; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
556; NONEON-NOSVE-NEXT:    ldr h3, [sp, #20]
557; NONEON-NOSVE-NEXT:    ldr h4, [sp, #4]
558; NONEON-NOSVE-NEXT:    fcvt s3, h3
559; NONEON-NOSVE-NEXT:    fcvt s4, h4
560; NONEON-NOSVE-NEXT:    fcvt h1, s1
561; NONEON-NOSVE-NEXT:    fcvt h2, s2
562; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
563; NONEON-NOSVE-NEXT:    ldr h4, [sp, #6]
564; NONEON-NOSVE-NEXT:    fcvt s1, h1
565; NONEON-NOSVE-NEXT:    fcvt s2, h2
566; NONEON-NOSVE-NEXT:    fcvt s4, h4
567; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
568; NONEON-NOSVE-NEXT:    fcvt h2, s3
569; NONEON-NOSVE-NEXT:    ldr h3, [sp, #22]
570; NONEON-NOSVE-NEXT:    fcvt s3, h3
571; NONEON-NOSVE-NEXT:    fcvt h1, s1
572; NONEON-NOSVE-NEXT:    fcvt s2, h2
573; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
574; NONEON-NOSVE-NEXT:    ldr h4, [sp, #8]
575; NONEON-NOSVE-NEXT:    fcvt s1, h1
576; NONEON-NOSVE-NEXT:    fcvt s4, h4
577; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
578; NONEON-NOSVE-NEXT:    fcvt h2, s3
579; NONEON-NOSVE-NEXT:    ldr h3, [sp, #24]
580; NONEON-NOSVE-NEXT:    fcvt s3, h3
581; NONEON-NOSVE-NEXT:    fcvt h1, s1
582; NONEON-NOSVE-NEXT:    fcvt s2, h2
583; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
584; NONEON-NOSVE-NEXT:    ldr h4, [sp, #10]
585; NONEON-NOSVE-NEXT:    fcvt s1, h1
586; NONEON-NOSVE-NEXT:    fcvt s4, h4
587; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
588; NONEON-NOSVE-NEXT:    fcvt h2, s3
589; NONEON-NOSVE-NEXT:    ldr h3, [sp, #26]
590; NONEON-NOSVE-NEXT:    fcvt s3, h3
591; NONEON-NOSVE-NEXT:    fcvt h1, s1
592; NONEON-NOSVE-NEXT:    fcvt s2, h2
593; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
594; NONEON-NOSVE-NEXT:    ldr h4, [sp, #12]
595; NONEON-NOSVE-NEXT:    fcvt s1, h1
596; NONEON-NOSVE-NEXT:    fcvt s4, h4
597; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
598; NONEON-NOSVE-NEXT:    fcvt h2, s3
599; NONEON-NOSVE-NEXT:    ldr h3, [sp, #28]
600; NONEON-NOSVE-NEXT:    fcvt s3, h3
601; NONEON-NOSVE-NEXT:    fcvt h1, s1
602; NONEON-NOSVE-NEXT:    fcvt s2, h2
603; NONEON-NOSVE-NEXT:    fcvt s1, h1
604; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
605; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
606; NONEON-NOSVE-NEXT:    ldr h3, [sp, #30]
607; NONEON-NOSVE-NEXT:    ldr h4, [sp, #14]
608; NONEON-NOSVE-NEXT:    fcvt s3, h3
609; NONEON-NOSVE-NEXT:    fcvt s4, h4
610; NONEON-NOSVE-NEXT:    fcvt h1, s1
611; NONEON-NOSVE-NEXT:    fcvt h2, s2
612; NONEON-NOSVE-NEXT:    fcvt s1, h1
613; NONEON-NOSVE-NEXT:    fcvt s2, h2
614; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
615; NONEON-NOSVE-NEXT:    fadd s2, s4, s3
616; NONEON-NOSVE-NEXT:    fcvt h1, s1
617; NONEON-NOSVE-NEXT:    fcvt h2, s2
618; NONEON-NOSVE-NEXT:    fcvt s1, h1
619; NONEON-NOSVE-NEXT:    fcvt s2, h2
620; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
621; NONEON-NOSVE-NEXT:    fcvt h1, s1
622; NONEON-NOSVE-NEXT:    fcvt s1, h1
623; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
624; NONEON-NOSVE-NEXT:    fcvt h0, s0
625; NONEON-NOSVE-NEXT:    add sp, sp, #32
626; NONEON-NOSVE-NEXT:    ret
627  %op = load <16 x half>, ptr %a
628  %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
629  ret half %res
630}
631
; fadd reduction of a <2 x float> argument seeded with %start; the call is
; marked `fast`.
; With +sve the expected code builds a 2-lane predicate (ptrue vl2), reduces
; with a single faddv, and then adds the start value. Without +sve the vector
; is stored to the stack, the two lanes are reloaded with ldp and added to each
; other, and the start value is added last.
632define float @faddv_v2f32(float %start, <2 x float> %a) {
633; CHECK-LABEL: faddv_v2f32:
634; CHECK:       // %bb.0:
635; CHECK-NEXT:    ptrue p0.s, vl2
636; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
637; CHECK-NEXT:    faddv s1, p0, z1.s
638; CHECK-NEXT:    fadd s0, s0, s1
639; CHECK-NEXT:    ret
640;
641; NONEON-NOSVE-LABEL: faddv_v2f32:
642; NONEON-NOSVE:       // %bb.0:
643; NONEON-NOSVE-NEXT:    sub sp, sp, #16
644; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
645; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
646; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
647; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
648; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
649; NONEON-NOSVE-NEXT:    add sp, sp, #16
650; NONEON-NOSVE-NEXT:    ret
651  %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
652  ret float %res
653}
654
; fadd reduction of a <4 x float> argument seeded with %start; the call is
; marked `fast`.
; With +sve the expected code builds a 4-lane predicate (ptrue vl4), reduces
; with a single faddv, and then adds the start value. Without +sve the vector
; is pushed onto the stack, lanes are reloaded in pairs and each pair is added,
; the two pair sums are added together, and the start value is added last.
655define float @faddv_v4f32(float %start, <4 x float> %a) {
656; CHECK-LABEL: faddv_v4f32:
657; CHECK:       // %bb.0:
658; CHECK-NEXT:    ptrue p0.s, vl4
659; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
660; CHECK-NEXT:    faddv s1, p0, z1.s
661; CHECK-NEXT:    fadd s0, s0, s1
662; CHECK-NEXT:    ret
663;
664; NONEON-NOSVE-LABEL: faddv_v4f32:
665; NONEON-NOSVE:       // %bb.0:
666; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
667; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
668; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
669; NONEON-NOSVE-NEXT:    ldp s4, s3, [sp], #16
670; NONEON-NOSVE-NEXT:    fadd s3, s4, s3
671; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
672; NONEON-NOSVE-NEXT:    fadd s1, s3, s1
673; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
674; NONEON-NOSVE-NEXT:    ret
675  %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
676  ret float %res
677}
678
; fadd reduction of a <8 x float> vector loaded from %a, seeded with %start;
; the call is marked `fast`.
; With +sve the expected code loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd (ptrue vl4), reduces the result with a
; single faddv, and then adds the start value.
; Without +sve both halves are stored to a 32-byte stack area; each lane of the
; low half is added to the matching lane of the high half, the four sums are
; combined as a tree of scalar adds, and the start value is added last.
679define float @faddv_v8f32(float %start, ptr %a) {
680; CHECK-LABEL: faddv_v8f32:
681; CHECK:       // %bb.0:
682; CHECK-NEXT:    ldp q2, q1, [x0]
683; CHECK-NEXT:    ptrue p0.s, vl4
684; CHECK-NEXT:    fadd z1.s, p0/m, z1.s, z2.s
685; CHECK-NEXT:    faddv s1, p0, z1.s
686; CHECK-NEXT:    fadd s0, s0, s1
687; CHECK-NEXT:    ret
688;
689; NONEON-NOSVE-LABEL: faddv_v8f32:
690; NONEON-NOSVE:       // %bb.0:
691; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
692; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
693; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
694; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #16]
695; NONEON-NOSVE-NEXT:    ldp s4, s3, [sp]
696; NONEON-NOSVE-NEXT:    ldp s5, s6, [sp, #24]
697; NONEON-NOSVE-NEXT:    ldp s7, s16, [sp, #8]
698; NONEON-NOSVE-NEXT:    fadd s1, s3, s1
699; NONEON-NOSVE-NEXT:    fadd s2, s4, s2
700; NONEON-NOSVE-NEXT:    fadd s3, s7, s5
701; NONEON-NOSVE-NEXT:    fadd s4, s16, s6
702; NONEON-NOSVE-NEXT:    fadd s1, s2, s1
703; NONEON-NOSVE-NEXT:    fadd s2, s3, s4
704; NONEON-NOSVE-NEXT:    fadd s1, s1, s2
705; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
706; NONEON-NOSVE-NEXT:    add sp, sp, #32
707; NONEON-NOSVE-NEXT:    ret
708  %op = load <8 x float>, ptr %a
709  %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
710  ret float %res
711}
712
; fadd reduction of a single-element <1 x double> argument seeded with %start;
; the call is marked `fast`.
; Both llc configurations are expected to emit one scalar fadd and return.
713define double @faddv_v1f64(double %start, <1 x double> %a) {
714; CHECK-LABEL: faddv_v1f64:
715; CHECK:       // %bb.0:
716; CHECK-NEXT:    fadd d0, d0, d1
717; CHECK-NEXT:    ret
718;
719; NONEON-NOSVE-LABEL: faddv_v1f64:
720; NONEON-NOSVE:       // %bb.0:
721; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
722; NONEON-NOSVE-NEXT:    ret
723  %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
724  ret double %res
725}
726
; fadd reduction of a <2 x double> argument seeded with %start; the call is
; marked `fast`.
; With +sve the expected code builds a 2-lane predicate (ptrue vl2), reduces
; with a single faddv, and then adds the start value. Without +sve the vector
; is pushed onto the stack, both lanes are popped with a post-indexed ldp and
; added to each other, and the start value is added last.
727define double @faddv_v2f64(double %start, <2 x double> %a) {
728; CHECK-LABEL: faddv_v2f64:
729; CHECK:       // %bb.0:
730; CHECK-NEXT:    ptrue p0.d, vl2
731; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
732; CHECK-NEXT:    faddv d1, p0, z1.d
733; CHECK-NEXT:    fadd d0, d0, d1
734; CHECK-NEXT:    ret
735;
736; NONEON-NOSVE-LABEL: faddv_v2f64:
737; NONEON-NOSVE:       // %bb.0:
738; NONEON-NOSVE-NEXT:    str q1, [sp, #-16]!
739; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
740; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp], #16
741; NONEON-NOSVE-NEXT:    fadd d1, d2, d1
742; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
743; NONEON-NOSVE-NEXT:    ret
744  %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
745  ret double %res
746}
747
; fadd reduction of a <4 x double> vector loaded from %a, seeded with %start;
; the call is marked `fast`.
; With +sve the expected code loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd (ptrue vl2), reduces the result with a
; single faddv, and then adds the start value.
; Without +sve both halves are stored to a 32-byte stack area; each lane of the
; low half is added to the matching lane of the high half, the two sums are
; added together, and the start value is added last.
748define double @faddv_v4f64(double %start, ptr %a) {
749; CHECK-LABEL: faddv_v4f64:
750; CHECK:       // %bb.0:
751; CHECK-NEXT:    ldp q2, q1, [x0]
752; CHECK-NEXT:    ptrue p0.d, vl2
753; CHECK-NEXT:    fadd z1.d, p0/m, z1.d, z2.d
754; CHECK-NEXT:    faddv d1, p0, z1.d
755; CHECK-NEXT:    fadd d0, d0, d1
756; CHECK-NEXT:    ret
757;
758; NONEON-NOSVE-LABEL: faddv_v4f64:
759; NONEON-NOSVE:       // %bb.0:
760; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
761; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #-32]!
762; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
763; NONEON-NOSVE-NEXT:    ldp d2, d1, [sp, #16]
764; NONEON-NOSVE-NEXT:    ldp d4, d3, [sp], #32
765; NONEON-NOSVE-NEXT:    fadd d1, d3, d1
766; NONEON-NOSVE-NEXT:    fadd d2, d4, d2
767; NONEON-NOSVE-NEXT:    fadd d1, d2, d1
768; NONEON-NOSVE-NEXT:    fadd d0, d0, d1
769; NONEON-NOSVE-NEXT:    ret
770  %op = load <4 x double>, ptr %a
771  %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
772  ret double %res
773}
774
775;
776; FMAXNMV
777;
778
; fmax reduction of a <4 x half> argument via llvm.vector.reduce.fmax (no
; start value).
; With +sve the expected code builds a 4-lane predicate (ptrue vl4) and reduces
; the vector with a single fmaxnmv.
; Without +sve the vector is stored to the stack and the lanes are folded with
; scalar single-precision fmaxnm, narrowing back to half after each step.
779define half @fmaxv_v4f16(<4 x half> %a) {
780; CHECK-LABEL: fmaxv_v4f16:
781; CHECK:       // %bb.0:
782; CHECK-NEXT:    ptrue p0.h, vl4
783; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
784; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
785; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
786; CHECK-NEXT:    ret
787;
788; NONEON-NOSVE-LABEL: fmaxv_v4f16:
789; NONEON-NOSVE:       // %bb.0:
790; NONEON-NOSVE-NEXT:    sub sp, sp, #16
791; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
792; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
793; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
794; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
795; NONEON-NOSVE-NEXT:    fcvt s1, h1
796; NONEON-NOSVE-NEXT:    fcvt s0, h0
797; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
798; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
799; NONEON-NOSVE-NEXT:    fcvt s1, h1
800; NONEON-NOSVE-NEXT:    fcvt h0, s0
801; NONEON-NOSVE-NEXT:    fcvt s0, h0
802; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
803; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
804; NONEON-NOSVE-NEXT:    fcvt s1, h1
805; NONEON-NOSVE-NEXT:    fcvt h0, s0
806; NONEON-NOSVE-NEXT:    fcvt s0, h0
807; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
808; NONEON-NOSVE-NEXT:    fcvt h0, s0
809; NONEON-NOSVE-NEXT:    add sp, sp, #16
810; NONEON-NOSVE-NEXT:    ret
811  %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
812  ret half %res
813}
814
; fmax reduction of a <8 x half> argument via llvm.vector.reduce.fmax (no
; start value).
; With +sve the expected code builds an 8-lane predicate (ptrue vl8) and
; reduces the vector with a single fmaxnmv.
; Without +sve the vector is pushed onto the stack and the lanes are folded
; with scalar single-precision fmaxnm, narrowing back to half after each step.
815define half @fmaxv_v8f16(<8 x half> %a) {
816; CHECK-LABEL: fmaxv_v8f16:
817; CHECK:       // %bb.0:
818; CHECK-NEXT:    ptrue p0.h, vl8
819; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
820; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
821; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
822; CHECK-NEXT:    ret
823;
824; NONEON-NOSVE-LABEL: fmaxv_v8f16:
825; NONEON-NOSVE:       // %bb.0:
826; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
827; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
828; NONEON-NOSVE-NEXT:    ldr h0, [sp]
829; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
830; NONEON-NOSVE-NEXT:    fcvt s1, h1
831; NONEON-NOSVE-NEXT:    fcvt s0, h0
832; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
833; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
834; NONEON-NOSVE-NEXT:    fcvt s1, h1
835; NONEON-NOSVE-NEXT:    fcvt h0, s0
836; NONEON-NOSVE-NEXT:    fcvt s0, h0
837; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
838; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
839; NONEON-NOSVE-NEXT:    fcvt s1, h1
840; NONEON-NOSVE-NEXT:    fcvt h0, s0
841; NONEON-NOSVE-NEXT:    fcvt s0, h0
842; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
843; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
844; NONEON-NOSVE-NEXT:    fcvt s1, h1
845; NONEON-NOSVE-NEXT:    fcvt h0, s0
846; NONEON-NOSVE-NEXT:    fcvt s0, h0
847; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
848; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
849; NONEON-NOSVE-NEXT:    fcvt s1, h1
850; NONEON-NOSVE-NEXT:    fcvt h0, s0
851; NONEON-NOSVE-NEXT:    fcvt s0, h0
852; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
853; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
854; NONEON-NOSVE-NEXT:    fcvt s1, h1
855; NONEON-NOSVE-NEXT:    fcvt h0, s0
856; NONEON-NOSVE-NEXT:    fcvt s0, h0
857; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
858; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
859; NONEON-NOSVE-NEXT:    fcvt s1, h1
860; NONEON-NOSVE-NEXT:    fcvt h0, s0
861; NONEON-NOSVE-NEXT:    fcvt s0, h0
862; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
863; NONEON-NOSVE-NEXT:    fcvt h0, s0
864; NONEON-NOSVE-NEXT:    add sp, sp, #16
865; NONEON-NOSVE-NEXT:    ret
866  %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
867  ret half %res
868}
869
; fmaxv_v16f16: loads <16 x half> from %a and reduces it with
; llvm.vector.reduce.fmax.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fmaxnm, and then reduced by fmaxnmv (vl8).
; The RUN line without +sve expects both halves to be copied to the stack and
; reduced with scalar fmaxnm instructions, with fcvt conversions between half
; and single precision around each step.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
870define half @fmaxv_v16f16(ptr %a) {
871; CHECK-LABEL: fmaxv_v16f16:
872; CHECK:       // %bb.0:
873; CHECK-NEXT:    ldp q1, q0, [x0]
874; CHECK-NEXT:    ptrue p0.h, vl8
875; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
876; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
877; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
878; CHECK-NEXT:    ret
879;
880; NONEON-NOSVE-LABEL: fmaxv_v16f16:
881; NONEON-NOSVE:       // %bb.0:
882; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
883; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
884; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
885; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
886; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
887; NONEON-NOSVE-NEXT:    ldr h2, [sp, #16]
888; NONEON-NOSVE-NEXT:    ldr h3, [sp]
889; NONEON-NOSVE-NEXT:    fcvt s0, h0
890; NONEON-NOSVE-NEXT:    fcvt s1, h1
891; NONEON-NOSVE-NEXT:    fcvt s2, h2
892; NONEON-NOSVE-NEXT:    fcvt s3, h3
893; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
894; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
895; NONEON-NOSVE-NEXT:    ldr h2, [sp, #20]
896; NONEON-NOSVE-NEXT:    ldr h3, [sp, #4]
897; NONEON-NOSVE-NEXT:    fcvt s2, h2
898; NONEON-NOSVE-NEXT:    fcvt s3, h3
899; NONEON-NOSVE-NEXT:    fcvt h0, s0
900; NONEON-NOSVE-NEXT:    fcvt h1, s1
901; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
902; NONEON-NOSVE-NEXT:    ldr h3, [sp, #6]
903; NONEON-NOSVE-NEXT:    fcvt s0, h0
904; NONEON-NOSVE-NEXT:    fcvt s1, h1
905; NONEON-NOSVE-NEXT:    fcvt s3, h3
906; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
907; NONEON-NOSVE-NEXT:    fcvt h1, s2
908; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
909; NONEON-NOSVE-NEXT:    fcvt s2, h2
910; NONEON-NOSVE-NEXT:    fcvt h0, s0
911; NONEON-NOSVE-NEXT:    fcvt s1, h1
912; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
913; NONEON-NOSVE-NEXT:    ldr h3, [sp, #8]
914; NONEON-NOSVE-NEXT:    fcvt s0, h0
915; NONEON-NOSVE-NEXT:    fcvt s3, h3
916; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
917; NONEON-NOSVE-NEXT:    fcvt h1, s2
918; NONEON-NOSVE-NEXT:    ldr h2, [sp, #24]
919; NONEON-NOSVE-NEXT:    fcvt s2, h2
920; NONEON-NOSVE-NEXT:    fcvt h0, s0
921; NONEON-NOSVE-NEXT:    fcvt s1, h1
922; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
923; NONEON-NOSVE-NEXT:    ldr h3, [sp, #10]
924; NONEON-NOSVE-NEXT:    fcvt s0, h0
925; NONEON-NOSVE-NEXT:    fcvt s3, h3
926; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
927; NONEON-NOSVE-NEXT:    fcvt h1, s2
928; NONEON-NOSVE-NEXT:    ldr h2, [sp, #26]
929; NONEON-NOSVE-NEXT:    fcvt s2, h2
930; NONEON-NOSVE-NEXT:    fcvt h0, s0
931; NONEON-NOSVE-NEXT:    fcvt s1, h1
932; NONEON-NOSVE-NEXT:    fmaxnm s2, s3, s2
933; NONEON-NOSVE-NEXT:    ldr h3, [sp, #12]
934; NONEON-NOSVE-NEXT:    fcvt s0, h0
935; NONEON-NOSVE-NEXT:    fcvt s3, h3
936; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
937; NONEON-NOSVE-NEXT:    fcvt h1, s2
938; NONEON-NOSVE-NEXT:    ldr h2, [sp, #28]
939; NONEON-NOSVE-NEXT:    fcvt s2, h2
940; NONEON-NOSVE-NEXT:    fcvt h0, s0
941; NONEON-NOSVE-NEXT:    fcvt s1, h1
942; NONEON-NOSVE-NEXT:    fcvt s0, h0
943; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
944; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
945; NONEON-NOSVE-NEXT:    ldr h2, [sp, #30]
946; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14]
947; NONEON-NOSVE-NEXT:    fcvt s2, h2
948; NONEON-NOSVE-NEXT:    fcvt s3, h3
949; NONEON-NOSVE-NEXT:    fcvt h0, s0
950; NONEON-NOSVE-NEXT:    fcvt h1, s1
951; NONEON-NOSVE-NEXT:    fcvt s0, h0
952; NONEON-NOSVE-NEXT:    fcvt s1, h1
953; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
954; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s2
955; NONEON-NOSVE-NEXT:    fcvt h0, s0
956; NONEON-NOSVE-NEXT:    fcvt h1, s1
957; NONEON-NOSVE-NEXT:    fcvt s0, h0
958; NONEON-NOSVE-NEXT:    fcvt s1, h1
959; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
960; NONEON-NOSVE-NEXT:    fcvt h0, s0
961; NONEON-NOSVE-NEXT:    add sp, sp, #32
962; NONEON-NOSVE-NEXT:    ret
963  %op = load <16 x half>, ptr %a
964  %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
965  ret half %res
966}
967
; fmaxv_v2f32: llvm.vector.reduce.fmax over a <2 x float> register argument.
; The +sve RUN line expects a single fmaxnmv governed by a vl2 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; both lanes reloaded with one ldp, and a single scalar fmaxnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
968define float @fmaxv_v2f32(<2 x float> %a) {
969; CHECK-LABEL: fmaxv_v2f32:
970; CHECK:       // %bb.0:
971; CHECK-NEXT:    ptrue p0.s, vl2
972; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
973; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
974; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
975; CHECK-NEXT:    ret
976;
977; NONEON-NOSVE-LABEL: fmaxv_v2f32:
978; NONEON-NOSVE:       // %bb.0:
979; NONEON-NOSVE-NEXT:    sub sp, sp, #16
980; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
981; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
982; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
983; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
984; NONEON-NOSVE-NEXT:    add sp, sp, #16
985; NONEON-NOSVE-NEXT:    ret
986  %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
987  ret float %res
988}
989
; fmaxv_v4f32: llvm.vector.reduce.fmax over a <4 x float> register argument.
; The +sve RUN line expects a single fmaxnmv governed by a vl4 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; the lanes reloaded pairwise with ldp, and three chained scalar fmaxnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
990define float @fmaxv_v4f32(<4 x float> %a) {
991; CHECK-LABEL: fmaxv_v4f32:
992; CHECK:       // %bb.0:
993; CHECK-NEXT:    ptrue p0.s, vl4
994; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
995; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
996; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
997; CHECK-NEXT:    ret
998;
999; NONEON-NOSVE-LABEL: fmaxv_v4f32:
1000; NONEON-NOSVE:       // %bb.0:
1001; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1002; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1003; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
1004; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
1005; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
1006; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s2
1007; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
1008; NONEON-NOSVE-NEXT:    add sp, sp, #16
1009; NONEON-NOSVE-NEXT:    ret
1010  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
1011  ret float %res
1012}
1013
; fmaxv_v8f32: loads <8 x float> from %a and reduces it with
; llvm.vector.reduce.fmax.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fmaxnm, and then reduced by fmaxnmv (vl4).
; The RUN line without +sve expects both halves to be copied to the stack,
; the lanes reloaded pairwise with ldp, and reduced with scalar fmaxnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1014define float @fmaxv_v8f32(ptr %a) {
1015; CHECK-LABEL: fmaxv_v8f32:
1016; CHECK:       // %bb.0:
1017; CHECK-NEXT:    ldp q1, q0, [x0]
1018; CHECK-NEXT:    ptrue p0.s, vl4
1019; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
1020; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
1021; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1022; CHECK-NEXT:    ret
1023;
1024; NONEON-NOSVE-LABEL: fmaxv_v8f32:
1025; NONEON-NOSVE:       // %bb.0:
1026; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1027; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1028; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1029; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #16]
1030; NONEON-NOSVE-NEXT:    ldp s3, s2, [sp]
1031; NONEON-NOSVE-NEXT:    fmaxnm s0, s2, s0
1032; NONEON-NOSVE-NEXT:    fmaxnm s1, s3, s1
1033; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
1034; NONEON-NOSVE-NEXT:    fmaxnm s0, s1, s0
1035; NONEON-NOSVE-NEXT:    ldp s3, s1, [sp, #24]
1036; NONEON-NOSVE-NEXT:    fmaxnm s2, s2, s3
1037; NONEON-NOSVE-NEXT:    fmaxnm s1, s4, s1
1038; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s2
1039; NONEON-NOSVE-NEXT:    fmaxnm s0, s0, s1
1040; NONEON-NOSVE-NEXT:    add sp, sp, #32
1041; NONEON-NOSVE-NEXT:    ret
1042  %op = load <8 x float>, ptr %a
1043  %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
1044  ret float %res
1045}
1046
; fmaxv_v1f64: llvm.vector.reduce.fmax over a single-element <1 x double>.
; Both RUN lines expect the function body to be just `ret`, i.e. no
; instructions are emitted for the reduction itself.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1047define double @fmaxv_v1f64(<1 x double> %a) {
1048; CHECK-LABEL: fmaxv_v1f64:
1049; CHECK:       // %bb.0:
1050; CHECK-NEXT:    ret
1051;
1052; NONEON-NOSVE-LABEL: fmaxv_v1f64:
1053; NONEON-NOSVE:       // %bb.0:
1054; NONEON-NOSVE-NEXT:    ret
1055  %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
1056  ret double %res
1057}
1058
; fmaxv_v2f64: llvm.vector.reduce.fmax over a <2 x double> register argument.
; The +sve RUN line expects a single fmaxnmv governed by a vl2 predicate.
; The RUN line without +sve expects the vector to be pushed with a pre-indexed
; str, both lanes popped with a post-indexed ldp, and a single scalar fmaxnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1059define double @fmaxv_v2f64(<2 x double> %a) {
1060; CHECK-LABEL: fmaxv_v2f64:
1061; CHECK:       // %bb.0:
1062; CHECK-NEXT:    ptrue p0.d, vl2
1063; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1064; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
1065; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1066; CHECK-NEXT:    ret
1067;
1068; NONEON-NOSVE-LABEL: fmaxv_v2f64:
1069; NONEON-NOSVE:       // %bb.0:
1070; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1071; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1072; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp], #16
1073; NONEON-NOSVE-NEXT:    fmaxnm d0, d1, d0
1074; NONEON-NOSVE-NEXT:    ret
1075  %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
1076  ret double %res
1077}
1078
; fmaxv_v4f64: loads <4 x double> from %a and reduces it with
; llvm.vector.reduce.fmax.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fmaxnm, and then reduced by fmaxnmv (vl2).
; The RUN line without +sve expects both halves to be copied to the stack,
; the lanes reloaded with ldp, and reduced with three scalar fmaxnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1079define double @fmaxv_v4f64(ptr %a) {
1080; CHECK-LABEL: fmaxv_v4f64:
1081; CHECK:       // %bb.0:
1082; CHECK-NEXT:    ldp q1, q0, [x0]
1083; CHECK-NEXT:    ptrue p0.d, vl2
1084; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
1085; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
1086; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1087; CHECK-NEXT:    ret
1088;
1089; NONEON-NOSVE-LABEL: fmaxv_v4f64:
1090; NONEON-NOSVE:       // %bb.0:
1091; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1092; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1093; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1094; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
1095; NONEON-NOSVE-NEXT:    ldp d3, d2, [sp], #32
1096; NONEON-NOSVE-NEXT:    fmaxnm d0, d2, d0
1097; NONEON-NOSVE-NEXT:    fmaxnm d1, d3, d1
1098; NONEON-NOSVE-NEXT:    fmaxnm d0, d1, d0
1099; NONEON-NOSVE-NEXT:    ret
1100  %op = load <4 x double>, ptr %a
1101  %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
1102  ret double %res
1103}
1104
1105;
1106; FMINNMV
1107;
1108
; fminv_v4f16: llvm.vector.reduce.fmin over a <4 x half> register argument.
; The +sve RUN line expects a single fminnmv governed by a vl4 predicate.
; The RUN line without +sve expects the vector to be stored to the stack and
; reduced by a scalar fminnm chain that converts each half operand to single
; precision and converts every intermediate result back to half.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1109define half @fminv_v4f16(<4 x half> %a) {
1110; CHECK-LABEL: fminv_v4f16:
1111; CHECK:       // %bb.0:
1112; CHECK-NEXT:    ptrue p0.h, vl4
1113; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1114; CHECK-NEXT:    fminnmv h0, p0, z0.h
1115; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1116; CHECK-NEXT:    ret
1117;
1118; NONEON-NOSVE-LABEL: fminv_v4f16:
1119; NONEON-NOSVE:       // %bb.0:
1120; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1121; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1122; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1123; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1124; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1125; NONEON-NOSVE-NEXT:    fcvt s1, h1
1126; NONEON-NOSVE-NEXT:    fcvt s0, h0
1127; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1128; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1129; NONEON-NOSVE-NEXT:    fcvt s1, h1
1130; NONEON-NOSVE-NEXT:    fcvt h0, s0
1131; NONEON-NOSVE-NEXT:    fcvt s0, h0
1132; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1133; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1134; NONEON-NOSVE-NEXT:    fcvt s1, h1
1135; NONEON-NOSVE-NEXT:    fcvt h0, s0
1136; NONEON-NOSVE-NEXT:    fcvt s0, h0
1137; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1138; NONEON-NOSVE-NEXT:    fcvt h0, s0
1139; NONEON-NOSVE-NEXT:    add sp, sp, #16
1140; NONEON-NOSVE-NEXT:    ret
1141  %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
1142  ret half %res
1143}
1144
; fminv_v8f16: llvm.vector.reduce.fmin over an <8 x half> register argument.
; The +sve RUN line expects a single fminnmv governed by a vl8 predicate.
; The RUN line without +sve expects the vector to be stored to the stack and
; reduced by a scalar fminnm chain that converts each half operand to single
; precision and converts every intermediate result back to half.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1145define half @fminv_v8f16(<8 x half> %a) {
1146; CHECK-LABEL: fminv_v8f16:
1147; CHECK:       // %bb.0:
1148; CHECK-NEXT:    ptrue p0.h, vl8
1149; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1150; CHECK-NEXT:    fminnmv h0, p0, z0.h
1151; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1152; CHECK-NEXT:    ret
1153;
1154; NONEON-NOSVE-LABEL: fminv_v8f16:
1155; NONEON-NOSVE:       // %bb.0:
1156; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1157; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1158; NONEON-NOSVE-NEXT:    ldr h0, [sp]
1159; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1160; NONEON-NOSVE-NEXT:    fcvt s1, h1
1161; NONEON-NOSVE-NEXT:    fcvt s0, h0
1162; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1163; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1164; NONEON-NOSVE-NEXT:    fcvt s1, h1
1165; NONEON-NOSVE-NEXT:    fcvt h0, s0
1166; NONEON-NOSVE-NEXT:    fcvt s0, h0
1167; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1168; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1169; NONEON-NOSVE-NEXT:    fcvt s1, h1
1170; NONEON-NOSVE-NEXT:    fcvt h0, s0
1171; NONEON-NOSVE-NEXT:    fcvt s0, h0
1172; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1173; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1174; NONEON-NOSVE-NEXT:    fcvt s1, h1
1175; NONEON-NOSVE-NEXT:    fcvt h0, s0
1176; NONEON-NOSVE-NEXT:    fcvt s0, h0
1177; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1178; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1179; NONEON-NOSVE-NEXT:    fcvt s1, h1
1180; NONEON-NOSVE-NEXT:    fcvt h0, s0
1181; NONEON-NOSVE-NEXT:    fcvt s0, h0
1182; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1183; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1184; NONEON-NOSVE-NEXT:    fcvt s1, h1
1185; NONEON-NOSVE-NEXT:    fcvt h0, s0
1186; NONEON-NOSVE-NEXT:    fcvt s0, h0
1187; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1188; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1189; NONEON-NOSVE-NEXT:    fcvt s1, h1
1190; NONEON-NOSVE-NEXT:    fcvt h0, s0
1191; NONEON-NOSVE-NEXT:    fcvt s0, h0
1192; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1193; NONEON-NOSVE-NEXT:    fcvt h0, s0
1194; NONEON-NOSVE-NEXT:    add sp, sp, #16
1195; NONEON-NOSVE-NEXT:    ret
1196  %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
1197  ret half %res
1198}
1199
; fminv_v16f16: loads <16 x half> from %a and reduces it with
; llvm.vector.reduce.fmin.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fminnm, and then reduced by fminnmv (vl8).
; The RUN line without +sve expects both halves to be copied to the stack and
; reduced with scalar fminnm instructions, with fcvt conversions between half
; and single precision around each step.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1200define half @fminv_v16f16(ptr %a) {
1201; CHECK-LABEL: fminv_v16f16:
1202; CHECK:       // %bb.0:
1203; CHECK-NEXT:    ldp q1, q0, [x0]
1204; CHECK-NEXT:    ptrue p0.h, vl8
1205; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
1206; CHECK-NEXT:    fminnmv h0, p0, z0.h
1207; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1208; CHECK-NEXT:    ret
1209;
1210; NONEON-NOSVE-LABEL: fminv_v16f16:
1211; NONEON-NOSVE:       // %bb.0:
1212; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1213; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1214; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1215; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1216; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1217; NONEON-NOSVE-NEXT:    ldr h2, [sp, #16]
1218; NONEON-NOSVE-NEXT:    ldr h3, [sp]
1219; NONEON-NOSVE-NEXT:    fcvt s0, h0
1220; NONEON-NOSVE-NEXT:    fcvt s1, h1
1221; NONEON-NOSVE-NEXT:    fcvt s2, h2
1222; NONEON-NOSVE-NEXT:    fcvt s3, h3
1223; NONEON-NOSVE-NEXT:    fminnm s0, s1, s0
1224; NONEON-NOSVE-NEXT:    fminnm s1, s3, s2
1225; NONEON-NOSVE-NEXT:    ldr h2, [sp, #20]
1226; NONEON-NOSVE-NEXT:    ldr h3, [sp, #4]
1227; NONEON-NOSVE-NEXT:    fcvt s2, h2
1228; NONEON-NOSVE-NEXT:    fcvt s3, h3
1229; NONEON-NOSVE-NEXT:    fcvt h0, s0
1230; NONEON-NOSVE-NEXT:    fcvt h1, s1
1231; NONEON-NOSVE-NEXT:    fminnm s2, s3, s2
1232; NONEON-NOSVE-NEXT:    ldr h3, [sp, #6]
1233; NONEON-NOSVE-NEXT:    fcvt s0, h0
1234; NONEON-NOSVE-NEXT:    fcvt s1, h1
1235; NONEON-NOSVE-NEXT:    fcvt s3, h3
1236; NONEON-NOSVE-NEXT:    fminnm s0, s1, s0
1237; NONEON-NOSVE-NEXT:    fcvt h1, s2
1238; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
1239; NONEON-NOSVE-NEXT:    fcvt s2, h2
1240; NONEON-NOSVE-NEXT:    fcvt h0, s0
1241; NONEON-NOSVE-NEXT:    fcvt s1, h1
1242; NONEON-NOSVE-NEXT:    fminnm s2, s3, s2
1243; NONEON-NOSVE-NEXT:    ldr h3, [sp, #8]
1244; NONEON-NOSVE-NEXT:    fcvt s0, h0
1245; NONEON-NOSVE-NEXT:    fcvt s3, h3
1246; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1247; NONEON-NOSVE-NEXT:    fcvt h1, s2
1248; NONEON-NOSVE-NEXT:    ldr h2, [sp, #24]
1249; NONEON-NOSVE-NEXT:    fcvt s2, h2
1250; NONEON-NOSVE-NEXT:    fcvt h0, s0
1251; NONEON-NOSVE-NEXT:    fcvt s1, h1
1252; NONEON-NOSVE-NEXT:    fminnm s2, s3, s2
1253; NONEON-NOSVE-NEXT:    ldr h3, [sp, #10]
1254; NONEON-NOSVE-NEXT:    fcvt s0, h0
1255; NONEON-NOSVE-NEXT:    fcvt s3, h3
1256; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1257; NONEON-NOSVE-NEXT:    fcvt h1, s2
1258; NONEON-NOSVE-NEXT:    ldr h2, [sp, #26]
1259; NONEON-NOSVE-NEXT:    fcvt s2, h2
1260; NONEON-NOSVE-NEXT:    fcvt h0, s0
1261; NONEON-NOSVE-NEXT:    fcvt s1, h1
1262; NONEON-NOSVE-NEXT:    fminnm s2, s3, s2
1263; NONEON-NOSVE-NEXT:    ldr h3, [sp, #12]
1264; NONEON-NOSVE-NEXT:    fcvt s0, h0
1265; NONEON-NOSVE-NEXT:    fcvt s3, h3
1266; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1267; NONEON-NOSVE-NEXT:    fcvt h1, s2
1268; NONEON-NOSVE-NEXT:    ldr h2, [sp, #28]
1269; NONEON-NOSVE-NEXT:    fcvt s2, h2
1270; NONEON-NOSVE-NEXT:    fcvt h0, s0
1271; NONEON-NOSVE-NEXT:    fcvt s1, h1
1272; NONEON-NOSVE-NEXT:    fcvt s0, h0
1273; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1274; NONEON-NOSVE-NEXT:    fminnm s1, s3, s2
1275; NONEON-NOSVE-NEXT:    ldr h2, [sp, #30]
1276; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14]
1277; NONEON-NOSVE-NEXT:    fcvt s2, h2
1278; NONEON-NOSVE-NEXT:    fcvt s3, h3
1279; NONEON-NOSVE-NEXT:    fcvt h0, s0
1280; NONEON-NOSVE-NEXT:    fcvt h1, s1
1281; NONEON-NOSVE-NEXT:    fcvt s0, h0
1282; NONEON-NOSVE-NEXT:    fcvt s1, h1
1283; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1284; NONEON-NOSVE-NEXT:    fminnm s1, s3, s2
1285; NONEON-NOSVE-NEXT:    fcvt h0, s0
1286; NONEON-NOSVE-NEXT:    fcvt h1, s1
1287; NONEON-NOSVE-NEXT:    fcvt s0, h0
1288; NONEON-NOSVE-NEXT:    fcvt s1, h1
1289; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1290; NONEON-NOSVE-NEXT:    fcvt h0, s0
1291; NONEON-NOSVE-NEXT:    add sp, sp, #32
1292; NONEON-NOSVE-NEXT:    ret
1293  %op = load <16 x half>, ptr %a
1294  %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
1295  ret half %res
1296}
1297
; fminv_v2f32: llvm.vector.reduce.fmin over a <2 x float> register argument.
; The +sve RUN line expects a single fminnmv governed by a vl2 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; both lanes reloaded with one ldp, and a single scalar fminnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1298define float @fminv_v2f32(<2 x float> %a) {
1299; CHECK-LABEL: fminv_v2f32:
1300; CHECK:       // %bb.0:
1301; CHECK-NEXT:    ptrue p0.s, vl2
1302; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1303; CHECK-NEXT:    fminnmv s0, p0, z0.s
1304; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1305; CHECK-NEXT:    ret
1306;
1307; NONEON-NOSVE-LABEL: fminv_v2f32:
1308; NONEON-NOSVE:       // %bb.0:
1309; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1310; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1311; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1312; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
1313; NONEON-NOSVE-NEXT:    fminnm s0, s1, s0
1314; NONEON-NOSVE-NEXT:    add sp, sp, #16
1315; NONEON-NOSVE-NEXT:    ret
1316  %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
1317  ret float %res
1318}
1319
; fminv_v4f32: llvm.vector.reduce.fmin over a <4 x float> register argument.
; The +sve RUN line expects a single fminnmv governed by a vl4 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; the lanes reloaded pairwise with ldp, and three chained scalar fminnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1320define float @fminv_v4f32(<4 x float> %a) {
1321; CHECK-LABEL: fminv_v4f32:
1322; CHECK:       // %bb.0:
1323; CHECK-NEXT:    ptrue p0.s, vl4
1324; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1325; CHECK-NEXT:    fminnmv s0, p0, z0.s
1326; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1327; CHECK-NEXT:    ret
1328;
1329; NONEON-NOSVE-LABEL: fminv_v4f32:
1330; NONEON-NOSVE:       // %bb.0:
1331; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1332; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1333; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
1334; NONEON-NOSVE-NEXT:    fminnm s0, s1, s0
1335; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
1336; NONEON-NOSVE-NEXT:    fminnm s0, s0, s2
1337; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1338; NONEON-NOSVE-NEXT:    add sp, sp, #16
1339; NONEON-NOSVE-NEXT:    ret
1340  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
1341  ret float %res
1342}
1343
; fminv_v8f32: loads <8 x float> from %a and reduces it with
; llvm.vector.reduce.fmin.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fminnm, and then reduced by fminnmv (vl4).
; The RUN line without +sve expects both halves to be copied to the stack,
; the lanes reloaded pairwise with ldp, and reduced with scalar fminnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1344define float @fminv_v8f32(ptr %a) {
1345; CHECK-LABEL: fminv_v8f32:
1346; CHECK:       // %bb.0:
1347; CHECK-NEXT:    ldp q1, q0, [x0]
1348; CHECK-NEXT:    ptrue p0.s, vl4
1349; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
1350; CHECK-NEXT:    fminnmv s0, p0, z0.s
1351; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1352; CHECK-NEXT:    ret
1353;
1354; NONEON-NOSVE-LABEL: fminv_v8f32:
1355; NONEON-NOSVE:       // %bb.0:
1356; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1357; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1358; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1359; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #16]
1360; NONEON-NOSVE-NEXT:    ldp s3, s2, [sp]
1361; NONEON-NOSVE-NEXT:    fminnm s0, s2, s0
1362; NONEON-NOSVE-NEXT:    fminnm s1, s3, s1
1363; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
1364; NONEON-NOSVE-NEXT:    fminnm s0, s1, s0
1365; NONEON-NOSVE-NEXT:    ldp s3, s1, [sp, #24]
1366; NONEON-NOSVE-NEXT:    fminnm s2, s2, s3
1367; NONEON-NOSVE-NEXT:    fminnm s1, s4, s1
1368; NONEON-NOSVE-NEXT:    fminnm s0, s0, s2
1369; NONEON-NOSVE-NEXT:    fminnm s0, s0, s1
1370; NONEON-NOSVE-NEXT:    add sp, sp, #32
1371; NONEON-NOSVE-NEXT:    ret
1372  %op = load <8 x float>, ptr %a
1373  %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
1374  ret float %res
1375}
1376
; fminv_v1f64: llvm.vector.reduce.fmin over a single-element <1 x double>.
; Both RUN lines expect the function body to be just `ret`, i.e. no
; instructions are emitted for the reduction itself.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1377define double @fminv_v1f64(<1 x double> %a) {
1378; CHECK-LABEL: fminv_v1f64:
1379; CHECK:       // %bb.0:
1380; CHECK-NEXT:    ret
1381;
1382; NONEON-NOSVE-LABEL: fminv_v1f64:
1383; NONEON-NOSVE:       // %bb.0:
1384; NONEON-NOSVE-NEXT:    ret
1385  %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
1386  ret double %res
1387}
1388
; fminv_v2f64: llvm.vector.reduce.fmin over a <2 x double> register argument.
; The +sve RUN line expects a single fminnmv governed by a vl2 predicate.
; The RUN line without +sve expects the vector to be pushed with a pre-indexed
; str, both lanes popped with a post-indexed ldp, and a single scalar fminnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1389define double @fminv_v2f64(<2 x double> %a) {
1390; CHECK-LABEL: fminv_v2f64:
1391; CHECK:       // %bb.0:
1392; CHECK-NEXT:    ptrue p0.d, vl2
1393; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1394; CHECK-NEXT:    fminnmv d0, p0, z0.d
1395; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1396; CHECK-NEXT:    ret
1397;
1398; NONEON-NOSVE-LABEL: fminv_v2f64:
1399; NONEON-NOSVE:       // %bb.0:
1400; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1401; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1402; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp], #16
1403; NONEON-NOSVE-NEXT:    fminnm d0, d1, d0
1404; NONEON-NOSVE-NEXT:    ret
1405  %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
1406  ret double %res
1407}
1408
; fminv_v4f64: loads <4 x double> from %a and reduces it with
; llvm.vector.reduce.fmin.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fminnm, and then reduced by fminnmv (vl2).
; The RUN line without +sve expects both halves to be copied to the stack,
; the lanes reloaded with ldp, and reduced with three scalar fminnm.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1409define double @fminv_v4f64(ptr %a) {
1410; CHECK-LABEL: fminv_v4f64:
1411; CHECK:       // %bb.0:
1412; CHECK-NEXT:    ldp q1, q0, [x0]
1413; CHECK-NEXT:    ptrue p0.d, vl2
1414; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
1415; CHECK-NEXT:    fminnmv d0, p0, z0.d
1416; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1417; CHECK-NEXT:    ret
1418;
1419; NONEON-NOSVE-LABEL: fminv_v4f64:
1420; NONEON-NOSVE:       // %bb.0:
1421; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1422; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1423; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1424; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
1425; NONEON-NOSVE-NEXT:    ldp d3, d2, [sp], #32
1426; NONEON-NOSVE-NEXT:    fminnm d0, d2, d0
1427; NONEON-NOSVE-NEXT:    fminnm d1, d3, d1
1428; NONEON-NOSVE-NEXT:    fminnm d0, d1, d0
1429; NONEON-NOSVE-NEXT:    ret
1430  %op = load <4 x double>, ptr %a
1431  %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
1432  ret double %res
1433}
1434
1435;
1436; FMAXV
1437;
1438
; fmaximumv_v4f16: llvm.vector.reduce.fmaximum over a <4 x half> register
; argument.
; The +sve RUN line expects a single fmaxv governed by a vl4 predicate.
; The RUN line without +sve expects the vector to be stored to the stack and
; reduced by a scalar fmax chain that converts each half operand to single
; precision and converts every intermediate result back to half.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1439define half @fmaximumv_v4f16(<4 x half> %a) {
1440; CHECK-LABEL: fmaximumv_v4f16:
1441; CHECK:       // %bb.0:
1442; CHECK-NEXT:    ptrue p0.h, vl4
1443; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1444; CHECK-NEXT:    fmaxv h0, p0, z0.h
1445; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1446; CHECK-NEXT:    ret
1447;
1448; NONEON-NOSVE-LABEL: fmaximumv_v4f16:
1449; NONEON-NOSVE:       // %bb.0:
1450; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1451; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1452; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1453; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1454; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1455; NONEON-NOSVE-NEXT:    fcvt s1, h1
1456; NONEON-NOSVE-NEXT:    fcvt s0, h0
1457; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1458; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1459; NONEON-NOSVE-NEXT:    fcvt s1, h1
1460; NONEON-NOSVE-NEXT:    fcvt h0, s0
1461; NONEON-NOSVE-NEXT:    fcvt s0, h0
1462; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1463; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1464; NONEON-NOSVE-NEXT:    fcvt s1, h1
1465; NONEON-NOSVE-NEXT:    fcvt h0, s0
1466; NONEON-NOSVE-NEXT:    fcvt s0, h0
1467; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1468; NONEON-NOSVE-NEXT:    fcvt h0, s0
1469; NONEON-NOSVE-NEXT:    add sp, sp, #16
1470; NONEON-NOSVE-NEXT:    ret
1471  %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
1472  ret half %res
1473}
1474
; fmaximumv_v8f16: llvm.vector.reduce.fmaximum over an <8 x half> register
; argument.
; The +sve RUN line expects a single fmaxv governed by a vl8 predicate.
; The RUN line without +sve expects the vector to be stored to the stack and
; reduced by a scalar fmax chain that converts each half operand to single
; precision and converts every intermediate result back to half.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1475define half @fmaximumv_v8f16(<8 x half> %a) {
1476; CHECK-LABEL: fmaximumv_v8f16:
1477; CHECK:       // %bb.0:
1478; CHECK-NEXT:    ptrue p0.h, vl8
1479; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1480; CHECK-NEXT:    fmaxv h0, p0, z0.h
1481; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1482; CHECK-NEXT:    ret
1483;
1484; NONEON-NOSVE-LABEL: fmaximumv_v8f16:
1485; NONEON-NOSVE:       // %bb.0:
1486; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1487; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1488; NONEON-NOSVE-NEXT:    ldr h0, [sp]
1489; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1490; NONEON-NOSVE-NEXT:    fcvt s1, h1
1491; NONEON-NOSVE-NEXT:    fcvt s0, h0
1492; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1493; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1494; NONEON-NOSVE-NEXT:    fcvt s1, h1
1495; NONEON-NOSVE-NEXT:    fcvt h0, s0
1496; NONEON-NOSVE-NEXT:    fcvt s0, h0
1497; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1498; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1499; NONEON-NOSVE-NEXT:    fcvt s1, h1
1500; NONEON-NOSVE-NEXT:    fcvt h0, s0
1501; NONEON-NOSVE-NEXT:    fcvt s0, h0
1502; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1503; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1504; NONEON-NOSVE-NEXT:    fcvt s1, h1
1505; NONEON-NOSVE-NEXT:    fcvt h0, s0
1506; NONEON-NOSVE-NEXT:    fcvt s0, h0
1507; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1508; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1509; NONEON-NOSVE-NEXT:    fcvt s1, h1
1510; NONEON-NOSVE-NEXT:    fcvt h0, s0
1511; NONEON-NOSVE-NEXT:    fcvt s0, h0
1512; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1513; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1514; NONEON-NOSVE-NEXT:    fcvt s1, h1
1515; NONEON-NOSVE-NEXT:    fcvt h0, s0
1516; NONEON-NOSVE-NEXT:    fcvt s0, h0
1517; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1518; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1519; NONEON-NOSVE-NEXT:    fcvt s1, h1
1520; NONEON-NOSVE-NEXT:    fcvt h0, s0
1521; NONEON-NOSVE-NEXT:    fcvt s0, h0
1522; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1523; NONEON-NOSVE-NEXT:    fcvt h0, s0
1524; NONEON-NOSVE-NEXT:    add sp, sp, #16
1525; NONEON-NOSVE-NEXT:    ret
1526  %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
1527  ret half %res
1528}
1529
; fmaximumv_v16f16: loads <16 x half> from %a and reduces it with
; llvm.vector.reduce.fmaximum.
; The +sve RUN line expects both 128-bit halves to be loaded with one ldp,
; combined by a predicated vector fmax, and then reduced by fmaxv (vl8).
; The RUN line without +sve expects both halves to be copied to the stack and
; reduced with scalar fmax instructions, with fcvt conversions between half
; and single precision around each step.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1530define half @fmaximumv_v16f16(ptr %a) {
1531; CHECK-LABEL: fmaximumv_v16f16:
1532; CHECK:       // %bb.0:
1533; CHECK-NEXT:    ldp q1, q0, [x0]
1534; CHECK-NEXT:    ptrue p0.h, vl8
1535; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
1536; CHECK-NEXT:    fmaxv h0, p0, z0.h
1537; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1538; CHECK-NEXT:    ret
1539;
1540; NONEON-NOSVE-LABEL: fmaximumv_v16f16:
1541; NONEON-NOSVE:       // %bb.0:
1542; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1543; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1544; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1545; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1546; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1547; NONEON-NOSVE-NEXT:    ldr h2, [sp, #16]
1548; NONEON-NOSVE-NEXT:    ldr h3, [sp]
1549; NONEON-NOSVE-NEXT:    fcvt s0, h0
1550; NONEON-NOSVE-NEXT:    fcvt s1, h1
1551; NONEON-NOSVE-NEXT:    fcvt s2, h2
1552; NONEON-NOSVE-NEXT:    fcvt s3, h3
1553; NONEON-NOSVE-NEXT:    fmax s0, s1, s0
1554; NONEON-NOSVE-NEXT:    fmax s1, s3, s2
1555; NONEON-NOSVE-NEXT:    ldr h2, [sp, #20]
1556; NONEON-NOSVE-NEXT:    ldr h3, [sp, #4]
1557; NONEON-NOSVE-NEXT:    fcvt s2, h2
1558; NONEON-NOSVE-NEXT:    fcvt s3, h3
1559; NONEON-NOSVE-NEXT:    fcvt h0, s0
1560; NONEON-NOSVE-NEXT:    fcvt h1, s1
1561; NONEON-NOSVE-NEXT:    fmax s2, s3, s2
1562; NONEON-NOSVE-NEXT:    ldr h3, [sp, #6]
1563; NONEON-NOSVE-NEXT:    fcvt s0, h0
1564; NONEON-NOSVE-NEXT:    fcvt s1, h1
1565; NONEON-NOSVE-NEXT:    fcvt s3, h3
1566; NONEON-NOSVE-NEXT:    fmax s0, s1, s0
1567; NONEON-NOSVE-NEXT:    fcvt h1, s2
1568; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
1569; NONEON-NOSVE-NEXT:    fcvt s2, h2
1570; NONEON-NOSVE-NEXT:    fcvt h0, s0
1571; NONEON-NOSVE-NEXT:    fcvt s1, h1
1572; NONEON-NOSVE-NEXT:    fmax s2, s3, s2
1573; NONEON-NOSVE-NEXT:    ldr h3, [sp, #8]
1574; NONEON-NOSVE-NEXT:    fcvt s0, h0
1575; NONEON-NOSVE-NEXT:    fcvt s3, h3
1576; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1577; NONEON-NOSVE-NEXT:    fcvt h1, s2
1578; NONEON-NOSVE-NEXT:    ldr h2, [sp, #24]
1579; NONEON-NOSVE-NEXT:    fcvt s2, h2
1580; NONEON-NOSVE-NEXT:    fcvt h0, s0
1581; NONEON-NOSVE-NEXT:    fcvt s1, h1
1582; NONEON-NOSVE-NEXT:    fmax s2, s3, s2
1583; NONEON-NOSVE-NEXT:    ldr h3, [sp, #10]
1584; NONEON-NOSVE-NEXT:    fcvt s0, h0
1585; NONEON-NOSVE-NEXT:    fcvt s3, h3
1586; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1587; NONEON-NOSVE-NEXT:    fcvt h1, s2
1588; NONEON-NOSVE-NEXT:    ldr h2, [sp, #26]
1589; NONEON-NOSVE-NEXT:    fcvt s2, h2
1590; NONEON-NOSVE-NEXT:    fcvt h0, s0
1591; NONEON-NOSVE-NEXT:    fcvt s1, h1
1592; NONEON-NOSVE-NEXT:    fmax s2, s3, s2
1593; NONEON-NOSVE-NEXT:    ldr h3, [sp, #12]
1594; NONEON-NOSVE-NEXT:    fcvt s0, h0
1595; NONEON-NOSVE-NEXT:    fcvt s3, h3
1596; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1597; NONEON-NOSVE-NEXT:    fcvt h1, s2
1598; NONEON-NOSVE-NEXT:    ldr h2, [sp, #28]
1599; NONEON-NOSVE-NEXT:    fcvt s2, h2
1600; NONEON-NOSVE-NEXT:    fcvt h0, s0
1601; NONEON-NOSVE-NEXT:    fcvt s1, h1
1602; NONEON-NOSVE-NEXT:    fcvt s0, h0
1603; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1604; NONEON-NOSVE-NEXT:    fmax s1, s3, s2
1605; NONEON-NOSVE-NEXT:    ldr h2, [sp, #30]
1606; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14]
1607; NONEON-NOSVE-NEXT:    fcvt s2, h2
1608; NONEON-NOSVE-NEXT:    fcvt s3, h3
1609; NONEON-NOSVE-NEXT:    fcvt h0, s0
1610; NONEON-NOSVE-NEXT:    fcvt h1, s1
1611; NONEON-NOSVE-NEXT:    fcvt s0, h0
1612; NONEON-NOSVE-NEXT:    fcvt s1, h1
1613; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1614; NONEON-NOSVE-NEXT:    fmax s1, s3, s2
1615; NONEON-NOSVE-NEXT:    fcvt h0, s0
1616; NONEON-NOSVE-NEXT:    fcvt h1, s1
1617; NONEON-NOSVE-NEXT:    fcvt s0, h0
1618; NONEON-NOSVE-NEXT:    fcvt s1, h1
1619; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1620; NONEON-NOSVE-NEXT:    fcvt h0, s0
1621; NONEON-NOSVE-NEXT:    add sp, sp, #32
1622; NONEON-NOSVE-NEXT:    ret
1623  %op = load <16 x half>, ptr %a
1624  %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
1625  ret half %res
1626}
1627
; fmaximumv_v2f32: llvm.vector.reduce.fmaximum over a <2 x float> register
; argument.
; The +sve RUN line expects a single fmaxv governed by a vl2 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; both lanes reloaded with one ldp, and a single scalar fmax.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1628define float @fmaximumv_v2f32(<2 x float> %a) {
1629; CHECK-LABEL: fmaximumv_v2f32:
1630; CHECK:       // %bb.0:
1631; CHECK-NEXT:    ptrue p0.s, vl2
1632; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1633; CHECK-NEXT:    fmaxv s0, p0, z0.s
1634; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1635; CHECK-NEXT:    ret
1636;
1637; NONEON-NOSVE-LABEL: fmaximumv_v2f32:
1638; NONEON-NOSVE:       // %bb.0:
1639; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1640; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1641; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1642; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
1643; NONEON-NOSVE-NEXT:    fmax s0, s1, s0
1644; NONEON-NOSVE-NEXT:    add sp, sp, #16
1645; NONEON-NOSVE-NEXT:    ret
1646  %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
1647  ret float %res
1648}
1649
; fmaximumv_v4f32: llvm.vector.reduce.fmaximum over a <4 x float> register
; argument.
; The +sve RUN line expects a single fmaxv governed by a vl4 predicate.
; The RUN line without +sve expects the vector to be stored to the stack,
; the lanes reloaded pairwise with ldp, and three chained scalar fmax.
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1650define float @fmaximumv_v4f32(<4 x float> %a) {
1651; CHECK-LABEL: fmaximumv_v4f32:
1652; CHECK:       // %bb.0:
1653; CHECK-NEXT:    ptrue p0.s, vl4
1654; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1655; CHECK-NEXT:    fmaxv s0, p0, z0.s
1656; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1657; CHECK-NEXT:    ret
1658;
1659; NONEON-NOSVE-LABEL: fmaximumv_v4f32:
1660; NONEON-NOSVE:       // %bb.0:
1661; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1662; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1663; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
1664; NONEON-NOSVE-NEXT:    fmax s0, s1, s0
1665; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
1666; NONEON-NOSVE-NEXT:    fmax s0, s0, s2
1667; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1668; NONEON-NOSVE-NEXT:    add sp, sp, #16
1669; NONEON-NOSVE-NEXT:    ret
1670  %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
1671  ret float %res
1672}
1673
; fmaximum reduction of a <8 x float> loaded from %a (two 128-bit halves).
; With +sve the two halves are first combined lane-wise with a predicated
; fmax and the result is reduced with fmaxv. Without it, both halves are
; copied to the stack and the eight lanes are folded with scalar fmax.
1674define float @fmaximumv_v8f32(ptr %a) {
1675; CHECK-LABEL: fmaximumv_v8f32:
1676; CHECK:       // %bb.0:
1677; CHECK-NEXT:    ldp q1, q0, [x0]
1678; CHECK-NEXT:    ptrue p0.s, vl4
1679; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
1680; CHECK-NEXT:    fmaxv s0, p0, z0.s
1681; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1682; CHECK-NEXT:    ret
1683;
1684; NONEON-NOSVE-LABEL: fmaximumv_v8f32:
1685; NONEON-NOSVE:       // %bb.0:
1686; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1687; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1688; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1689; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #16]
1690; NONEON-NOSVE-NEXT:    ldp s3, s2, [sp]
1691; NONEON-NOSVE-NEXT:    fmax s0, s2, s0
1692; NONEON-NOSVE-NEXT:    fmax s1, s3, s1
1693; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
1694; NONEON-NOSVE-NEXT:    fmax s0, s1, s0
1695; NONEON-NOSVE-NEXT:    ldp s3, s1, [sp, #24]
1696; NONEON-NOSVE-NEXT:    fmax s2, s2, s3
1697; NONEON-NOSVE-NEXT:    fmax s1, s4, s1
1698; NONEON-NOSVE-NEXT:    fmax s0, s0, s2
1699; NONEON-NOSVE-NEXT:    fmax s0, s0, s1
1700; NONEON-NOSVE-NEXT:    add sp, sp, #32
1701; NONEON-NOSVE-NEXT:    ret
1702  %op = load <8 x float>, ptr %a
1703  %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
1704  ret float %res
1705}
1706
; fmaximum reduction of a single-element <1 x double> argument.
; Both run configurations expect the function body to be just `ret`, i.e. no
; instructions are emitted for the reduction itself.
1707define double @fmaximumv_v1f64(<1 x double> %a) {
1708; CHECK-LABEL: fmaximumv_v1f64:
1709; CHECK:       // %bb.0:
1710; CHECK-NEXT:    ret
1711;
1712; NONEON-NOSVE-LABEL: fmaximumv_v1f64:
1713; NONEON-NOSVE:       // %bb.0:
1714; NONEON-NOSVE-NEXT:    ret
1715  %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
1716  ret double %res
1717}
1718
; fmaximum reduction of a <2 x double> argument.
; With +sve the expected code is a single fmaxv governed by a two-lane .d
; predicate (ptrue vl2). Without it, q0 is pushed to the stack, both lanes
; are reloaded with a post-indexed ldp (which also pops the slot) and
; combined with one scalar fmax.
1719define double @fmaximumv_v2f64(<2 x double> %a) {
1720; CHECK-LABEL: fmaximumv_v2f64:
1721; CHECK:       // %bb.0:
1722; CHECK-NEXT:    ptrue p0.d, vl2
1723; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1724; CHECK-NEXT:    fmaxv d0, p0, z0.d
1725; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1726; CHECK-NEXT:    ret
1727;
1728; NONEON-NOSVE-LABEL: fmaximumv_v2f64:
1729; NONEON-NOSVE:       // %bb.0:
1730; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1731; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1732; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp], #16
1733; NONEON-NOSVE-NEXT:    fmax d0, d1, d0
1734; NONEON-NOSVE-NEXT:    ret
1735  %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
1736  ret double %res
1737}
1738
; fmaximum reduction of a <4 x double> loaded from %a (two 128-bit halves).
; With +sve the halves are combined lane-wise with a predicated fmax and then
; reduced with fmaxv. Without it, both halves are copied to the stack and the
; four lanes are folded with three scalar fmax instructions.
1739define double @fmaximumv_v4f64(ptr %a) {
1740; CHECK-LABEL: fmaximumv_v4f64:
1741; CHECK:       // %bb.0:
1742; CHECK-NEXT:    ldp q1, q0, [x0]
1743; CHECK-NEXT:    ptrue p0.d, vl2
1744; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
1745; CHECK-NEXT:    fmaxv d0, p0, z0.d
1746; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1747; CHECK-NEXT:    ret
1748;
1749; NONEON-NOSVE-LABEL: fmaximumv_v4f64:
1750; NONEON-NOSVE:       // %bb.0:
1751; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1752; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1753; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1754; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
1755; NONEON-NOSVE-NEXT:    ldp d3, d2, [sp], #32
1756; NONEON-NOSVE-NEXT:    fmax d0, d2, d0
1757; NONEON-NOSVE-NEXT:    fmax d1, d3, d1
1758; NONEON-NOSVE-NEXT:    fmax d0, d1, d0
1759; NONEON-NOSVE-NEXT:    ret
1760  %op = load <4 x double>, ptr %a
1761  %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
1762  ret double %res
1763}
1764
1765;
1766; FMINV
1767;
1768
; fminimum reduction of a <4 x half> argument.
; With +sve the expected code is a single fminv governed by a four-lane .h
; predicate (ptrue vl4). Without it, the vector is stored to the stack and
; each lane is reloaded, widened to single precision (fcvt s, h), combined
; with scalar fmin, and the running result is narrowed back to half
; (fcvt h, s) after every step.
1769define half @fminimumv_v4f16(<4 x half> %a) {
1770; CHECK-LABEL: fminimumv_v4f16:
1771; CHECK:       // %bb.0:
1772; CHECK-NEXT:    ptrue p0.h, vl4
1773; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1774; CHECK-NEXT:    fminv h0, p0, z0.h
1775; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1776; CHECK-NEXT:    ret
1777;
1778; NONEON-NOSVE-LABEL: fminimumv_v4f16:
1779; NONEON-NOSVE:       // %bb.0:
1780; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1781; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1782; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1783; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1784; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1785; NONEON-NOSVE-NEXT:    fcvt s1, h1
1786; NONEON-NOSVE-NEXT:    fcvt s0, h0
1787; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1788; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1789; NONEON-NOSVE-NEXT:    fcvt s1, h1
1790; NONEON-NOSVE-NEXT:    fcvt h0, s0
1791; NONEON-NOSVE-NEXT:    fcvt s0, h0
1792; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1793; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1794; NONEON-NOSVE-NEXT:    fcvt s1, h1
1795; NONEON-NOSVE-NEXT:    fcvt h0, s0
1796; NONEON-NOSVE-NEXT:    fcvt s0, h0
1797; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1798; NONEON-NOSVE-NEXT:    fcvt h0, s0
1799; NONEON-NOSVE-NEXT:    add sp, sp, #16
1800; NONEON-NOSVE-NEXT:    ret
1801  %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
1802  ret half %res
1803}
1804
; fminimum reduction of a <8 x half> argument.
; With +sve the expected code is a single fminv governed by an eight-lane .h
; predicate (ptrue vl8). Without it, q0 is pushed to the stack and the eight
; lanes are folded sequentially: each lane is widened to single precision,
; combined with scalar fmin, and the running result is narrowed back to half
; between steps.
1805define half @fminimumv_v8f16(<8 x half> %a) {
1806; CHECK-LABEL: fminimumv_v8f16:
1807; CHECK:       // %bb.0:
1808; CHECK-NEXT:    ptrue p0.h, vl8
1809; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1810; CHECK-NEXT:    fminv h0, p0, z0.h
1811; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1812; CHECK-NEXT:    ret
1813;
1814; NONEON-NOSVE-LABEL: fminimumv_v8f16:
1815; NONEON-NOSVE:       // %bb.0:
1816; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1817; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1818; NONEON-NOSVE-NEXT:    ldr h0, [sp]
1819; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1820; NONEON-NOSVE-NEXT:    fcvt s1, h1
1821; NONEON-NOSVE-NEXT:    fcvt s0, h0
1822; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1823; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1824; NONEON-NOSVE-NEXT:    fcvt s1, h1
1825; NONEON-NOSVE-NEXT:    fcvt h0, s0
1826; NONEON-NOSVE-NEXT:    fcvt s0, h0
1827; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1828; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1829; NONEON-NOSVE-NEXT:    fcvt s1, h1
1830; NONEON-NOSVE-NEXT:    fcvt h0, s0
1831; NONEON-NOSVE-NEXT:    fcvt s0, h0
1832; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1833; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1834; NONEON-NOSVE-NEXT:    fcvt s1, h1
1835; NONEON-NOSVE-NEXT:    fcvt h0, s0
1836; NONEON-NOSVE-NEXT:    fcvt s0, h0
1837; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1838; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1839; NONEON-NOSVE-NEXT:    fcvt s1, h1
1840; NONEON-NOSVE-NEXT:    fcvt h0, s0
1841; NONEON-NOSVE-NEXT:    fcvt s0, h0
1842; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1843; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1844; NONEON-NOSVE-NEXT:    fcvt s1, h1
1845; NONEON-NOSVE-NEXT:    fcvt h0, s0
1846; NONEON-NOSVE-NEXT:    fcvt s0, h0
1847; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1848; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1849; NONEON-NOSVE-NEXT:    fcvt s1, h1
1850; NONEON-NOSVE-NEXT:    fcvt h0, s0
1851; NONEON-NOSVE-NEXT:    fcvt s0, h0
1852; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1853; NONEON-NOSVE-NEXT:    fcvt h0, s0
1854; NONEON-NOSVE-NEXT:    add sp, sp, #16
1855; NONEON-NOSVE-NEXT:    ret
1856  %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
1857  ret half %res
1858}
1859
; fminimum reduction of a <16 x half> loaded from %a (two 128-bit halves).
; With +sve the halves are combined lane-wise with a predicated fmin and the
; result is reduced with fminv. Without it, both halves are copied to the
; stack; lanes at matching offsets in the two halves (e.g. [sp, #2] and
; [sp, #18]) are paired with scalar fmin in single precision, and the partial
; results are folded together, with a narrow-to-half / widen-to-single
; round trip after each fmin.
1860define half @fminimumv_v16f16(ptr %a) {
1861; CHECK-LABEL: fminimumv_v16f16:
1862; CHECK:       // %bb.0:
1863; CHECK-NEXT:    ldp q1, q0, [x0]
1864; CHECK-NEXT:    ptrue p0.h, vl8
1865; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
1866; CHECK-NEXT:    fminv h0, p0, z0.h
1867; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1868; CHECK-NEXT:    ret
1869;
1870; NONEON-NOSVE-LABEL: fminimumv_v16f16:
1871; NONEON-NOSVE:       // %bb.0:
1872; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1873; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
1874; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1875; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1876; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1877; NONEON-NOSVE-NEXT:    ldr h2, [sp, #16]
1878; NONEON-NOSVE-NEXT:    ldr h3, [sp]
1879; NONEON-NOSVE-NEXT:    fcvt s0, h0
1880; NONEON-NOSVE-NEXT:    fcvt s1, h1
1881; NONEON-NOSVE-NEXT:    fcvt s2, h2
1882; NONEON-NOSVE-NEXT:    fcvt s3, h3
1883; NONEON-NOSVE-NEXT:    fmin s0, s1, s0
1884; NONEON-NOSVE-NEXT:    fmin s1, s3, s2
1885; NONEON-NOSVE-NEXT:    ldr h2, [sp, #20]
1886; NONEON-NOSVE-NEXT:    ldr h3, [sp, #4]
1887; NONEON-NOSVE-NEXT:    fcvt s2, h2
1888; NONEON-NOSVE-NEXT:    fcvt s3, h3
1889; NONEON-NOSVE-NEXT:    fcvt h0, s0
1890; NONEON-NOSVE-NEXT:    fcvt h1, s1
1891; NONEON-NOSVE-NEXT:    fmin s2, s3, s2
1892; NONEON-NOSVE-NEXT:    ldr h3, [sp, #6]
1893; NONEON-NOSVE-NEXT:    fcvt s0, h0
1894; NONEON-NOSVE-NEXT:    fcvt s1, h1
1895; NONEON-NOSVE-NEXT:    fcvt s3, h3
1896; NONEON-NOSVE-NEXT:    fmin s0, s1, s0
1897; NONEON-NOSVE-NEXT:    fcvt h1, s2
1898; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
1899; NONEON-NOSVE-NEXT:    fcvt s2, h2
1900; NONEON-NOSVE-NEXT:    fcvt h0, s0
1901; NONEON-NOSVE-NEXT:    fcvt s1, h1
1902; NONEON-NOSVE-NEXT:    fmin s2, s3, s2
1903; NONEON-NOSVE-NEXT:    ldr h3, [sp, #8]
1904; NONEON-NOSVE-NEXT:    fcvt s0, h0
1905; NONEON-NOSVE-NEXT:    fcvt s3, h3
1906; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1907; NONEON-NOSVE-NEXT:    fcvt h1, s2
1908; NONEON-NOSVE-NEXT:    ldr h2, [sp, #24]
1909; NONEON-NOSVE-NEXT:    fcvt s2, h2
1910; NONEON-NOSVE-NEXT:    fcvt h0, s0
1911; NONEON-NOSVE-NEXT:    fcvt s1, h1
1912; NONEON-NOSVE-NEXT:    fmin s2, s3, s2
1913; NONEON-NOSVE-NEXT:    ldr h3, [sp, #10]
1914; NONEON-NOSVE-NEXT:    fcvt s0, h0
1915; NONEON-NOSVE-NEXT:    fcvt s3, h3
1916; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1917; NONEON-NOSVE-NEXT:    fcvt h1, s2
1918; NONEON-NOSVE-NEXT:    ldr h2, [sp, #26]
1919; NONEON-NOSVE-NEXT:    fcvt s2, h2
1920; NONEON-NOSVE-NEXT:    fcvt h0, s0
1921; NONEON-NOSVE-NEXT:    fcvt s1, h1
1922; NONEON-NOSVE-NEXT:    fmin s2, s3, s2
1923; NONEON-NOSVE-NEXT:    ldr h3, [sp, #12]
1924; NONEON-NOSVE-NEXT:    fcvt s0, h0
1925; NONEON-NOSVE-NEXT:    fcvt s3, h3
1926; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1927; NONEON-NOSVE-NEXT:    fcvt h1, s2
1928; NONEON-NOSVE-NEXT:    ldr h2, [sp, #28]
1929; NONEON-NOSVE-NEXT:    fcvt s2, h2
1930; NONEON-NOSVE-NEXT:    fcvt h0, s0
1931; NONEON-NOSVE-NEXT:    fcvt s1, h1
1932; NONEON-NOSVE-NEXT:    fcvt s0, h0
1933; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1934; NONEON-NOSVE-NEXT:    fmin s1, s3, s2
1935; NONEON-NOSVE-NEXT:    ldr h2, [sp, #30]
1936; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14]
1937; NONEON-NOSVE-NEXT:    fcvt s2, h2
1938; NONEON-NOSVE-NEXT:    fcvt s3, h3
1939; NONEON-NOSVE-NEXT:    fcvt h0, s0
1940; NONEON-NOSVE-NEXT:    fcvt h1, s1
1941; NONEON-NOSVE-NEXT:    fcvt s0, h0
1942; NONEON-NOSVE-NEXT:    fcvt s1, h1
1943; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1944; NONEON-NOSVE-NEXT:    fmin s1, s3, s2
1945; NONEON-NOSVE-NEXT:    fcvt h0, s0
1946; NONEON-NOSVE-NEXT:    fcvt h1, s1
1947; NONEON-NOSVE-NEXT:    fcvt s0, h0
1948; NONEON-NOSVE-NEXT:    fcvt s1, h1
1949; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1950; NONEON-NOSVE-NEXT:    fcvt h0, s0
1951; NONEON-NOSVE-NEXT:    add sp, sp, #32
1952; NONEON-NOSVE-NEXT:    ret
1953  %op = load <16 x half>, ptr %a
1954  %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
1955  ret half %res
1956}
1957
; fminimum reduction of a <2 x float> argument.
; With +sve the expected code is a single fminv governed by a two-lane .s
; predicate (ptrue vl2). Without it, the vector is stored to a 16-byte stack
; slot, reloaded as two scalars and combined with one scalar fmin.
1958define float @fminimumv_v2f32(<2 x float> %a) {
1959; CHECK-LABEL: fminimumv_v2f32:
1960; CHECK:       // %bb.0:
1961; CHECK-NEXT:    ptrue p0.s, vl2
1962; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1963; CHECK-NEXT:    fminv s0, p0, z0.s
1964; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1965; CHECK-NEXT:    ret
1966;
1967; NONEON-NOSVE-LABEL: fminimumv_v2f32:
1968; NONEON-NOSVE:       // %bb.0:
1969; NONEON-NOSVE-NEXT:    sub sp, sp, #16
1970; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1971; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
1972; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
1973; NONEON-NOSVE-NEXT:    fmin s0, s1, s0
1974; NONEON-NOSVE-NEXT:    add sp, sp, #16
1975; NONEON-NOSVE-NEXT:    ret
1976  %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
1977  ret float %res
1978}
1979
; fminimum reduction of a <4 x float> argument.
; With +sve the expected code is a single fminv governed by a four-lane .s
; predicate (ptrue vl4). Without it, q0 is pushed to the stack, the four
; lanes are reloaded pairwise and folded with three scalar fmin instructions.
1980define float @fminimumv_v4f32(<4 x float> %a) {
1981; CHECK-LABEL: fminimumv_v4f32:
1982; CHECK:       // %bb.0:
1983; CHECK-NEXT:    ptrue p0.s, vl4
1984; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1985; CHECK-NEXT:    fminv s0, p0, z0.s
1986; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1987; CHECK-NEXT:    ret
1988;
1989; NONEON-NOSVE-LABEL: fminimumv_v4f32:
1990; NONEON-NOSVE:       // %bb.0:
1991; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
1992; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
1993; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
1994; NONEON-NOSVE-NEXT:    fmin s0, s1, s0
1995; NONEON-NOSVE-NEXT:    ldp s2, s1, [sp, #8]
1996; NONEON-NOSVE-NEXT:    fmin s0, s0, s2
1997; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
1998; NONEON-NOSVE-NEXT:    add sp, sp, #16
1999; NONEON-NOSVE-NEXT:    ret
2000  %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
2001  ret float %res
2002}
2003
; fminimum reduction of a <8 x float> loaded from %a (two 128-bit halves).
; With +sve the two halves are first combined lane-wise with a predicated
; fmin and the result is reduced with fminv. Without it, both halves are
; copied to the stack and the eight lanes are folded with scalar fmin.
2004define float @fminimumv_v8f32(ptr %a) {
2005; CHECK-LABEL: fminimumv_v8f32:
2006; CHECK:       // %bb.0:
2007; CHECK-NEXT:    ldp q1, q0, [x0]
2008; CHECK-NEXT:    ptrue p0.s, vl4
2009; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
2010; CHECK-NEXT:    fminv s0, p0, z0.s
2011; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
2012; CHECK-NEXT:    ret
2013;
2014; NONEON-NOSVE-LABEL: fminimumv_v8f32:
2015; NONEON-NOSVE:       // %bb.0:
2016; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2017; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
2018; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2019; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #16]
2020; NONEON-NOSVE-NEXT:    ldp s3, s2, [sp]
2021; NONEON-NOSVE-NEXT:    fmin s0, s2, s0
2022; NONEON-NOSVE-NEXT:    fmin s1, s3, s1
2023; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
2024; NONEON-NOSVE-NEXT:    fmin s0, s1, s0
2025; NONEON-NOSVE-NEXT:    ldp s3, s1, [sp, #24]
2026; NONEON-NOSVE-NEXT:    fmin s2, s2, s3
2027; NONEON-NOSVE-NEXT:    fmin s1, s4, s1
2028; NONEON-NOSVE-NEXT:    fmin s0, s0, s2
2029; NONEON-NOSVE-NEXT:    fmin s0, s0, s1
2030; NONEON-NOSVE-NEXT:    add sp, sp, #32
2031; NONEON-NOSVE-NEXT:    ret
2032  %op = load <8 x float>, ptr %a
2033  %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
2034  ret float %res
2035}
2036
; fminimum reduction of a single-element <1 x double> argument.
; Both run configurations expect the function body to be just `ret`, i.e. no
; instructions are emitted for the reduction itself.
2037define double @fminimumv_v1f64(<1 x double> %a) {
2038; CHECK-LABEL: fminimumv_v1f64:
2039; CHECK:       // %bb.0:
2040; CHECK-NEXT:    ret
2041;
2042; NONEON-NOSVE-LABEL: fminimumv_v1f64:
2043; NONEON-NOSVE:       // %bb.0:
2044; NONEON-NOSVE-NEXT:    ret
2045  %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
2046  ret double %res
2047}
2048
; fminimum reduction of a <2 x double> argument.
; With +sve the expected code is a single fminv governed by a two-lane .d
; predicate (ptrue vl2). Without it, q0 is pushed to the stack, both lanes
; are reloaded with a post-indexed ldp (which also pops the slot) and
; combined with one scalar fmin.
2049define double @fminimumv_v2f64(<2 x double> %a) {
2050; CHECK-LABEL: fminimumv_v2f64:
2051; CHECK:       // %bb.0:
2052; CHECK-NEXT:    ptrue p0.d, vl2
2053; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2054; CHECK-NEXT:    fminv d0, p0, z0.d
2055; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2056; CHECK-NEXT:    ret
2057;
2058; NONEON-NOSVE-LABEL: fminimumv_v2f64:
2059; NONEON-NOSVE:       // %bb.0:
2060; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
2061; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2062; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp], #16
2063; NONEON-NOSVE-NEXT:    fmin d0, d1, d0
2064; NONEON-NOSVE-NEXT:    ret
2065  %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
2066  ret double %res
2067}
2068
; fminimum reduction of a <4 x double> loaded from %a (two 128-bit halves).
; With +sve the halves are combined lane-wise with a predicated fmin and then
; reduced with fminv. Without it, both halves are copied to the stack and the
; four lanes are folded with three scalar fmin instructions.
2069define double @fminimumv_v4f64(ptr %a) {
2070; CHECK-LABEL: fminimumv_v4f64:
2071; CHECK:       // %bb.0:
2072; CHECK-NEXT:    ldp q1, q0, [x0]
2073; CHECK-NEXT:    ptrue p0.d, vl2
2074; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
2075; CHECK-NEXT:    fminv d0, p0, z0.d
2076; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2077; CHECK-NEXT:    ret
2078;
2079; NONEON-NOSVE-LABEL: fminimumv_v4f64:
2080; NONEON-NOSVE:       // %bb.0:
2081; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2082; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
2083; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2084; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
2085; NONEON-NOSVE-NEXT:    ldp d3, d2, [sp], #32
2086; NONEON-NOSVE-NEXT:    fmin d0, d2, d0
2087; NONEON-NOSVE-NEXT:    fmin d1, d3, d1
2088; NONEON-NOSVE-NEXT:    fmin d0, d1, d0
2089; NONEON-NOSVE-NEXT:    ret
2090  %op = load <4 x double>, ptr %a
2091  %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
2092  ret double %res
2093}
2094
2095declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
2096declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
2097declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
2098
2099declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
2100declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
2101declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
2102
2103declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
2104declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
2105declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
2106
2107declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
2108declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
2109declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
2110
2111declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
2112declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
2113declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
2114
2115declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
2116declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
2117declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
2118
2119declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
2120declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
2121declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
2122
2123declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
2124declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
2125declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
2126
2127declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
2128declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
2129declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
2130
2131declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
2132declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
2133declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
2134
2135declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
2136declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
2137declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
2138
2139declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
2140declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
2141declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
2142
2143declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
2144declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
2145declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
2146
2147declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
2148declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
2149declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
2150
2151declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
2152declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
2153declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
2154