xref: /llvm-project/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
6
; add_HalfS: fadd reduction of a <2 x float> with start value -0.0. The call
; carries no fast-math flags. Both the default selector and -global-isel are
; expected to emit a single pairwise `faddp s0, v0.2s`; the start value does
; not appear in the expected code.
7define float @add_HalfS(<2 x float> %bin.rdx)  {
8; CHECK-SD-LABEL: add_HalfS:
9; CHECK-SD:       // %bb.0:
10; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
11; CHECK-SD-NEXT:    faddp s0, v0.2s
12; CHECK-SD-NEXT:    ret
13;
14; CHECK-GI-LABEL: add_HalfS:
15; CHECK-GI:       // %bb.0:
16; CHECK-GI-NEXT:    faddp s0, v0.2s
17; CHECK-GI-NEXT:    ret
18  %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
19  ret float %r
20}
21
; add_HalfH: fadd reduction of a <4 x half> with start value -0.0, no fast-math
; flags on the call. Expected code per llc configuration:
;  * without +fullfp16: every step widens its operands to single precision
;    with fcvt, adds, and narrows the running sum back to half before the
;    next step.
;  * with +fullfp16: half-precision adds are used directly. The default
;    selector starts with `faddp h2, v0.2h` for lanes 0-1; -global-isel uses a
;    plain fadd chain over lanes 0..3.
;  * -global-isel without +fullfp16 additionally materializes 0x8000 (the half
;    encoding of -0.0) and adds it to lane 0 before the other lanes.
22define half @add_HalfH(<4 x half> %bin.rdx)  {
23; CHECK-SD-NOFP16-LABEL: add_HalfH:
24; CHECK-SD-NOFP16:       // %bb.0:
25; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
26; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
27; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
28; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
29; CHECK-SD-NOFP16-NEXT:    fadd s1, s2, s1
30; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
31; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[3]
32; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
33; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
34; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
35; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
36; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
37; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
38; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
39; CHECK-SD-NOFP16-NEXT:    fadd s0, s1, s0
40; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
41; CHECK-SD-NOFP16-NEXT:    ret
42;
43; CHECK-SD-FP16-LABEL: add_HalfH:
44; CHECK-SD-FP16:       // %bb.0:
45; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
46; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
47; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
48; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
49; CHECK-SD-FP16-NEXT:    fadd h1, h2, h1
50; CHECK-SD-FP16-NEXT:    fadd h0, h1, h0
51; CHECK-SD-FP16-NEXT:    ret
52;
53; CHECK-GI-NOFP16-LABEL: add_HalfH:
54; CHECK-GI-NOFP16:       // %bb.0:
55; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
56; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
57; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
58; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
59; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
60; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
61; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
62; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
63; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
64; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
65; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
66; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
67; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
68; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
69; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
70; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
71; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
72; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
73; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
74; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
75; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
76; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
77; CHECK-GI-NOFP16-NEXT:    ret
78;
79; CHECK-GI-FP16-LABEL: add_HalfH:
80; CHECK-GI-FP16:       // %bb.0:
81; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
82; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
83; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
84; CHECK-GI-FP16-NEXT:    fadd h1, h0, h1
85; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
86; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
87; CHECK-GI-FP16-NEXT:    fadd h0, h1, h0
88; CHECK-GI-FP16-NEXT:    ret
89  %r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx)
90  ret half %r
91}
92
93
; add_H: fadd reduction of an <8 x half> with start value -0.0, no fast-math
; flags on the call. Expected code per llc configuration:
;  * without +fullfp16: each of the lane additions is done in single precision
;    (fcvt up, fadd, fcvt back down to half) so the running sum is rounded to
;    half after every step. -global-isel additionally materializes 0x8000
;    (the half encoding of -0.0) and adds it to lane 0 first.
;  * with +fullfp16: both selectors are expected to emit the same sequence,
;    `faddp h2, v0.2h` for lanes 0-1 followed by scalar fadd for lanes 2..7 in
;    ascending order.
94define half @add_H(<8 x half> %bin.rdx)  {
95; CHECK-SD-NOFP16-LABEL: add_H:
96; CHECK-SD-NOFP16:       // %bb.0:
97; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
98; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
99; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
100; CHECK-SD-NOFP16-NEXT:    fadd s1, s2, s1
101; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
102; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
103; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
104; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
105; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
106; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[3]
107; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
108; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
109; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
110; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
111; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[4]
112; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
113; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
114; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
115; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
116; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[5]
117; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
118; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
119; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
120; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
121; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
122; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
123; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
124; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
125; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
126; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
127; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
128; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
129; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
130; CHECK-SD-NOFP16-NEXT:    fadd s0, s1, s0
131; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
132; CHECK-SD-NOFP16-NEXT:    ret
133;
134; CHECK-SD-FP16-LABEL: add_H:
135; CHECK-SD-FP16:       // %bb.0:
136; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
137; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
138; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
139; CHECK-SD-FP16-NEXT:    fadd h1, h2, h1
140; CHECK-SD-FP16-NEXT:    mov h2, v0.h[4]
141; CHECK-SD-FP16-NEXT:    fadd h1, h1, h3
142; CHECK-SD-FP16-NEXT:    mov h3, v0.h[5]
143; CHECK-SD-FP16-NEXT:    fadd h1, h1, h2
144; CHECK-SD-FP16-NEXT:    mov h2, v0.h[6]
145; CHECK-SD-FP16-NEXT:    mov h0, v0.h[7]
146; CHECK-SD-FP16-NEXT:    fadd h1, h1, h3
147; CHECK-SD-FP16-NEXT:    fadd h1, h1, h2
148; CHECK-SD-FP16-NEXT:    fadd h0, h1, h0
149; CHECK-SD-FP16-NEXT:    ret
150;
151; CHECK-GI-NOFP16-LABEL: add_H:
152; CHECK-GI-NOFP16:       // %bb.0:
153; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
154; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
155; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
156; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
157; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
158; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
159; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
160; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
161; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
162; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
163; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
164; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
165; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
166; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
167; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
168; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[3]
169; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
170; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
171; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
172; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
173; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
174; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
175; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
176; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
177; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
178; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[5]
179; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
180; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
181; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
182; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
183; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
184; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
185; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
186; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
187; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
188; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
189; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
190; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
191; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
192; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
193; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
194; CHECK-GI-NOFP16-NEXT:    ret
195;
196; CHECK-GI-FP16-LABEL: add_H:
197; CHECK-GI-FP16:       // %bb.0:
198; CHECK-GI-FP16-NEXT:    mov h1, v0.h[2]
199; CHECK-GI-FP16-NEXT:    faddp h2, v0.2h
200; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
201; CHECK-GI-FP16-NEXT:    fadd h1, h2, h1
202; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
203; CHECK-GI-FP16-NEXT:    fadd h1, h1, h3
204; CHECK-GI-FP16-NEXT:    mov h3, v0.h[5]
205; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
206; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
207; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
208; CHECK-GI-FP16-NEXT:    fadd h1, h1, h3
209; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
210; CHECK-GI-FP16-NEXT:    fadd h0, h1, h0
211; CHECK-GI-FP16-NEXT:    ret
212  %r = call half @llvm.vector.reduce.fadd.f16.v8f16(half -0.0, <8 x half> %bin.rdx)
213  ret half %r
214}
215
; add_S: fadd reduction of a <4 x float> with start value -0.0, no fast-math
; flags on the call. All four llc configurations share one expected sequence:
; `faddp s2, v0.2s` sums lanes 0-1, then lane 2 and lane 3 are added in that
; order with scalar fadd.
216define float @add_S(<4 x float> %bin.rdx)  {
217; CHECK-LABEL: add_S:
218; CHECK:       // %bb.0:
219; CHECK-NEXT:    mov s1, v0.s[2]
220; CHECK-NEXT:    faddp s2, v0.2s
221; CHECK-NEXT:    mov s0, v0.s[3]
222; CHECK-NEXT:    fadd s1, s2, s1
223; CHECK-NEXT:    fadd s0, s1, s0
224; CHECK-NEXT:    ret
225  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
226  ret float %r
227}
228
; add_D: fadd reduction of a <2 x double> with start value -0.0, no fast-math
; flags on the call. All four llc configurations are expected to emit a single
; pairwise `faddp d0, v0.2d`.
229define double @add_D(<2 x double> %bin.rdx)  {
230; CHECK-LABEL: add_D:
231; CHECK:       // %bb.0:
232; CHECK-NEXT:    faddp d0, v0.2d
233; CHECK-NEXT:    ret
234  %r = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %bin.rdx)
235  ret double %r
236}
237
; add_2H: fadd reduction of a <16 x half> with start value -0.0, no fast-math
; flags on the call. The expected code reads the input from two vector
; registers (v0 and v1) and accumulates v0 lanes 0..7 followed by v1 lanes
; 0..7. Expected code per llc configuration:
;  * without +fullfp16: each addition is done in single precision (fcvt up,
;    fadd, fcvt back down to half), so the running sum is rounded to half after
;    every step. -global-isel additionally materializes 0x8000 (the half
;    encoding of -0.0) and adds it to lane 0 first.
;  * with +fullfp16: both selectors are expected to emit the same sequence,
;    `faddp h3, v0.2h` for v0 lanes 0-1 followed by scalar half-precision fadd
;    for every remaining lane.
238define half @add_2H(<16 x half> %bin.rdx)  {
239; CHECK-SD-NOFP16-LABEL: add_2H:
240; CHECK-SD-NOFP16:       // %bb.0:
241; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
242; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
243; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
244; CHECK-SD-NOFP16-NEXT:    fadd s2, s3, s2
245; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
246; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
247; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
248; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
249; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
250; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
251; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
252; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
253; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
254; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
255; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[4]
256; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
257; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
258; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
259; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
260; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
261; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
262; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
263; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
264; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
265; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[6]
266; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
267; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
268; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
269; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
270; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
271; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
272; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
273; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
274; CHECK-SD-NOFP16-NEXT:    fadd s0, s2, s0
275; CHECK-SD-NOFP16-NEXT:    fcvt s2, h1
276; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
277; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
278; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
279; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
280; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
281; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
282; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
283; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
284; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[2]
285; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
286; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
287; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
288; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
289; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[3]
290; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
291; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
292; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
293; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
294; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[4]
295; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
296; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
297; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
298; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
299; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[5]
300; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
301; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
302; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
303; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
304; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[6]
305; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
306; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
307; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
308; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
309; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
310; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
311; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
312; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
313; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s1
314; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
315; CHECK-SD-NOFP16-NEXT:    ret
316;
317; CHECK-SD-FP16-LABEL: add_2H:
318; CHECK-SD-FP16:       // %bb.0:
319; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
320; CHECK-SD-FP16-NEXT:    faddp h3, v0.2h
321; CHECK-SD-FP16-NEXT:    mov h4, v0.h[3]
322; CHECK-SD-FP16-NEXT:    fadd h2, h3, h2
323; CHECK-SD-FP16-NEXT:    mov h3, v0.h[4]
324; CHECK-SD-FP16-NEXT:    fadd h2, h2, h4
325; CHECK-SD-FP16-NEXT:    mov h4, v0.h[5]
326; CHECK-SD-FP16-NEXT:    fadd h2, h2, h3
327; CHECK-SD-FP16-NEXT:    mov h3, v0.h[6]
328; CHECK-SD-FP16-NEXT:    mov h0, v0.h[7]
329; CHECK-SD-FP16-NEXT:    fadd h2, h2, h4
330; CHECK-SD-FP16-NEXT:    fadd h2, h2, h3
331; CHECK-SD-FP16-NEXT:    mov h3, v1.h[2]
332; CHECK-SD-FP16-NEXT:    fadd h0, h2, h0
333; CHECK-SD-FP16-NEXT:    mov h2, v1.h[1]
334; CHECK-SD-FP16-NEXT:    fadd h0, h0, h1
335; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
336; CHECK-SD-FP16-NEXT:    mov h2, v1.h[3]
337; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
338; CHECK-SD-FP16-NEXT:    mov h3, v1.h[4]
339; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
340; CHECK-SD-FP16-NEXT:    mov h2, v1.h[5]
341; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
342; CHECK-SD-FP16-NEXT:    mov h3, v1.h[6]
343; CHECK-SD-FP16-NEXT:    mov h1, v1.h[7]
344; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
345; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
346; CHECK-SD-FP16-NEXT:    fadd h0, h0, h1
347; CHECK-SD-FP16-NEXT:    ret
348;
349; CHECK-GI-NOFP16-LABEL: add_2H:
350; CHECK-GI-NOFP16:       // %bb.0:
351; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
352; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
353; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
354; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
355; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
356; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
357; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
358; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
359; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
360; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
361; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
362; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
363; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
364; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
365; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
366; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
367; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
368; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
369; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
370; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
371; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[4]
372; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
373; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
374; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
375; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
376; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
377; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
378; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
379; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
380; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
381; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
382; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
383; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
384; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
385; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
386; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
387; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
388; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
389; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
390; CHECK-GI-NOFP16-NEXT:    fadd s0, s2, s0
391; CHECK-GI-NOFP16-NEXT:    fcvt s2, h1
392; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
393; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
394; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
395; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
396; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
397; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
398; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
399; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
400; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
401; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
402; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
403; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
404; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
405; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[3]
406; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
407; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
408; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
409; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
410; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[4]
411; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
412; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
413; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
414; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
415; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[5]
416; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
417; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
418; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
419; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
420; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[6]
421; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
422; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
423; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
424; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
425; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
426; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
427; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
428; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
429; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s1
430; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
431; CHECK-GI-NOFP16-NEXT:    ret
432;
433; CHECK-GI-FP16-LABEL: add_2H:
434; CHECK-GI-FP16:       // %bb.0:
435; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
436; CHECK-GI-FP16-NEXT:    faddp h3, v0.2h
437; CHECK-GI-FP16-NEXT:    mov h4, v0.h[3]
438; CHECK-GI-FP16-NEXT:    fadd h2, h3, h2
439; CHECK-GI-FP16-NEXT:    mov h3, v0.h[4]
440; CHECK-GI-FP16-NEXT:    fadd h2, h2, h4
441; CHECK-GI-FP16-NEXT:    mov h4, v0.h[5]
442; CHECK-GI-FP16-NEXT:    fadd h2, h2, h3
443; CHECK-GI-FP16-NEXT:    mov h3, v0.h[6]
444; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
445; CHECK-GI-FP16-NEXT:    fadd h2, h2, h4
446; CHECK-GI-FP16-NEXT:    fadd h2, h2, h3
447; CHECK-GI-FP16-NEXT:    mov h3, v1.h[2]
448; CHECK-GI-FP16-NEXT:    fadd h0, h2, h0
449; CHECK-GI-FP16-NEXT:    mov h2, v1.h[1]
450; CHECK-GI-FP16-NEXT:    fadd h0, h0, h1
451; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
452; CHECK-GI-FP16-NEXT:    mov h2, v1.h[3]
453; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
454; CHECK-GI-FP16-NEXT:    mov h3, v1.h[4]
455; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
456; CHECK-GI-FP16-NEXT:    mov h2, v1.h[5]
457; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
458; CHECK-GI-FP16-NEXT:    mov h3, v1.h[6]
459; CHECK-GI-FP16-NEXT:    mov h1, v1.h[7]
460; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
461; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
462; CHECK-GI-FP16-NEXT:    fadd h0, h0, h1
463; CHECK-GI-FP16-NEXT:    ret
464  %r = call half @llvm.vector.reduce.fadd.f16.v16f16(half -0.0, <16 x half> %bin.rdx)
465  ret half %r
466}
467
; add_2S: fadd reduction of an <8 x float> with start value -0.0, no fast-math
; flags on the call. The expected code reads the input from v0 and v1. All
; four llc configurations share one sequence: `faddp s3, v0.2s` sums v0 lanes
; 0-1, then v0 lanes 2-3 and v1 lanes 0..3 are added one at a time in
; ascending order.
468define float @add_2S(<8 x float> %bin.rdx)  {
469; CHECK-LABEL: add_2S:
470; CHECK:       // %bb.0:
471; CHECK-NEXT:    mov s2, v0.s[2]
472; CHECK-NEXT:    faddp s3, v0.2s
473; CHECK-NEXT:    mov s0, v0.s[3]
474; CHECK-NEXT:    fadd s2, s3, s2
475; CHECK-NEXT:    mov s3, v1.s[2]
476; CHECK-NEXT:    fadd s0, s2, s0
477; CHECK-NEXT:    mov s2, v1.s[1]
478; CHECK-NEXT:    fadd s0, s0, s1
479; CHECK-NEXT:    mov s1, v1.s[3]
480; CHECK-NEXT:    fadd s0, s0, s2
481; CHECK-NEXT:    fadd s0, s0, s3
482; CHECK-NEXT:    fadd s0, s0, s1
483; CHECK-NEXT:    ret
484  %r = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
485  ret float %r
486}
487
; add_2D: fadd reduction of a <4 x double> with start value -0.0, no fast-math
; flags on the call. The expected code reads the input from v0 and v1. All
; four llc configurations share one sequence: `faddp d0, v0.2d` sums the two
; lanes of v0, then v1 lane 0 (d1) and v1 lane 1 are added in that order.
488define double @add_2D(<4 x double> %bin.rdx)  {
489; CHECK-LABEL: add_2D:
490; CHECK:       // %bb.0:
491; CHECK-NEXT:    faddp d0, v0.2d
492; CHECK-NEXT:    mov d2, v1.d[1]
493; CHECK-NEXT:    fadd d0, d0, d1
494; CHECK-NEXT:    fadd d0, d0, d2
495; CHECK-NEXT:    ret
496  %r = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %bin.rdx)
497  ret double %r
498}
499
500; At least one test where the start value is not -0.0.
; add_S_init_42: fadd reduction of a <4 x float> with start value 42.0
; (0x42280000), no fast-math flags on the call. In the expected code the
; constant is materialized through w8 and added to lane 0 first, then lanes
; 1..3 are added in ascending order; no faddp is expected here.
501define float @add_S_init_42(<4 x float> %bin.rdx)  {
502; CHECK-LABEL: add_S_init_42:
503; CHECK:       // %bb.0:
504; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
505; CHECK-NEXT:    mov s2, v0.s[1]
506; CHECK-NEXT:    mov s3, v0.s[2]
507; CHECK-NEXT:    fmov s1, w8
508; CHECK-NEXT:    fadd s1, s0, s1
509; CHECK-NEXT:    mov s0, v0.s[3]
510; CHECK-NEXT:    fadd s1, s1, s2
511; CHECK-NEXT:    fadd s1, s1, s3
512; CHECK-NEXT:    fadd s0, s1, s0
513; CHECK-NEXT:    ret
514  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
515  ret float %r
516}
517
518; Function Attrs: nounwind readnone
; Declarations of the llvm.vector.reduce.fadd overloads called by the tests in
; this file. Each takes a scalar start value followed by the vector to reduce
; and returns a scalar of the vector's element type.
; NOTE(review): none of these declarations carries an explicit attribute list,
; so the "Function Attrs" line above looks like leftover text - confirm.
519declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
520declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
521declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
522declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
523declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
524declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
525declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
526declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
527