xref: /llvm-project/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
6
; fmul reduction of a <2 x float> vector with start value 1.0; the call carries
; no fast-math flags. The SelectionDAG checks expect a single indexed fmul of
; lane 0 by lane 1; the GlobalISel checks expect lane 1 to be copied out with
; mov before a scalar fmul.
7define float @mul_HalfS(<2 x float> %bin.rdx)  {
8; CHECK-SD-LABEL: mul_HalfS:
9; CHECK-SD:       // %bb.0:
10; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
11; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
12; CHECK-SD-NEXT:    ret
13;
14; CHECK-GI-LABEL: mul_HalfS:
15; CHECK-GI:       // %bb.0:
16; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
17; CHECK-GI-NEXT:    mov s1, v0.s[1]
18; CHECK-GI-NEXT:    fmul s0, s0, s1
19; CHECK-GI-NEXT:    ret
20  %r = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx)
21  ret float %r
22}
23
; fmul reduction of a <4 x half> vector with start value 1.0; the call carries
; no fast-math flags. Expected output per configuration:
;  - SD without +fullfp16: every step converts both operands to single
;    precision (fcvt), multiplies, and converts the product back to half.
;  - SD with +fullfp16: a chain of indexed half-precision fmul instructions
;    over lanes 1..3.
;  - GI without +fullfp16: same convert/multiply/convert pattern, but the
;    checks additionally expect the constant 0x3c00 to be materialized and
;    multiplied with lane 0 first.
;  - GI with +fullfp16: lanes are extracted with mov and multiplied with
;    scalar half-precision fmul.
24define half @mul_HalfH(<4 x half> %bin.rdx)  {
25; CHECK-SD-NOFP16-LABEL: mul_HalfH:
26; CHECK-SD-NOFP16:       // %bb.0:
27; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
28; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
29; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
30; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
31; CHECK-SD-NOFP16-NEXT:    fmul s1, s2, s1
32; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
33; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[3]
34; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
35; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
36; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
37; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
38; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
39; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
40; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
41; CHECK-SD-NOFP16-NEXT:    fmul s0, s1, s0
42; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
43; CHECK-SD-NOFP16-NEXT:    ret
44;
45; CHECK-SD-FP16-LABEL: mul_HalfH:
46; CHECK-SD-FP16:       // %bb.0:
47; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
48; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
49; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
50; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
51; CHECK-SD-FP16-NEXT:    ret
52;
53; CHECK-GI-NOFP16-LABEL: mul_HalfH:
54; CHECK-GI-NOFP16:       // %bb.0:
55; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
56; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
57; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
58; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
59; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
60; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
61; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
62; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
63; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
64; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
65; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
66; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
67; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
68; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
69; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
70; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
71; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
72; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
73; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
74; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
75; CHECK-GI-NOFP16-NEXT:    fmul s0, s1, s0
76; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
77; CHECK-GI-NOFP16-NEXT:    ret
78;
79; CHECK-GI-FP16-LABEL: mul_HalfH:
80; CHECK-GI-FP16:       // %bb.0:
81; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
82; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
83; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
84; CHECK-GI-FP16-NEXT:    fmul h1, h0, h1
85; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
86; CHECK-GI-FP16-NEXT:    fmul h1, h1, h2
87; CHECK-GI-FP16-NEXT:    fmul h0, h1, h0
88; CHECK-GI-FP16-NEXT:    ret
89  %r = call half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx)
90  ret half %r
91}
92
93
; fmul reduction of a full <8 x half> vector with start value 1.0; the call
; carries no fast-math flags. Without +fullfp16 the checks expect each of the
; lane multiplications to go through fcvt to single precision and back; the
; GlobalISel variant additionally expects the constant 0x3c00 to be
; materialized and multiplied with lane 0 first. With +fullfp16 both selectors
; are expected to emit the same chain of indexed half-precision fmul
; instructions over lanes 1..7.
94define half @mul_H(<8 x half> %bin.rdx)  {
95; CHECK-SD-NOFP16-LABEL: mul_H:
96; CHECK-SD-NOFP16:       // %bb.0:
97; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
98; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
99; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
100; CHECK-SD-NOFP16-NEXT:    fmul s1, s2, s1
101; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
102; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
103; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
104; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
105; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
106; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[3]
107; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
108; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
109; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
110; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
111; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[4]
112; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
113; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
114; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
115; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
116; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[5]
117; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
118; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
119; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
120; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
121; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
122; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
123; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
124; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
125; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
126; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
127; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
128; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
129; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
130; CHECK-SD-NOFP16-NEXT:    fmul s0, s1, s0
131; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
132; CHECK-SD-NOFP16-NEXT:    ret
133;
134; CHECK-SD-FP16-LABEL: mul_H:
135; CHECK-SD-FP16:       // %bb.0:
136; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
137; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
138; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[3]
139; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[4]
140; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[5]
141; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[6]
142; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[7]
143; CHECK-SD-FP16-NEXT:    ret
144;
145; CHECK-GI-NOFP16-LABEL: mul_H:
146; CHECK-GI-NOFP16:       // %bb.0:
147; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
148; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
149; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
150; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
151; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
152; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
153; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
154; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
155; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
156; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
157; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
158; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
159; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
160; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
161; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
162; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[3]
163; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
164; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
165; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
166; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
167; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
168; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
169; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
170; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
171; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
172; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[5]
173; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
174; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
175; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
176; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
177; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
178; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
179; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
180; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
181; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
182; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
183; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
184; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
185; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
186; CHECK-GI-NOFP16-NEXT:    fmul s0, s1, s0
187; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
188; CHECK-GI-NOFP16-NEXT:    ret
189;
190; CHECK-GI-FP16-LABEL: mul_H:
191; CHECK-GI-FP16:       // %bb.0:
192; CHECK-GI-FP16-NEXT:    fmul h1, h0, v0.h[1]
193; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[2]
194; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[3]
195; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[4]
196; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[5]
197; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[6]
198; CHECK-GI-FP16-NEXT:    fmul h0, h1, v0.h[7]
199; CHECK-GI-FP16-NEXT:    ret
200  %r = call half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx)
201  ret half %r
202}
203
; fmul reduction of a <4 x float> vector with start value 1.0; the call carries
; no fast-math flags. The shared CHECK prefix means all four RUN
; configurations are expected to produce the same chain of indexed fmul
; instructions over lanes 1..3.
204define float @mul_S(<4 x float> %bin.rdx)  {
205; CHECK-LABEL: mul_S:
206; CHECK:       // %bb.0:
207; CHECK-NEXT:    fmul s1, s0, v0.s[1]
208; CHECK-NEXT:    fmul s1, s1, v0.s[2]
209; CHECK-NEXT:    fmul s0, s1, v0.s[3]
210; CHECK-NEXT:    ret
211  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx)
212  ret float %r
213}
214
; fmul reduction of a <2 x double> vector with start value 1.0; the call
; carries no fast-math flags. All RUN configurations share the CHECK prefix and
; expect a single indexed fmul of lane 0 by lane 1.
215define double @mul_D(<2 x double> %bin.rdx)  {
216; CHECK-LABEL: mul_D:
217; CHECK:       // %bb.0:
218; CHECK-NEXT:    fmul d0, d0, v0.d[1]
219; CHECK-NEXT:    ret
220  %r = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %bin.rdx)
221  ret double %r
222}
223
; fmul reduction of a <16 x half> vector with start value 1.0; the call carries
; no fast-math flags. The checks reference two vector registers: the lanes of
; v0 are multiplied first, in lane order, followed by the lanes of v1.
; Without +fullfp16 every step is expected to convert to single precision,
; multiply, and convert back to half; the GlobalISel variant additionally
; expects the constant 0x3c00 to be materialized and multiplied with lane 0
; first. With +fullfp16 both selectors are expected to emit the same chain of
; half-precision fmul instructions.
224define half @mul_2H(<16 x half> %bin.rdx)  {
225; CHECK-SD-NOFP16-LABEL: mul_2H:
226; CHECK-SD-NOFP16:       // %bb.0:
227; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
228; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
229; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
230; CHECK-SD-NOFP16-NEXT:    fmul s2, s3, s2
231; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
232; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
233; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
234; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
235; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
236; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
237; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
238; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
239; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
240; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
241; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[4]
242; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
243; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
244; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
245; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
246; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
247; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
248; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
249; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
250; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
251; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[6]
252; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
253; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
254; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
255; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
256; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
257; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
258; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
259; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
260; CHECK-SD-NOFP16-NEXT:    fmul s0, s2, s0
261; CHECK-SD-NOFP16-NEXT:    fcvt s2, h1
262; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
263; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
264; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
265; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
266; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
267; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
268; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
269; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
270; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[2]
271; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
272; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
273; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
274; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
275; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[3]
276; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
277; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
278; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
279; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
280; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[4]
281; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
282; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
283; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
284; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
285; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[5]
286; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
287; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
288; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
289; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
290; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[6]
291; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
292; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
293; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
294; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
295; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
296; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
297; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
298; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
299; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s1
300; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
301; CHECK-SD-NOFP16-NEXT:    ret
302;
303; CHECK-SD-FP16-LABEL: mul_2H:
304; CHECK-SD-FP16:       // %bb.0:
305; CHECK-SD-FP16-NEXT:    fmul h2, h0, v0.h[1]
306; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[2]
307; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[3]
308; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[4]
309; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[5]
310; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[6]
311; CHECK-SD-FP16-NEXT:    fmul h0, h2, v0.h[7]
312; CHECK-SD-FP16-NEXT:    fmul h0, h0, h1
313; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[1]
314; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[2]
315; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[3]
316; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[4]
317; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[5]
318; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[6]
319; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[7]
320; CHECK-SD-FP16-NEXT:    ret
321;
322; CHECK-GI-NOFP16-LABEL: mul_2H:
323; CHECK-GI-NOFP16:       // %bb.0:
324; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
325; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
326; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
327; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
328; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
329; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
330; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
331; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
332; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
333; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
334; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
335; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
336; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
337; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
338; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
339; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
340; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
341; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
342; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
343; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
344; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[4]
345; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
346; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
347; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
348; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
349; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
350; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
351; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
352; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
353; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
354; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
355; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
356; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
357; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
358; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
359; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
360; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
361; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
362; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
363; CHECK-GI-NOFP16-NEXT:    fmul s0, s2, s0
364; CHECK-GI-NOFP16-NEXT:    fcvt s2, h1
365; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
366; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
367; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
368; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
369; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
370; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
371; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
372; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
373; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
374; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
375; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
376; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
377; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
378; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[3]
379; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
380; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
381; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
382; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
383; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[4]
384; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
385; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
386; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
387; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
388; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[5]
389; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
390; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
391; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
392; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
393; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[6]
394; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
395; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
396; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
397; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
398; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
399; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
400; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
401; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
402; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s1
403; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
404; CHECK-GI-NOFP16-NEXT:    ret
405;
406; CHECK-GI-FP16-LABEL: mul_2H:
407; CHECK-GI-FP16:       // %bb.0:
408; CHECK-GI-FP16-NEXT:    fmul h2, h0, v0.h[1]
409; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[2]
410; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[3]
411; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[4]
412; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[5]
413; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[6]
414; CHECK-GI-FP16-NEXT:    fmul h0, h2, v0.h[7]
415; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
416; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[1]
417; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[2]
418; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[3]
419; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[4]
420; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[5]
421; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[6]
422; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[7]
423; CHECK-GI-FP16-NEXT:    ret
424  %r = call half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx)
425  ret half %r
426}
427
; fmul reduction of an <8 x float> vector with start value 1.0; the call
; carries no fast-math flags. The checks reference two vector registers: the
; lanes of v0 are multiplied first, in lane order, then the lanes of v1. All
; RUN configurations share the CHECK prefix.
428define float @mul_2S(<8 x float> %bin.rdx)  {
429; CHECK-LABEL: mul_2S:
430; CHECK:       // %bb.0:
431; CHECK-NEXT:    fmul s2, s0, v0.s[1]
432; CHECK-NEXT:    fmul s2, s2, v0.s[2]
433; CHECK-NEXT:    fmul s0, s2, v0.s[3]
434; CHECK-NEXT:    fmul s0, s0, s1
435; CHECK-NEXT:    fmul s0, s0, v1.s[1]
436; CHECK-NEXT:    fmul s0, s0, v1.s[2]
437; CHECK-NEXT:    fmul s0, s0, v1.s[3]
438; CHECK-NEXT:    ret
439  %r = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx)
440  ret float %r
441}
442
; fmul reduction of a <4 x double> vector with start value 1.0; the call
; carries no fast-math flags. The checks reference two vector registers: both
; lanes of v0 are multiplied first, then both lanes of v1. All RUN
; configurations share the CHECK prefix.
443define double @mul_2D(<4 x double> %bin.rdx)  {
444; CHECK-LABEL: mul_2D:
445; CHECK:       // %bb.0:
446; CHECK-NEXT:    fmul d0, d0, v0.d[1]
447; CHECK-NEXT:    fmul d0, d0, d1
448; CHECK-NEXT:    fmul d0, d0, v1.d[1]
449; CHECK-NEXT:    ret
450  %r = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %bin.rdx)
451  ret double %r
452}
453
454; Added at least one test where the start value is not 1.0.
; fmul reduction of a <4 x float> vector with start value 42.0; the call
; carries no fast-math flags. The checks expect the start value to be
; materialized as the bit pattern 0x42280000 (42.0 in single precision), moved
; into an FP register, and multiplied with lane 0 before the remaining lanes
; are folded in with indexed fmul instructions.
455define float @mul_S_init_42(<4 x float> %bin.rdx)  {
456; CHECK-LABEL: mul_S_init_42:
457; CHECK:       // %bb.0:
458; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
459; CHECK-NEXT:    fmov s1, w8
460; CHECK-NEXT:    fmul s1, s1, s0
461; CHECK-NEXT:    fmul s1, s1, v0.s[1]
462; CHECK-NEXT:    fmul s1, s1, v0.s[2]
463; CHECK-NEXT:    fmul s0, s1, v0.s[3]
464; CHECK-NEXT:    ret
465  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
466  ret float %r
467}
468
469; Function Attrs: nounwind readnone
; Declarations of the llvm.vector.reduce.fmul intrinsic overloads called by
; the tests in this file. Each takes a scalar start value followed by the
; vector to reduce and returns a scalar of the element type.
470declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
471declare half @llvm.vector.reduce.fmul.f16.v8f16(half, <8 x half>)
472declare half @llvm.vector.reduce.fmul.f16.v16f16(half, <16 x half>)
473declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
474declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
475declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
476declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
477declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
478