xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-fmul.ll (revision 53c37f300dd1b450671f2aee4cc649c380adb5ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
8; RUN:   FileCheck %s --check-prefix=PWR10LE
9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
11; RUN:   FileCheck %s --check-prefix=PWR10BE
12
13;;
14;; Vectors of f32
15;;
; v2f32: fmul reduction over <2 x float> with a constant 1.0 start value and
; no fast-math flags on the call. Every checked configuration expects a single
; scalar xsmulsp fed by two xscvspdpn conversions.
16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
17; PWR9LE-LABEL: v2f32:
18; PWR9LE:       # %bb.0: # %entry
19; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
20; PWR9LE-NEXT:    xxswapd vs1, v2
21; PWR9LE-NEXT:    xscvspdpn f0, vs0
22; PWR9LE-NEXT:    xscvspdpn f1, vs1
23; PWR9LE-NEXT:    xsmulsp f1, f0, f1
24; PWR9LE-NEXT:    blr
25;
26; PWR9BE-LABEL: v2f32:
27; PWR9BE:       # %bb.0: # %entry
28; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
29; PWR9BE-NEXT:    xscvspdpn f0, v2
30; PWR9BE-NEXT:    xscvspdpn f1, vs1
31; PWR9BE-NEXT:    xsmulsp f1, f0, f1
32; PWR9BE-NEXT:    blr
33;
34; PWR10LE-LABEL: v2f32:
35; PWR10LE:       # %bb.0: # %entry
36; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
37; PWR10LE-NEXT:    xxswapd vs1, v2
38; PWR10LE-NEXT:    xscvspdpn f0, vs0
39; PWR10LE-NEXT:    xscvspdpn f1, vs1
40; PWR10LE-NEXT:    xsmulsp f1, f0, f1
41; PWR10LE-NEXT:    blr
42;
43; PWR10BE-LABEL: v2f32:
44; PWR10BE:       # %bb.0: # %entry
45; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
46; PWR10BE-NEXT:    xscvspdpn f0, v2
47; PWR10BE-NEXT:    xscvspdpn f1, vs1
48; PWR10BE-NEXT:    xsmulsp f1, f0, f1
49; PWR10BE-NEXT:    blr
50entry:
51  %0 = call float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
52  ret float %0
53}
54
; v2f32_b: same reduction as v2f32 but the start value is the %b argument
; instead of the constant 1.0 (still no fast-math flags). The expected code is
; two chained scalar xsmulsp instructions, the first of which reads f1.
55define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 {
56; PWR9LE-LABEL: v2f32_b:
57; PWR9LE:       # %bb.0: # %entry
58; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
59; PWR9LE-NEXT:    xscvspdpn f0, vs0
60; PWR9LE-NEXT:    xsmulsp f0, f1, f0
61; PWR9LE-NEXT:    xxswapd vs1, v2
62; PWR9LE-NEXT:    xscvspdpn f1, vs1
63; PWR9LE-NEXT:    xsmulsp f1, f0, f1
64; PWR9LE-NEXT:    blr
65;
66; PWR9BE-LABEL: v2f32_b:
67; PWR9BE:       # %bb.0: # %entry
68; PWR9BE-NEXT:    xscvspdpn f0, v2
69; PWR9BE-NEXT:    xsmulsp f0, f1, f0
70; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
71; PWR9BE-NEXT:    xscvspdpn f1, vs1
72; PWR9BE-NEXT:    xsmulsp f1, f0, f1
73; PWR9BE-NEXT:    blr
74;
75; PWR10LE-LABEL: v2f32_b:
76; PWR10LE:       # %bb.0: # %entry
77; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
78; PWR10LE-NEXT:    xscvspdpn f0, vs0
79; PWR10LE-NEXT:    xsmulsp f0, f1, f0
80; PWR10LE-NEXT:    xxswapd vs1, v2
81; PWR10LE-NEXT:    xscvspdpn f1, vs1
82; PWR10LE-NEXT:    xsmulsp f1, f0, f1
83; PWR10LE-NEXT:    blr
84;
85; PWR10BE-LABEL: v2f32_b:
86; PWR10BE:       # %bb.0: # %entry
87; PWR10BE-NEXT:    xscvspdpn f0, v2
88; PWR10BE-NEXT:    xsmulsp f0, f1, f0
89; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
90; PWR10BE-NEXT:    xscvspdpn f1, vs1
91; PWR10BE-NEXT:    xsmulsp f1, f0, f1
92; PWR10BE-NEXT:    blr
93entry:
94  %0 = call float @llvm.vector.reduce.fmul.v2f32(float %b, <2 x float> %a)
95  ret float %0
96}
97
; v2f32_fast: fmul reduction over <2 x float> with a 1.0 start value where the
; call carries the `fast` flag. The expected code uses one vector xvmulsp of the
; input with an xxspltw splat of itself, then a single xscvspdpn; the
; little-endian checks additionally expect an xxsldwi before the conversion.
98define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
99; PWR9LE-LABEL: v2f32_fast:
100; PWR9LE:       # %bb.0: # %entry
101; PWR9LE-NEXT:    xxspltw vs0, v2, 2
102; PWR9LE-NEXT:    xvmulsp vs0, v2, vs0
103; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
104; PWR9LE-NEXT:    xscvspdpn f1, vs0
105; PWR9LE-NEXT:    blr
106;
107; PWR9BE-LABEL: v2f32_fast:
108; PWR9BE:       # %bb.0: # %entry
109; PWR9BE-NEXT:    xxspltw vs0, v2, 1
110; PWR9BE-NEXT:    xvmulsp vs0, v2, vs0
111; PWR9BE-NEXT:    xscvspdpn f1, vs0
112; PWR9BE-NEXT:    blr
113;
114; PWR10LE-LABEL: v2f32_fast:
115; PWR10LE:       # %bb.0: # %entry
116; PWR10LE-NEXT:    xxspltw vs0, v2, 2
117; PWR10LE-NEXT:    xvmulsp vs0, v2, vs0
118; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
119; PWR10LE-NEXT:    xscvspdpn f1, vs0
120; PWR10LE-NEXT:    blr
121;
122; PWR10BE-LABEL: v2f32_fast:
123; PWR10BE:       # %bb.0: # %entry
124; PWR10BE-NEXT:    xxspltw vs0, v2, 1
125; PWR10BE-NEXT:    xvmulsp vs0, v2, vs0
126; PWR10BE-NEXT:    xscvspdpn f1, vs0
127; PWR10BE-NEXT:    blr
128entry:
129  %0 = call fast float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
130  ret float %0
131}
132
; v4f32: fmul reduction over <4 x float> with a constant 1.0 start value and
; no fast-math flags on the call. Every checked configuration expects three
; chained scalar xsmulsp instructions over values taken from v2.
133define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
134; PWR9LE-LABEL: v4f32:
135; PWR9LE:       # %bb.0: # %entry
136; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
137; PWR9LE-NEXT:    xxswapd vs1, v2
138; PWR9LE-NEXT:    xscvspdpn f0, vs0
139; PWR9LE-NEXT:    xscvspdpn f1, vs1
140; PWR9LE-NEXT:    xsmulsp f0, f0, f1
141; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
142; PWR9LE-NEXT:    xscvspdpn f1, vs1
143; PWR9LE-NEXT:    xsmulsp f0, f0, f1
144; PWR9LE-NEXT:    xscvspdpn f1, v2
145; PWR9LE-NEXT:    xsmulsp f1, f0, f1
146; PWR9LE-NEXT:    blr
147;
148; PWR9BE-LABEL: v4f32:
149; PWR9BE:       # %bb.0: # %entry
150; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
151; PWR9BE-NEXT:    xscvspdpn f0, v2
152; PWR9BE-NEXT:    xscvspdpn f1, vs1
153; PWR9BE-NEXT:    xsmulsp f0, f0, f1
154; PWR9BE-NEXT:    xxswapd vs1, v2
155; PWR9BE-NEXT:    xscvspdpn f1, vs1
156; PWR9BE-NEXT:    xsmulsp f0, f0, f1
157; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
158; PWR9BE-NEXT:    xscvspdpn f1, vs1
159; PWR9BE-NEXT:    xsmulsp f1, f0, f1
160; PWR9BE-NEXT:    blr
161;
162; PWR10LE-LABEL: v4f32:
163; PWR10LE:       # %bb.0: # %entry
164; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
165; PWR10LE-NEXT:    xxswapd vs1, v2
166; PWR10LE-NEXT:    xscvspdpn f0, vs0
167; PWR10LE-NEXT:    xscvspdpn f1, vs1
168; PWR10LE-NEXT:    xsmulsp f0, f0, f1
169; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
170; PWR10LE-NEXT:    xscvspdpn f1, vs1
171; PWR10LE-NEXT:    xsmulsp f0, f0, f1
172; PWR10LE-NEXT:    xscvspdpn f1, v2
173; PWR10LE-NEXT:    xsmulsp f1, f0, f1
174; PWR10LE-NEXT:    blr
175;
176; PWR10BE-LABEL: v4f32:
177; PWR10BE:       # %bb.0: # %entry
178; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
179; PWR10BE-NEXT:    xscvspdpn f0, v2
180; PWR10BE-NEXT:    xscvspdpn f1, vs1
181; PWR10BE-NEXT:    xsmulsp f0, f0, f1
182; PWR10BE-NEXT:    xxswapd vs1, v2
183; PWR10BE-NEXT:    xscvspdpn f1, vs1
184; PWR10BE-NEXT:    xsmulsp f0, f0, f1
185; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
186; PWR10BE-NEXT:    xscvspdpn f1, vs1
187; PWR10BE-NEXT:    xsmulsp f1, f0, f1
188; PWR10BE-NEXT:    blr
189entry:
190  %0 = call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
191  ret float %0
192}
193
; v4f32_b: same reduction as v4f32 but the start value is the %b argument
; instead of the constant 1.0 (still no fast-math flags). The expected code is
; four chained scalar xsmulsp instructions, the first of which reads f1.
194define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 {
195; PWR9LE-LABEL: v4f32_b:
196; PWR9LE:       # %bb.0: # %entry
197; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
198; PWR9LE-NEXT:    xscvspdpn f0, vs0
199; PWR9LE-NEXT:    xsmulsp f0, f1, f0
200; PWR9LE-NEXT:    xxswapd vs1, v2
201; PWR9LE-NEXT:    xscvspdpn f1, vs1
202; PWR9LE-NEXT:    xsmulsp f0, f0, f1
203; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
204; PWR9LE-NEXT:    xscvspdpn f1, vs1
205; PWR9LE-NEXT:    xsmulsp f0, f0, f1
206; PWR9LE-NEXT:    xscvspdpn f1, v2
207; PWR9LE-NEXT:    xsmulsp f1, f0, f1
208; PWR9LE-NEXT:    blr
209;
210; PWR9BE-LABEL: v4f32_b:
211; PWR9BE:       # %bb.0: # %entry
212; PWR9BE-NEXT:    xscvspdpn f0, v2
213; PWR9BE-NEXT:    xsmulsp f0, f1, f0
214; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
215; PWR9BE-NEXT:    xscvspdpn f1, vs1
216; PWR9BE-NEXT:    xsmulsp f0, f0, f1
217; PWR9BE-NEXT:    xxswapd vs1, v2
218; PWR9BE-NEXT:    xscvspdpn f1, vs1
219; PWR9BE-NEXT:    xsmulsp f0, f0, f1
220; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
221; PWR9BE-NEXT:    xscvspdpn f1, vs1
222; PWR9BE-NEXT:    xsmulsp f1, f0, f1
223; PWR9BE-NEXT:    blr
224;
225; PWR10LE-LABEL: v4f32_b:
226; PWR10LE:       # %bb.0: # %entry
227; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
228; PWR10LE-NEXT:    xscvspdpn f0, vs0
229; PWR10LE-NEXT:    xsmulsp f0, f1, f0
230; PWR10LE-NEXT:    xxswapd vs1, v2
231; PWR10LE-NEXT:    xscvspdpn f1, vs1
232; PWR10LE-NEXT:    xsmulsp f0, f0, f1
233; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
234; PWR10LE-NEXT:    xscvspdpn f1, vs1
235; PWR10LE-NEXT:    xsmulsp f0, f0, f1
236; PWR10LE-NEXT:    xscvspdpn f1, v2
237; PWR10LE-NEXT:    xsmulsp f1, f0, f1
238; PWR10LE-NEXT:    blr
239;
240; PWR10BE-LABEL: v4f32_b:
241; PWR10BE:       # %bb.0: # %entry
242; PWR10BE-NEXT:    xscvspdpn f0, v2
243; PWR10BE-NEXT:    xsmulsp f0, f1, f0
244; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
245; PWR10BE-NEXT:    xscvspdpn f1, vs1
246; PWR10BE-NEXT:    xsmulsp f0, f0, f1
247; PWR10BE-NEXT:    xxswapd vs1, v2
248; PWR10BE-NEXT:    xscvspdpn f1, vs1
249; PWR10BE-NEXT:    xsmulsp f0, f0, f1
250; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
251; PWR10BE-NEXT:    xscvspdpn f1, vs1
252; PWR10BE-NEXT:    xsmulsp f1, f0, f1
253; PWR10BE-NEXT:    blr
254entry:
255  %0 = call float @llvm.vector.reduce.fmul.v4f32(float %b, <4 x float> %a)
256  ret float %0
257}
258
; v4f32_fast: fmul reduction over <4 x float> with a 1.0 start value where the
; call carries the `fast` flag. The expected code uses two vector xvmulsp
; instructions (after an xxswapd and then an xxspltw of the running product)
; followed by a single xscvspdpn; the little-endian checks additionally expect
; an xxsldwi before the conversion.
259define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
260; PWR9LE-LABEL: v4f32_fast:
261; PWR9LE:       # %bb.0: # %entry
262; PWR9LE-NEXT:    xxswapd v3, v2
263; PWR9LE-NEXT:    xvmulsp vs0, v2, v3
264; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
265; PWR9LE-NEXT:    xvmulsp vs0, vs0, vs1
266; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
267; PWR9LE-NEXT:    xscvspdpn f1, vs0
268; PWR9LE-NEXT:    blr
269;
270; PWR9BE-LABEL: v4f32_fast:
271; PWR9BE:       # %bb.0: # %entry
272; PWR9BE-NEXT:    xxswapd v3, v2
273; PWR9BE-NEXT:    xvmulsp vs0, v2, v3
274; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
275; PWR9BE-NEXT:    xvmulsp vs0, vs0, vs1
276; PWR9BE-NEXT:    xscvspdpn f1, vs0
277; PWR9BE-NEXT:    blr
278;
279; PWR10LE-LABEL: v4f32_fast:
280; PWR10LE:       # %bb.0: # %entry
281; PWR10LE-NEXT:    xxswapd v3, v2
282; PWR10LE-NEXT:    xvmulsp vs0, v2, v3
283; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
284; PWR10LE-NEXT:    xvmulsp vs0, vs0, vs1
285; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
286; PWR10LE-NEXT:    xscvspdpn f1, vs0
287; PWR10LE-NEXT:    blr
288;
289; PWR10BE-LABEL: v4f32_fast:
290; PWR10BE:       # %bb.0: # %entry
291; PWR10BE-NEXT:    xxswapd v3, v2
292; PWR10BE-NEXT:    xvmulsp vs0, v2, v3
293; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
294; PWR10BE-NEXT:    xvmulsp vs0, vs0, vs1
295; PWR10BE-NEXT:    xscvspdpn f1, vs0
296; PWR10BE-NEXT:    blr
297entry:
298  %0 = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
299  ret float %0
300}
301
; v8f32: fmul reduction over <8 x float> with a constant 1.0 start value and
; no fast-math flags on the call. Every checked configuration expects seven
; chained scalar xsmulsp instructions, consuming values from v2 and then v3.
302define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
303; PWR9LE-LABEL: v8f32:
304; PWR9LE:       # %bb.0: # %entry
305; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
306; PWR9LE-NEXT:    xxswapd vs1, v2
307; PWR9LE-NEXT:    xscvspdpn f0, vs0
308; PWR9LE-NEXT:    xscvspdpn f1, vs1
309; PWR9LE-NEXT:    xsmulsp f0, f0, f1
310; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
311; PWR9LE-NEXT:    xscvspdpn f1, vs1
312; PWR9LE-NEXT:    xsmulsp f0, f0, f1
313; PWR9LE-NEXT:    xscvspdpn f1, v2
314; PWR9LE-NEXT:    xsmulsp f0, f0, f1
315; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 3
316; PWR9LE-NEXT:    xscvspdpn f1, vs1
317; PWR9LE-NEXT:    xsmulsp f0, f0, f1
318; PWR9LE-NEXT:    xxswapd vs1, v3
319; PWR9LE-NEXT:    xscvspdpn f1, vs1
320; PWR9LE-NEXT:    xsmulsp f0, f0, f1
321; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 1
322; PWR9LE-NEXT:    xscvspdpn f1, vs1
323; PWR9LE-NEXT:    xsmulsp f0, f0, f1
324; PWR9LE-NEXT:    xscvspdpn f1, v3
325; PWR9LE-NEXT:    xsmulsp f1, f0, f1
326; PWR9LE-NEXT:    blr
327;
328; PWR9BE-LABEL: v8f32:
329; PWR9BE:       # %bb.0: # %entry
330; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
331; PWR9BE-NEXT:    xscvspdpn f0, v2
332; PWR9BE-NEXT:    xscvspdpn f1, vs1
333; PWR9BE-NEXT:    xsmulsp f0, f0, f1
334; PWR9BE-NEXT:    xxswapd vs1, v2
335; PWR9BE-NEXT:    xscvspdpn f1, vs1
336; PWR9BE-NEXT:    xsmulsp f0, f0, f1
337; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
338; PWR9BE-NEXT:    xscvspdpn f1, vs1
339; PWR9BE-NEXT:    xsmulsp f0, f0, f1
340; PWR9BE-NEXT:    xscvspdpn f1, v3
341; PWR9BE-NEXT:    xsmulsp f0, f0, f1
342; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 1
343; PWR9BE-NEXT:    xscvspdpn f1, vs1
344; PWR9BE-NEXT:    xsmulsp f0, f0, f1
345; PWR9BE-NEXT:    xxswapd vs1, v3
346; PWR9BE-NEXT:    xscvspdpn f1, vs1
347; PWR9BE-NEXT:    xsmulsp f0, f0, f1
348; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 3
349; PWR9BE-NEXT:    xscvspdpn f1, vs1
350; PWR9BE-NEXT:    xsmulsp f1, f0, f1
351; PWR9BE-NEXT:    blr
352;
353; PWR10LE-LABEL: v8f32:
354; PWR10LE:       # %bb.0: # %entry
355; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
356; PWR10LE-NEXT:    xxswapd vs1, v2
357; PWR10LE-NEXT:    xscvspdpn f0, vs0
358; PWR10LE-NEXT:    xscvspdpn f1, vs1
359; PWR10LE-NEXT:    xsmulsp f0, f0, f1
360; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
361; PWR10LE-NEXT:    xscvspdpn f1, vs1
362; PWR10LE-NEXT:    xsmulsp f0, f0, f1
363; PWR10LE-NEXT:    xscvspdpn f1, v2
364; PWR10LE-NEXT:    xsmulsp f0, f0, f1
365; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 3
366; PWR10LE-NEXT:    xscvspdpn f1, vs1
367; PWR10LE-NEXT:    xsmulsp f0, f0, f1
368; PWR10LE-NEXT:    xxswapd vs1, v3
369; PWR10LE-NEXT:    xscvspdpn f1, vs1
370; PWR10LE-NEXT:    xsmulsp f0, f0, f1
371; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 1
372; PWR10LE-NEXT:    xscvspdpn f1, vs1
373; PWR10LE-NEXT:    xsmulsp f0, f0, f1
374; PWR10LE-NEXT:    xscvspdpn f1, v3
375; PWR10LE-NEXT:    xsmulsp f1, f0, f1
376; PWR10LE-NEXT:    blr
377;
378; PWR10BE-LABEL: v8f32:
379; PWR10BE:       # %bb.0: # %entry
380; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
381; PWR10BE-NEXT:    xscvspdpn f0, v2
382; PWR10BE-NEXT:    xscvspdpn f1, vs1
383; PWR10BE-NEXT:    xsmulsp f0, f0, f1
384; PWR10BE-NEXT:    xxswapd vs1, v2
385; PWR10BE-NEXT:    xscvspdpn f1, vs1
386; PWR10BE-NEXT:    xsmulsp f0, f0, f1
387; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
388; PWR10BE-NEXT:    xscvspdpn f1, vs1
389; PWR10BE-NEXT:    xsmulsp f0, f0, f1
390; PWR10BE-NEXT:    xscvspdpn f1, v3
391; PWR10BE-NEXT:    xsmulsp f0, f0, f1
392; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 1
393; PWR10BE-NEXT:    xscvspdpn f1, vs1
394; PWR10BE-NEXT:    xsmulsp f0, f0, f1
395; PWR10BE-NEXT:    xxswapd vs1, v3
396; PWR10BE-NEXT:    xscvspdpn f1, vs1
397; PWR10BE-NEXT:    xsmulsp f0, f0, f1
398; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 3
399; PWR10BE-NEXT:    xscvspdpn f1, vs1
400; PWR10BE-NEXT:    xsmulsp f1, f0, f1
401; PWR10BE-NEXT:    blr
402entry:
403  %0 = call float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
404  ret float %0
405}
406
; v8f32_b: same reduction as v8f32 but the start value is the %b argument
; instead of the constant 1.0 (still no fast-math flags). The expected code is
; eight chained scalar xsmulsp instructions, the first of which reads f1.
407define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 {
408; PWR9LE-LABEL: v8f32_b:
409; PWR9LE:       # %bb.0: # %entry
410; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
411; PWR9LE-NEXT:    xscvspdpn f0, vs0
412; PWR9LE-NEXT:    xsmulsp f0, f1, f0
413; PWR9LE-NEXT:    xxswapd vs1, v2
414; PWR9LE-NEXT:    xscvspdpn f1, vs1
415; PWR9LE-NEXT:    xsmulsp f0, f0, f1
416; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
417; PWR9LE-NEXT:    xscvspdpn f1, vs1
418; PWR9LE-NEXT:    xsmulsp f0, f0, f1
419; PWR9LE-NEXT:    xscvspdpn f1, v2
420; PWR9LE-NEXT:    xsmulsp f0, f0, f1
421; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 3
422; PWR9LE-NEXT:    xscvspdpn f1, vs1
423; PWR9LE-NEXT:    xsmulsp f0, f0, f1
424; PWR9LE-NEXT:    xxswapd vs1, v3
425; PWR9LE-NEXT:    xscvspdpn f1, vs1
426; PWR9LE-NEXT:    xsmulsp f0, f0, f1
427; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 1
428; PWR9LE-NEXT:    xscvspdpn f1, vs1
429; PWR9LE-NEXT:    xsmulsp f0, f0, f1
430; PWR9LE-NEXT:    xscvspdpn f1, v3
431; PWR9LE-NEXT:    xsmulsp f1, f0, f1
432; PWR9LE-NEXT:    blr
433;
434; PWR9BE-LABEL: v8f32_b:
435; PWR9BE:       # %bb.0: # %entry
436; PWR9BE-NEXT:    xscvspdpn f0, v2
437; PWR9BE-NEXT:    xsmulsp f0, f1, f0
438; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
439; PWR9BE-NEXT:    xscvspdpn f1, vs1
440; PWR9BE-NEXT:    xsmulsp f0, f0, f1
441; PWR9BE-NEXT:    xxswapd vs1, v2
442; PWR9BE-NEXT:    xscvspdpn f1, vs1
443; PWR9BE-NEXT:    xsmulsp f0, f0, f1
444; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
445; PWR9BE-NEXT:    xscvspdpn f1, vs1
446; PWR9BE-NEXT:    xsmulsp f0, f0, f1
447; PWR9BE-NEXT:    xscvspdpn f1, v3
448; PWR9BE-NEXT:    xsmulsp f0, f0, f1
449; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 1
450; PWR9BE-NEXT:    xscvspdpn f1, vs1
451; PWR9BE-NEXT:    xsmulsp f0, f0, f1
452; PWR9BE-NEXT:    xxswapd vs1, v3
453; PWR9BE-NEXT:    xscvspdpn f1, vs1
454; PWR9BE-NEXT:    xsmulsp f0, f0, f1
455; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 3
456; PWR9BE-NEXT:    xscvspdpn f1, vs1
457; PWR9BE-NEXT:    xsmulsp f1, f0, f1
458; PWR9BE-NEXT:    blr
459;
460; PWR10LE-LABEL: v8f32_b:
461; PWR10LE:       # %bb.0: # %entry
462; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
463; PWR10LE-NEXT:    xscvspdpn f0, vs0
464; PWR10LE-NEXT:    xsmulsp f0, f1, f0
465; PWR10LE-NEXT:    xxswapd vs1, v2
466; PWR10LE-NEXT:    xscvspdpn f1, vs1
467; PWR10LE-NEXT:    xsmulsp f0, f0, f1
468; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
469; PWR10LE-NEXT:    xscvspdpn f1, vs1
470; PWR10LE-NEXT:    xsmulsp f0, f0, f1
471; PWR10LE-NEXT:    xscvspdpn f1, v2
472; PWR10LE-NEXT:    xsmulsp f0, f0, f1
473; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 3
474; PWR10LE-NEXT:    xscvspdpn f1, vs1
475; PWR10LE-NEXT:    xsmulsp f0, f0, f1
476; PWR10LE-NEXT:    xxswapd vs1, v3
477; PWR10LE-NEXT:    xscvspdpn f1, vs1
478; PWR10LE-NEXT:    xsmulsp f0, f0, f1
479; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 1
480; PWR10LE-NEXT:    xscvspdpn f1, vs1
481; PWR10LE-NEXT:    xsmulsp f0, f0, f1
482; PWR10LE-NEXT:    xscvspdpn f1, v3
483; PWR10LE-NEXT:    xsmulsp f1, f0, f1
484; PWR10LE-NEXT:    blr
485;
486; PWR10BE-LABEL: v8f32_b:
487; PWR10BE:       # %bb.0: # %entry
488; PWR10BE-NEXT:    xscvspdpn f0, v2
489; PWR10BE-NEXT:    xsmulsp f0, f1, f0
490; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
491; PWR10BE-NEXT:    xscvspdpn f1, vs1
492; PWR10BE-NEXT:    xsmulsp f0, f0, f1
493; PWR10BE-NEXT:    xxswapd vs1, v2
494; PWR10BE-NEXT:    xscvspdpn f1, vs1
495; PWR10BE-NEXT:    xsmulsp f0, f0, f1
496; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
497; PWR10BE-NEXT:    xscvspdpn f1, vs1
498; PWR10BE-NEXT:    xsmulsp f0, f0, f1
499; PWR10BE-NEXT:    xscvspdpn f1, v3
500; PWR10BE-NEXT:    xsmulsp f0, f0, f1
501; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 1
502; PWR10BE-NEXT:    xscvspdpn f1, vs1
503; PWR10BE-NEXT:    xsmulsp f0, f0, f1
504; PWR10BE-NEXT:    xxswapd vs1, v3
505; PWR10BE-NEXT:    xscvspdpn f1, vs1
506; PWR10BE-NEXT:    xsmulsp f0, f0, f1
507; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 3
508; PWR10BE-NEXT:    xscvspdpn f1, vs1
509; PWR10BE-NEXT:    xsmulsp f1, f0, f1
510; PWR10BE-NEXT:    blr
511entry:
512  %0 = call float @llvm.vector.reduce.fmul.v8f32(float %b, <8 x float> %a)
513  ret float %0
514}
515
; v8f32_fast: fmul reduction over <8 x float> with a 1.0 start value where the
; call carries the `fast` flag. The expected code uses three vector xvmulsp
; instructions: v2 times v3 first, then the running product with an xxswapd of
; itself, then with an xxspltw of itself, followed by a single xscvspdpn. The
; little-endian checks additionally expect an xxsldwi before the conversion.
516define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
517; PWR9LE-LABEL: v8f32_fast:
518; PWR9LE:       # %bb.0: # %entry
519; PWR9LE-NEXT:    xvmulsp vs0, v2, v3
520; PWR9LE-NEXT:    xxswapd v2, vs0
521; PWR9LE-NEXT:    xvmulsp vs0, vs0, v2
522; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
523; PWR9LE-NEXT:    xvmulsp vs0, vs0, vs1
524; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
525; PWR9LE-NEXT:    xscvspdpn f1, vs0
526; PWR9LE-NEXT:    blr
527;
528; PWR9BE-LABEL: v8f32_fast:
529; PWR9BE:       # %bb.0: # %entry
530; PWR9BE-NEXT:    xvmulsp vs0, v2, v3
531; PWR9BE-NEXT:    xxswapd v2, vs0
532; PWR9BE-NEXT:    xvmulsp vs0, vs0, v2
533; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
534; PWR9BE-NEXT:    xvmulsp vs0, vs0, vs1
535; PWR9BE-NEXT:    xscvspdpn f1, vs0
536; PWR9BE-NEXT:    blr
537;
538; PWR10LE-LABEL: v8f32_fast:
539; PWR10LE:       # %bb.0: # %entry
540; PWR10LE-NEXT:    xvmulsp vs0, v2, v3
541; PWR10LE-NEXT:    xxswapd v2, vs0
542; PWR10LE-NEXT:    xvmulsp vs0, vs0, v2
543; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
544; PWR10LE-NEXT:    xvmulsp vs0, vs0, vs1
545; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
546; PWR10LE-NEXT:    xscvspdpn f1, vs0
547; PWR10LE-NEXT:    blr
548;
549; PWR10BE-LABEL: v8f32_fast:
550; PWR10BE:       # %bb.0: # %entry
551; PWR10BE-NEXT:    xvmulsp vs0, v2, v3
552; PWR10BE-NEXT:    xxswapd v2, vs0
553; PWR10BE-NEXT:    xvmulsp vs0, vs0, v2
554; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
555; PWR10BE-NEXT:    xvmulsp vs0, vs0, vs1
556; PWR10BE-NEXT:    xscvspdpn f1, vs0
557; PWR10BE-NEXT:    blr
558entry:
559  %0 = call fast float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
560  ret float %0
561}
562
; v16f32: fmul reduction over <16 x float> with a constant 1.0 start value and
; no fast-math flags on the call. Every checked configuration expects fifteen
; chained scalar xsmulsp instructions, consuming values from v2, v3, v4 and v5
; in that order.
563define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
564; PWR9LE-LABEL: v16f32:
565; PWR9LE:       # %bb.0: # %entry
566; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
567; PWR9LE-NEXT:    xxswapd vs1, v2
568; PWR9LE-NEXT:    xscvspdpn f0, vs0
569; PWR9LE-NEXT:    xscvspdpn f1, vs1
570; PWR9LE-NEXT:    xsmulsp f0, f0, f1
571; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
572; PWR9LE-NEXT:    xscvspdpn f1, vs1
573; PWR9LE-NEXT:    xsmulsp f0, f0, f1
574; PWR9LE-NEXT:    xscvspdpn f1, v2
575; PWR9LE-NEXT:    xsmulsp f0, f0, f1
576; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 3
577; PWR9LE-NEXT:    xscvspdpn f1, vs1
578; PWR9LE-NEXT:    xsmulsp f0, f0, f1
579; PWR9LE-NEXT:    xxswapd vs1, v3
580; PWR9LE-NEXT:    xscvspdpn f1, vs1
581; PWR9LE-NEXT:    xsmulsp f0, f0, f1
582; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 1
583; PWR9LE-NEXT:    xscvspdpn f1, vs1
584; PWR9LE-NEXT:    xsmulsp f0, f0, f1
585; PWR9LE-NEXT:    xscvspdpn f1, v3
586; PWR9LE-NEXT:    xsmulsp f0, f0, f1
587; PWR9LE-NEXT:    xxsldwi vs1, v4, v4, 3
588; PWR9LE-NEXT:    xscvspdpn f1, vs1
589; PWR9LE-NEXT:    xsmulsp f0, f0, f1
590; PWR9LE-NEXT:    xxswapd vs1, v4
591; PWR9LE-NEXT:    xscvspdpn f1, vs1
592; PWR9LE-NEXT:    xsmulsp f0, f0, f1
593; PWR9LE-NEXT:    xxsldwi vs1, v4, v4, 1
594; PWR9LE-NEXT:    xscvspdpn f1, vs1
595; PWR9LE-NEXT:    xsmulsp f0, f0, f1
596; PWR9LE-NEXT:    xscvspdpn f1, v4
597; PWR9LE-NEXT:    xsmulsp f0, f0, f1
598; PWR9LE-NEXT:    xxsldwi vs1, v5, v5, 3
599; PWR9LE-NEXT:    xscvspdpn f1, vs1
600; PWR9LE-NEXT:    xsmulsp f0, f0, f1
601; PWR9LE-NEXT:    xxswapd vs1, v5
602; PWR9LE-NEXT:    xscvspdpn f1, vs1
603; PWR9LE-NEXT:    xsmulsp f0, f0, f1
604; PWR9LE-NEXT:    xxsldwi vs1, v5, v5, 1
605; PWR9LE-NEXT:    xscvspdpn f1, vs1
606; PWR9LE-NEXT:    xsmulsp f0, f0, f1
607; PWR9LE-NEXT:    xscvspdpn f1, v5
608; PWR9LE-NEXT:    xsmulsp f1, f0, f1
609; PWR9LE-NEXT:    blr
610;
611; PWR9BE-LABEL: v16f32:
612; PWR9BE:       # %bb.0: # %entry
613; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
614; PWR9BE-NEXT:    xscvspdpn f0, v2
615; PWR9BE-NEXT:    xscvspdpn f1, vs1
616; PWR9BE-NEXT:    xsmulsp f0, f0, f1
617; PWR9BE-NEXT:    xxswapd vs1, v2
618; PWR9BE-NEXT:    xscvspdpn f1, vs1
619; PWR9BE-NEXT:    xsmulsp f0, f0, f1
620; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
621; PWR9BE-NEXT:    xscvspdpn f1, vs1
622; PWR9BE-NEXT:    xsmulsp f0, f0, f1
623; PWR9BE-NEXT:    xscvspdpn f1, v3
624; PWR9BE-NEXT:    xsmulsp f0, f0, f1
625; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 1
626; PWR9BE-NEXT:    xscvspdpn f1, vs1
627; PWR9BE-NEXT:    xsmulsp f0, f0, f1
628; PWR9BE-NEXT:    xxswapd vs1, v3
629; PWR9BE-NEXT:    xscvspdpn f1, vs1
630; PWR9BE-NEXT:    xsmulsp f0, f0, f1
631; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 3
632; PWR9BE-NEXT:    xscvspdpn f1, vs1
633; PWR9BE-NEXT:    xsmulsp f0, f0, f1
634; PWR9BE-NEXT:    xscvspdpn f1, v4
635; PWR9BE-NEXT:    xsmulsp f0, f0, f1
636; PWR9BE-NEXT:    xxsldwi vs1, v4, v4, 1
637; PWR9BE-NEXT:    xscvspdpn f1, vs1
638; PWR9BE-NEXT:    xsmulsp f0, f0, f1
639; PWR9BE-NEXT:    xxswapd vs1, v4
640; PWR9BE-NEXT:    xscvspdpn f1, vs1
641; PWR9BE-NEXT:    xsmulsp f0, f0, f1
642; PWR9BE-NEXT:    xxsldwi vs1, v4, v4, 3
643; PWR9BE-NEXT:    xscvspdpn f1, vs1
644; PWR9BE-NEXT:    xsmulsp f0, f0, f1
645; PWR9BE-NEXT:    xscvspdpn f1, v5
646; PWR9BE-NEXT:    xsmulsp f0, f0, f1
647; PWR9BE-NEXT:    xxsldwi vs1, v5, v5, 1
648; PWR9BE-NEXT:    xscvspdpn f1, vs1
649; PWR9BE-NEXT:    xsmulsp f0, f0, f1
650; PWR9BE-NEXT:    xxswapd vs1, v5
651; PWR9BE-NEXT:    xscvspdpn f1, vs1
652; PWR9BE-NEXT:    xsmulsp f0, f0, f1
653; PWR9BE-NEXT:    xxsldwi vs1, v5, v5, 3
654; PWR9BE-NEXT:    xscvspdpn f1, vs1
655; PWR9BE-NEXT:    xsmulsp f1, f0, f1
656; PWR9BE-NEXT:    blr
657;
658; PWR10LE-LABEL: v16f32:
659; PWR10LE:       # %bb.0: # %entry
660; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
661; PWR10LE-NEXT:    xxswapd vs1, v2
662; PWR10LE-NEXT:    xscvspdpn f0, vs0
663; PWR10LE-NEXT:    xscvspdpn f1, vs1
664; PWR10LE-NEXT:    xsmulsp f0, f0, f1
665; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
666; PWR10LE-NEXT:    xscvspdpn f1, vs1
667; PWR10LE-NEXT:    xsmulsp f0, f0, f1
668; PWR10LE-NEXT:    xscvspdpn f1, v2
669; PWR10LE-NEXT:    xsmulsp f0, f0, f1
670; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 3
671; PWR10LE-NEXT:    xscvspdpn f1, vs1
672; PWR10LE-NEXT:    xsmulsp f0, f0, f1
673; PWR10LE-NEXT:    xxswapd vs1, v3
674; PWR10LE-NEXT:    xscvspdpn f1, vs1
675; PWR10LE-NEXT:    xsmulsp f0, f0, f1
676; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 1
677; PWR10LE-NEXT:    xscvspdpn f1, vs1
678; PWR10LE-NEXT:    xsmulsp f0, f0, f1
679; PWR10LE-NEXT:    xscvspdpn f1, v3
680; PWR10LE-NEXT:    xsmulsp f0, f0, f1
681; PWR10LE-NEXT:    xxsldwi vs1, v4, v4, 3
682; PWR10LE-NEXT:    xscvspdpn f1, vs1
683; PWR10LE-NEXT:    xsmulsp f0, f0, f1
684; PWR10LE-NEXT:    xxswapd vs1, v4
685; PWR10LE-NEXT:    xscvspdpn f1, vs1
686; PWR10LE-NEXT:    xsmulsp f0, f0, f1
687; PWR10LE-NEXT:    xxsldwi vs1, v4, v4, 1
688; PWR10LE-NEXT:    xscvspdpn f1, vs1
689; PWR10LE-NEXT:    xsmulsp f0, f0, f1
690; PWR10LE-NEXT:    xscvspdpn f1, v4
691; PWR10LE-NEXT:    xsmulsp f0, f0, f1
692; PWR10LE-NEXT:    xxsldwi vs1, v5, v5, 3
693; PWR10LE-NEXT:    xscvspdpn f1, vs1
694; PWR10LE-NEXT:    xsmulsp f0, f0, f1
695; PWR10LE-NEXT:    xxswapd vs1, v5
696; PWR10LE-NEXT:    xscvspdpn f1, vs1
697; PWR10LE-NEXT:    xsmulsp f0, f0, f1
698; PWR10LE-NEXT:    xxsldwi vs1, v5, v5, 1
699; PWR10LE-NEXT:    xscvspdpn f1, vs1
700; PWR10LE-NEXT:    xsmulsp f0, f0, f1
701; PWR10LE-NEXT:    xscvspdpn f1, v5
702; PWR10LE-NEXT:    xsmulsp f1, f0, f1
703; PWR10LE-NEXT:    blr
704;
705; PWR10BE-LABEL: v16f32:
706; PWR10BE:       # %bb.0: # %entry
707; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
708; PWR10BE-NEXT:    xscvspdpn f0, v2
709; PWR10BE-NEXT:    xscvspdpn f1, vs1
710; PWR10BE-NEXT:    xsmulsp f0, f0, f1
711; PWR10BE-NEXT:    xxswapd vs1, v2
712; PWR10BE-NEXT:    xscvspdpn f1, vs1
713; PWR10BE-NEXT:    xsmulsp f0, f0, f1
714; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
715; PWR10BE-NEXT:    xscvspdpn f1, vs1
716; PWR10BE-NEXT:    xsmulsp f0, f0, f1
717; PWR10BE-NEXT:    xscvspdpn f1, v3
718; PWR10BE-NEXT:    xsmulsp f0, f0, f1
719; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 1
720; PWR10BE-NEXT:    xscvspdpn f1, vs1
721; PWR10BE-NEXT:    xsmulsp f0, f0, f1
722; PWR10BE-NEXT:    xxswapd vs1, v3
723; PWR10BE-NEXT:    xscvspdpn f1, vs1
724; PWR10BE-NEXT:    xsmulsp f0, f0, f1
725; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 3
726; PWR10BE-NEXT:    xscvspdpn f1, vs1
727; PWR10BE-NEXT:    xsmulsp f0, f0, f1
728; PWR10BE-NEXT:    xscvspdpn f1, v4
729; PWR10BE-NEXT:    xsmulsp f0, f0, f1
730; PWR10BE-NEXT:    xxsldwi vs1, v4, v4, 1
731; PWR10BE-NEXT:    xscvspdpn f1, vs1
732; PWR10BE-NEXT:    xsmulsp f0, f0, f1
733; PWR10BE-NEXT:    xxswapd vs1, v4
734; PWR10BE-NEXT:    xscvspdpn f1, vs1
735; PWR10BE-NEXT:    xsmulsp f0, f0, f1
736; PWR10BE-NEXT:    xxsldwi vs1, v4, v4, 3
737; PWR10BE-NEXT:    xscvspdpn f1, vs1
738; PWR10BE-NEXT:    xsmulsp f0, f0, f1
739; PWR10BE-NEXT:    xscvspdpn f1, v5
740; PWR10BE-NEXT:    xsmulsp f0, f0, f1
741; PWR10BE-NEXT:    xxsldwi vs1, v5, v5, 1
742; PWR10BE-NEXT:    xscvspdpn f1, vs1
743; PWR10BE-NEXT:    xsmulsp f0, f0, f1
744; PWR10BE-NEXT:    xxswapd vs1, v5
745; PWR10BE-NEXT:    xscvspdpn f1, vs1
746; PWR10BE-NEXT:    xsmulsp f0, f0, f1
747; PWR10BE-NEXT:    xxsldwi vs1, v5, v5, 3
748; PWR10BE-NEXT:    xscvspdpn f1, vs1
749; PWR10BE-NEXT:    xsmulsp f1, f0, f1
750; PWR10BE-NEXT:    blr
751entry:
752  %0 = call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
753  ret float %0
754}
755
756define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 {
757; PWR9LE-LABEL: v16f32_b:
758; PWR9LE:       # %bb.0: # %entry
759; PWR9LE-NEXT:    xxsldwi vs0, v2, v2, 3
760; PWR9LE-NEXT:    xscvspdpn f0, vs0
761; PWR9LE-NEXT:    xsmulsp f0, f1, f0
762; PWR9LE-NEXT:    xxswapd vs1, v2
763; PWR9LE-NEXT:    xscvspdpn f1, vs1
764; PWR9LE-NEXT:    xsmulsp f0, f0, f1
765; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
766; PWR9LE-NEXT:    xscvspdpn f1, vs1
767; PWR9LE-NEXT:    xsmulsp f0, f0, f1
768; PWR9LE-NEXT:    xscvspdpn f1, v2
769; PWR9LE-NEXT:    xsmulsp f0, f0, f1
770; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 3
771; PWR9LE-NEXT:    xscvspdpn f1, vs1
772; PWR9LE-NEXT:    xsmulsp f0, f0, f1
773; PWR9LE-NEXT:    xxswapd vs1, v3
774; PWR9LE-NEXT:    xscvspdpn f1, vs1
775; PWR9LE-NEXT:    xsmulsp f0, f0, f1
776; PWR9LE-NEXT:    xxsldwi vs1, v3, v3, 1
777; PWR9LE-NEXT:    xscvspdpn f1, vs1
778; PWR9LE-NEXT:    xsmulsp f0, f0, f1
779; PWR9LE-NEXT:    xscvspdpn f1, v3
780; PWR9LE-NEXT:    xsmulsp f0, f0, f1
781; PWR9LE-NEXT:    xxsldwi vs1, v4, v4, 3
782; PWR9LE-NEXT:    xscvspdpn f1, vs1
783; PWR9LE-NEXT:    xsmulsp f0, f0, f1
784; PWR9LE-NEXT:    xxswapd vs1, v4
785; PWR9LE-NEXT:    xscvspdpn f1, vs1
786; PWR9LE-NEXT:    xsmulsp f0, f0, f1
787; PWR9LE-NEXT:    xxsldwi vs1, v4, v4, 1
788; PWR9LE-NEXT:    xscvspdpn f1, vs1
789; PWR9LE-NEXT:    xsmulsp f0, f0, f1
790; PWR9LE-NEXT:    xscvspdpn f1, v4
791; PWR9LE-NEXT:    xsmulsp f0, f0, f1
792; PWR9LE-NEXT:    xxsldwi vs1, v5, v5, 3
793; PWR9LE-NEXT:    xscvspdpn f1, vs1
794; PWR9LE-NEXT:    xsmulsp f0, f0, f1
795; PWR9LE-NEXT:    xxswapd vs1, v5
796; PWR9LE-NEXT:    xscvspdpn f1, vs1
797; PWR9LE-NEXT:    xsmulsp f0, f0, f1
798; PWR9LE-NEXT:    xxsldwi vs1, v5, v5, 1
799; PWR9LE-NEXT:    xscvspdpn f1, vs1
800; PWR9LE-NEXT:    xsmulsp f0, f0, f1
801; PWR9LE-NEXT:    xscvspdpn f1, v5
802; PWR9LE-NEXT:    xsmulsp f1, f0, f1
803; PWR9LE-NEXT:    blr
804;
805; PWR9BE-LABEL: v16f32_b:
806; PWR9BE:       # %bb.0: # %entry
807; PWR9BE-NEXT:    xscvspdpn f0, v2
808; PWR9BE-NEXT:    xsmulsp f0, f1, f0
809; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
810; PWR9BE-NEXT:    xscvspdpn f1, vs1
811; PWR9BE-NEXT:    xsmulsp f0, f0, f1
812; PWR9BE-NEXT:    xxswapd vs1, v2
813; PWR9BE-NEXT:    xscvspdpn f1, vs1
814; PWR9BE-NEXT:    xsmulsp f0, f0, f1
815; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 3
816; PWR9BE-NEXT:    xscvspdpn f1, vs1
817; PWR9BE-NEXT:    xsmulsp f0, f0, f1
818; PWR9BE-NEXT:    xscvspdpn f1, v3
819; PWR9BE-NEXT:    xsmulsp f0, f0, f1
820; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 1
821; PWR9BE-NEXT:    xscvspdpn f1, vs1
822; PWR9BE-NEXT:    xsmulsp f0, f0, f1
823; PWR9BE-NEXT:    xxswapd vs1, v3
824; PWR9BE-NEXT:    xscvspdpn f1, vs1
825; PWR9BE-NEXT:    xsmulsp f0, f0, f1
826; PWR9BE-NEXT:    xxsldwi vs1, v3, v3, 3
827; PWR9BE-NEXT:    xscvspdpn f1, vs1
828; PWR9BE-NEXT:    xsmulsp f0, f0, f1
829; PWR9BE-NEXT:    xscvspdpn f1, v4
830; PWR9BE-NEXT:    xsmulsp f0, f0, f1
831; PWR9BE-NEXT:    xxsldwi vs1, v4, v4, 1
832; PWR9BE-NEXT:    xscvspdpn f1, vs1
833; PWR9BE-NEXT:    xsmulsp f0, f0, f1
834; PWR9BE-NEXT:    xxswapd vs1, v4
835; PWR9BE-NEXT:    xscvspdpn f1, vs1
836; PWR9BE-NEXT:    xsmulsp f0, f0, f1
837; PWR9BE-NEXT:    xxsldwi vs1, v4, v4, 3
838; PWR9BE-NEXT:    xscvspdpn f1, vs1
839; PWR9BE-NEXT:    xsmulsp f0, f0, f1
840; PWR9BE-NEXT:    xscvspdpn f1, v5
841; PWR9BE-NEXT:    xsmulsp f0, f0, f1
842; PWR9BE-NEXT:    xxsldwi vs1, v5, v5, 1
843; PWR9BE-NEXT:    xscvspdpn f1, vs1
844; PWR9BE-NEXT:    xsmulsp f0, f0, f1
845; PWR9BE-NEXT:    xxswapd vs1, v5
846; PWR9BE-NEXT:    xscvspdpn f1, vs1
847; PWR9BE-NEXT:    xsmulsp f0, f0, f1
848; PWR9BE-NEXT:    xxsldwi vs1, v5, v5, 3
849; PWR9BE-NEXT:    xscvspdpn f1, vs1
850; PWR9BE-NEXT:    xsmulsp f1, f0, f1
851; PWR9BE-NEXT:    blr
852;
853; PWR10LE-LABEL: v16f32_b:
854; PWR10LE:       # %bb.0: # %entry
855; PWR10LE-NEXT:    xxsldwi vs0, v2, v2, 3
856; PWR10LE-NEXT:    xscvspdpn f0, vs0
857; PWR10LE-NEXT:    xsmulsp f0, f1, f0
858; PWR10LE-NEXT:    xxswapd vs1, v2
859; PWR10LE-NEXT:    xscvspdpn f1, vs1
860; PWR10LE-NEXT:    xsmulsp f0, f0, f1
861; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
862; PWR10LE-NEXT:    xscvspdpn f1, vs1
863; PWR10LE-NEXT:    xsmulsp f0, f0, f1
864; PWR10LE-NEXT:    xscvspdpn f1, v2
865; PWR10LE-NEXT:    xsmulsp f0, f0, f1
866; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 3
867; PWR10LE-NEXT:    xscvspdpn f1, vs1
868; PWR10LE-NEXT:    xsmulsp f0, f0, f1
869; PWR10LE-NEXT:    xxswapd vs1, v3
870; PWR10LE-NEXT:    xscvspdpn f1, vs1
871; PWR10LE-NEXT:    xsmulsp f0, f0, f1
872; PWR10LE-NEXT:    xxsldwi vs1, v3, v3, 1
873; PWR10LE-NEXT:    xscvspdpn f1, vs1
874; PWR10LE-NEXT:    xsmulsp f0, f0, f1
875; PWR10LE-NEXT:    xscvspdpn f1, v3
876; PWR10LE-NEXT:    xsmulsp f0, f0, f1
877; PWR10LE-NEXT:    xxsldwi vs1, v4, v4, 3
878; PWR10LE-NEXT:    xscvspdpn f1, vs1
879; PWR10LE-NEXT:    xsmulsp f0, f0, f1
880; PWR10LE-NEXT:    xxswapd vs1, v4
881; PWR10LE-NEXT:    xscvspdpn f1, vs1
882; PWR10LE-NEXT:    xsmulsp f0, f0, f1
883; PWR10LE-NEXT:    xxsldwi vs1, v4, v4, 1
884; PWR10LE-NEXT:    xscvspdpn f1, vs1
885; PWR10LE-NEXT:    xsmulsp f0, f0, f1
886; PWR10LE-NEXT:    xscvspdpn f1, v4
887; PWR10LE-NEXT:    xsmulsp f0, f0, f1
888; PWR10LE-NEXT:    xxsldwi vs1, v5, v5, 3
889; PWR10LE-NEXT:    xscvspdpn f1, vs1
890; PWR10LE-NEXT:    xsmulsp f0, f0, f1
891; PWR10LE-NEXT:    xxswapd vs1, v5
892; PWR10LE-NEXT:    xscvspdpn f1, vs1
893; PWR10LE-NEXT:    xsmulsp f0, f0, f1
894; PWR10LE-NEXT:    xxsldwi vs1, v5, v5, 1
895; PWR10LE-NEXT:    xscvspdpn f1, vs1
896; PWR10LE-NEXT:    xsmulsp f0, f0, f1
897; PWR10LE-NEXT:    xscvspdpn f1, v5
898; PWR10LE-NEXT:    xsmulsp f1, f0, f1
899; PWR10LE-NEXT:    blr
900;
901; PWR10BE-LABEL: v16f32_b:
902; PWR10BE:       # %bb.0: # %entry
903; PWR10BE-NEXT:    xscvspdpn f0, v2
904; PWR10BE-NEXT:    xsmulsp f0, f1, f0
905; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
906; PWR10BE-NEXT:    xscvspdpn f1, vs1
907; PWR10BE-NEXT:    xsmulsp f0, f0, f1
908; PWR10BE-NEXT:    xxswapd vs1, v2
909; PWR10BE-NEXT:    xscvspdpn f1, vs1
910; PWR10BE-NEXT:    xsmulsp f0, f0, f1
911; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 3
912; PWR10BE-NEXT:    xscvspdpn f1, vs1
913; PWR10BE-NEXT:    xsmulsp f0, f0, f1
914; PWR10BE-NEXT:    xscvspdpn f1, v3
915; PWR10BE-NEXT:    xsmulsp f0, f0, f1
916; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 1
917; PWR10BE-NEXT:    xscvspdpn f1, vs1
918; PWR10BE-NEXT:    xsmulsp f0, f0, f1
919; PWR10BE-NEXT:    xxswapd vs1, v3
920; PWR10BE-NEXT:    xscvspdpn f1, vs1
921; PWR10BE-NEXT:    xsmulsp f0, f0, f1
922; PWR10BE-NEXT:    xxsldwi vs1, v3, v3, 3
923; PWR10BE-NEXT:    xscvspdpn f1, vs1
924; PWR10BE-NEXT:    xsmulsp f0, f0, f1
925; PWR10BE-NEXT:    xscvspdpn f1, v4
926; PWR10BE-NEXT:    xsmulsp f0, f0, f1
927; PWR10BE-NEXT:    xxsldwi vs1, v4, v4, 1
928; PWR10BE-NEXT:    xscvspdpn f1, vs1
929; PWR10BE-NEXT:    xsmulsp f0, f0, f1
930; PWR10BE-NEXT:    xxswapd vs1, v4
931; PWR10BE-NEXT:    xscvspdpn f1, vs1
932; PWR10BE-NEXT:    xsmulsp f0, f0, f1
933; PWR10BE-NEXT:    xxsldwi vs1, v4, v4, 3
934; PWR10BE-NEXT:    xscvspdpn f1, vs1
935; PWR10BE-NEXT:    xsmulsp f0, f0, f1
936; PWR10BE-NEXT:    xscvspdpn f1, v5
937; PWR10BE-NEXT:    xsmulsp f0, f0, f1
938; PWR10BE-NEXT:    xxsldwi vs1, v5, v5, 1
939; PWR10BE-NEXT:    xscvspdpn f1, vs1
940; PWR10BE-NEXT:    xsmulsp f0, f0, f1
941; PWR10BE-NEXT:    xxswapd vs1, v5
942; PWR10BE-NEXT:    xscvspdpn f1, vs1
943; PWR10BE-NEXT:    xsmulsp f0, f0, f1
944; PWR10BE-NEXT:    xxsldwi vs1, v5, v5, 3
945; PWR10BE-NEXT:    xscvspdpn f1, vs1
946; PWR10BE-NEXT:    xsmulsp f1, f0, f1
947; PWR10BE-NEXT:    blr
948entry:
949  %0 = call float @llvm.vector.reduce.fmul.v16f32(float %b, <16 x float> %a)
950  ret float %0
951}
952
; v16f32_fast: multiplies together the lanes of <16 x float> %a through
; llvm.vector.reduce.fmul, with 1.0 as the scalar operand and the `fast` flag
; on the call. The expected code multiplies whole vectors with xvmulsp
; (v3*v5, v2*v4, then those two products), folds the remaining vector onto
; itself using xxswapd and xxspltw, and converts one lane into f1 with
; xscvspdpn. The little-endian runs expect an extra xxsldwi before that
; conversion; the big-endian runs do not.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
953define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
954; PWR9LE-LABEL: v16f32_fast:
955; PWR9LE:       # %bb.0: # %entry
956; PWR9LE-NEXT:    xvmulsp vs0, v3, v5
957; PWR9LE-NEXT:    xvmulsp vs1, v2, v4
958; PWR9LE-NEXT:    xvmulsp vs0, vs1, vs0
959; PWR9LE-NEXT:    xxswapd v2, vs0
960; PWR9LE-NEXT:    xvmulsp vs0, vs0, v2
961; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
962; PWR9LE-NEXT:    xvmulsp vs0, vs0, vs1
963; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
964; PWR9LE-NEXT:    xscvspdpn f1, vs0
965; PWR9LE-NEXT:    blr
966;
967; PWR9BE-LABEL: v16f32_fast:
968; PWR9BE:       # %bb.0: # %entry
969; PWR9BE-NEXT:    xvmulsp vs0, v3, v5
970; PWR9BE-NEXT:    xvmulsp vs1, v2, v4
971; PWR9BE-NEXT:    xvmulsp vs0, vs1, vs0
972; PWR9BE-NEXT:    xxswapd v2, vs0
973; PWR9BE-NEXT:    xvmulsp vs0, vs0, v2
974; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
975; PWR9BE-NEXT:    xvmulsp vs0, vs0, vs1
976; PWR9BE-NEXT:    xscvspdpn f1, vs0
977; PWR9BE-NEXT:    blr
978;
979; PWR10LE-LABEL: v16f32_fast:
980; PWR10LE:       # %bb.0: # %entry
981; PWR10LE-NEXT:    xvmulsp vs0, v3, v5
982; PWR10LE-NEXT:    xvmulsp vs1, v2, v4
983; PWR10LE-NEXT:    xvmulsp vs0, vs1, vs0
984; PWR10LE-NEXT:    xxswapd v2, vs0
985; PWR10LE-NEXT:    xvmulsp vs0, vs0, v2
986; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
987; PWR10LE-NEXT:    xvmulsp vs0, vs0, vs1
988; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
989; PWR10LE-NEXT:    xscvspdpn f1, vs0
990; PWR10LE-NEXT:    blr
991;
992; PWR10BE-LABEL: v16f32_fast:
993; PWR10BE:       # %bb.0: # %entry
994; PWR10BE-NEXT:    xvmulsp vs0, v3, v5
995; PWR10BE-NEXT:    xvmulsp vs1, v2, v4
996; PWR10BE-NEXT:    xvmulsp vs0, vs1, vs0
997; PWR10BE-NEXT:    xxswapd v2, vs0
998; PWR10BE-NEXT:    xvmulsp vs0, vs0, v2
999; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
1000; PWR10BE-NEXT:    xvmulsp vs0, vs0, vs1
1001; PWR10BE-NEXT:    xscvspdpn f1, vs0
1002; PWR10BE-NEXT:    blr
1003entry:
1004  %0 = call fast float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
1005  ret float %0
1006}
1007
; Declarations of the float multiply-reduction intrinsics, one per vector
; width (2, 4, 8 and 16 lanes). Each takes a scalar float followed by the
; vector to reduce and returns a scalar float.
1008declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>) #0
1009declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) #0
1010declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) #0
1011declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>) #0
1012
1013;;
1014;; Vectors of f64
1015;;
; v2f64: multiplies together the two lanes of <2 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and no fast-math
; flags. The expected code is one xxswapd followed by a single scalar xsmuldp
; writing f1; no separate multiply by the constant 1.0 appears. The
; little-endian and big-endian runs differ only in the xsmuldp operand order.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1016define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
1017; PWR9LE-LABEL: v2f64:
1018; PWR9LE:       # %bb.0: # %entry
1019; PWR9LE-NEXT:    xxswapd vs0, v2
1020; PWR9LE-NEXT:    xsmuldp f1, f0, v2
1021; PWR9LE-NEXT:    blr
1022;
1023; PWR9BE-LABEL: v2f64:
1024; PWR9BE:       # %bb.0: # %entry
1025; PWR9BE-NEXT:    xxswapd vs0, v2
1026; PWR9BE-NEXT:    xsmuldp f1, v2, f0
1027; PWR9BE-NEXT:    blr
1028;
1029; PWR10LE-LABEL: v2f64:
1030; PWR10LE:       # %bb.0: # %entry
1031; PWR10LE-NEXT:    xxswapd vs0, v2
1032; PWR10LE-NEXT:    xsmuldp f1, f0, v2
1033; PWR10LE-NEXT:    blr
1034;
1035; PWR10BE-LABEL: v2f64:
1036; PWR10BE:       # %bb.0: # %entry
1037; PWR10BE-NEXT:    xxswapd vs0, v2
1038; PWR10BE-NEXT:    xsmuldp f1, v2, f0
1039; PWR10BE-NEXT:    blr
1040entry:
1041  %0 = call double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
1042  ret double %0
1043}
1044
; v2f64_b: same reduction as v2f64, but the scalar operand of the intrinsic is
; the function argument %b instead of the constant 1.0, and no fast-math flags
; are set. The expected code is a chain of two xsmuldp: the first reads f1
; (presumably the register in which %b arrives; confirm against the ABI) and
; each result feeds the next multiply. The little-endian runs do the xxswapd
; first; the big-endian runs multiply by v2 first and swap afterwards.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1045define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
1046; PWR9LE-LABEL: v2f64_b:
1047; PWR9LE:       # %bb.0: # %entry
1048; PWR9LE-NEXT:    xxswapd vs0, v2
1049; PWR9LE-NEXT:    xsmuldp f0, f1, f0
1050; PWR9LE-NEXT:    xsmuldp f1, f0, v2
1051; PWR9LE-NEXT:    blr
1052;
1053; PWR9BE-LABEL: v2f64_b:
1054; PWR9BE:       # %bb.0: # %entry
1055; PWR9BE-NEXT:    xsmuldp f0, f1, v2
1056; PWR9BE-NEXT:    xxswapd vs1, v2
1057; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1058; PWR9BE-NEXT:    blr
1059;
1060; PWR10LE-LABEL: v2f64_b:
1061; PWR10LE:       # %bb.0: # %entry
1062; PWR10LE-NEXT:    xxswapd vs0, v2
1063; PWR10LE-NEXT:    xsmuldp f0, f1, f0
1064; PWR10LE-NEXT:    xsmuldp f1, f0, v2
1065; PWR10LE-NEXT:    blr
1066;
1067; PWR10BE-LABEL: v2f64_b:
1068; PWR10BE:       # %bb.0: # %entry
1069; PWR10BE-NEXT:    xsmuldp f0, f1, v2
1070; PWR10BE-NEXT:    xxswapd vs1, v2
1071; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1072; PWR10BE-NEXT:    blr
1073entry:
1074  %0 = call double @llvm.vector.reduce.fmul.v2f64(double %b, <2 x double> %a)
1075  ret double %0
1076}
1077
; v2f64_fast: multiplies together the two lanes of <2 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and the `fast` flag
; on the call. The expected code swaps the doublewords of v2 and uses the
; vector multiply xvmuldp rather than a scalar multiply. The little-endian
; runs then need one more xxswapd to place the result in vs1; the big-endian
; runs have xvmuldp write vs1 directly.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1078define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
1079; PWR9LE-LABEL: v2f64_fast:
1080; PWR9LE:       # %bb.0: # %entry
1081; PWR9LE-NEXT:    xxswapd vs0, v2
1082; PWR9LE-NEXT:    xvmuldp vs0, v2, vs0
1083; PWR9LE-NEXT:    xxswapd vs1, vs0
1084; PWR9LE-NEXT:    blr
1085;
1086; PWR9BE-LABEL: v2f64_fast:
1087; PWR9BE:       # %bb.0: # %entry
1088; PWR9BE-NEXT:    xxswapd vs0, v2
1089; PWR9BE-NEXT:    xvmuldp vs1, v2, vs0
1090; PWR9BE-NEXT:    blr
1091;
1092; PWR10LE-LABEL: v2f64_fast:
1093; PWR10LE:       # %bb.0: # %entry
1094; PWR10LE-NEXT:    xxswapd vs0, v2
1095; PWR10LE-NEXT:    xvmuldp vs0, v2, vs0
1096; PWR10LE-NEXT:    xxswapd vs1, vs0
1097; PWR10LE-NEXT:    blr
1098;
1099; PWR10BE-LABEL: v2f64_fast:
1100; PWR10BE:       # %bb.0: # %entry
1101; PWR10BE-NEXT:    xxswapd vs0, v2
1102; PWR10BE-NEXT:    xvmuldp vs1, v2, vs0
1103; PWR10BE-NEXT:    blr
1104entry:
1105  %0 = call fast double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
1106  ret double %0
1107}
1108
; v4f64: multiplies together the four lanes of <4 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and no fast-math
; flags. The expected code is a chain of three scalar xsmuldp in which each
; product (kept in f0) feeds the next multiply, with xxswapd used to reach the
; other doubleword of v2 and v3. The little-endian and big-endian runs visit
; the swapped and unswapped halves in opposite order.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1109define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
1110; PWR9LE-LABEL: v4f64:
1111; PWR9LE:       # %bb.0: # %entry
1112; PWR9LE-NEXT:    xxswapd vs0, v2
1113; PWR9LE-NEXT:    xxswapd vs1, v3
1114; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1115; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1116; PWR9LE-NEXT:    xsmuldp f1, f0, v3
1117; PWR9LE-NEXT:    blr
1118;
1119; PWR9BE-LABEL: v4f64:
1120; PWR9BE:       # %bb.0: # %entry
1121; PWR9BE-NEXT:    xxswapd vs0, v2
1122; PWR9BE-NEXT:    xxswapd vs1, v3
1123; PWR9BE-NEXT:    xsmuldp f0, v2, f0
1124; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1125; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1126; PWR9BE-NEXT:    blr
1127;
1128; PWR10LE-LABEL: v4f64:
1129; PWR10LE:       # %bb.0: # %entry
1130; PWR10LE-NEXT:    xxswapd vs0, v2
1131; PWR10LE-NEXT:    xxswapd vs1, v3
1132; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1133; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1134; PWR10LE-NEXT:    xsmuldp f1, f0, v3
1135; PWR10LE-NEXT:    blr
1136;
1137; PWR10BE-LABEL: v4f64:
1138; PWR10BE:       # %bb.0: # %entry
1139; PWR10BE-NEXT:    xxswapd vs0, v2
1140; PWR10BE-NEXT:    xxswapd vs1, v3
1141; PWR10BE-NEXT:    xsmuldp f0, v2, f0
1142; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1143; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1144; PWR10BE-NEXT:    blr
1145entry:
1146  %0 = call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
1147  ret double %0
1148}
1149
; v4f64_b: same reduction as v4f64, but the scalar operand of the intrinsic is
; the function argument %b instead of the constant 1.0, and no fast-math flags
; are set. The expected code is a chain of four scalar xsmuldp: the first
; reads f1 (presumably the register in which %b arrives; confirm against the
; ABI) and every later multiply consumes the previous product in f0.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1150define dso_local double @v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 {
1151; PWR9LE-LABEL: v4f64_b:
1152; PWR9LE:       # %bb.0: # %entry
1153; PWR9LE-NEXT:    xxswapd vs0, v2
1154; PWR9LE-NEXT:    xsmuldp f0, f1, f0
1155; PWR9LE-NEXT:    xxswapd vs1, v3
1156; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1157; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1158; PWR9LE-NEXT:    xsmuldp f1, f0, v3
1159; PWR9LE-NEXT:    blr
1160;
1161; PWR9BE-LABEL: v4f64_b:
1162; PWR9BE:       # %bb.0: # %entry
1163; PWR9BE-NEXT:    xsmuldp f0, f1, v2
1164; PWR9BE-NEXT:    xxswapd vs1, v2
1165; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1166; PWR9BE-NEXT:    xxswapd vs1, v3
1167; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1168; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1169; PWR9BE-NEXT:    blr
1170;
1171; PWR10LE-LABEL: v4f64_b:
1172; PWR10LE:       # %bb.0: # %entry
1173; PWR10LE-NEXT:    xxswapd vs0, v2
1174; PWR10LE-NEXT:    xsmuldp f0, f1, f0
1175; PWR10LE-NEXT:    xxswapd vs1, v3
1176; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1177; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1178; PWR10LE-NEXT:    xsmuldp f1, f0, v3
1179; PWR10LE-NEXT:    blr
1180;
1181; PWR10BE-LABEL: v4f64_b:
1182; PWR10BE:       # %bb.0: # %entry
1183; PWR10BE-NEXT:    xsmuldp f0, f1, v2
1184; PWR10BE-NEXT:    xxswapd vs1, v2
1185; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1186; PWR10BE-NEXT:    xxswapd vs1, v3
1187; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1188; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1189; PWR10BE-NEXT:    blr
1190entry:
1191  %0 = call double @llvm.vector.reduce.fmul.v4f64(double %b, <4 x double> %a)
1192  ret double %0
1193}
1194
; v4f64_fast: multiplies together the four lanes of <4 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and the `fast` flag
; on the call. The expected code multiplies the two input registers with the
; vector xvmuldp (v2*v3), then multiplies that product by its own
; doubleword-swapped copy. The little-endian runs need one more xxswapd to
; place the result in vs1; the big-endian runs have the last xvmuldp write vs1
; directly.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1195define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
1196; PWR9LE-LABEL: v4f64_fast:
1197; PWR9LE:       # %bb.0: # %entry
1198; PWR9LE-NEXT:    xvmuldp vs0, v2, v3
1199; PWR9LE-NEXT:    xxswapd vs1, vs0
1200; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
1201; PWR9LE-NEXT:    xxswapd vs1, vs0
1202; PWR9LE-NEXT:    blr
1203;
1204; PWR9BE-LABEL: v4f64_fast:
1205; PWR9BE:       # %bb.0: # %entry
1206; PWR9BE-NEXT:    xvmuldp vs0, v2, v3
1207; PWR9BE-NEXT:    xxswapd vs1, vs0
1208; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
1209; PWR9BE-NEXT:    blr
1210;
1211; PWR10LE-LABEL: v4f64_fast:
1212; PWR10LE:       # %bb.0: # %entry
1213; PWR10LE-NEXT:    xvmuldp vs0, v2, v3
1214; PWR10LE-NEXT:    xxswapd vs1, vs0
1215; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
1216; PWR10LE-NEXT:    xxswapd vs1, vs0
1217; PWR10LE-NEXT:    blr
1218;
1219; PWR10BE-LABEL: v4f64_fast:
1220; PWR10BE:       # %bb.0: # %entry
1221; PWR10BE-NEXT:    xvmuldp vs0, v2, v3
1222; PWR10BE-NEXT:    xxswapd vs1, vs0
1223; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
1224; PWR10BE-NEXT:    blr
1225entry:
1226  %0 = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
1227  ret double %0
1228}
1229
; v8f64: multiplies together the eight lanes of <8 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and no fast-math
; flags. The expected code is a chain of seven scalar xsmuldp over registers
; v2 through v5, with each product kept in f0 and fed to the next multiply;
; xxswapd into vs1 supplies the other doubleword of each register.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1230define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
1231; PWR9LE-LABEL: v8f64:
1232; PWR9LE:       # %bb.0: # %entry
1233; PWR9LE-NEXT:    xxswapd vs0, v2
1234; PWR9LE-NEXT:    xxswapd vs1, v3
1235; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1236; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1237; PWR9LE-NEXT:    xxswapd vs1, v4
1238; PWR9LE-NEXT:    xsmuldp f0, f0, v3
1239; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1240; PWR9LE-NEXT:    xxswapd vs1, v5
1241; PWR9LE-NEXT:    xsmuldp f0, f0, v4
1242; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1243; PWR9LE-NEXT:    xsmuldp f1, f0, v5
1244; PWR9LE-NEXT:    blr
1245;
1246; PWR9BE-LABEL: v8f64:
1247; PWR9BE:       # %bb.0: # %entry
1248; PWR9BE-NEXT:    xxswapd vs0, v2
1249; PWR9BE-NEXT:    xxswapd vs1, v3
1250; PWR9BE-NEXT:    xsmuldp f0, v2, f0
1251; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1252; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1253; PWR9BE-NEXT:    xxswapd vs1, v4
1254; PWR9BE-NEXT:    xsmuldp f0, f0, v4
1255; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1256; PWR9BE-NEXT:    xxswapd vs1, v5
1257; PWR9BE-NEXT:    xsmuldp f0, f0, v5
1258; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1259; PWR9BE-NEXT:    blr
1260;
1261; PWR10LE-LABEL: v8f64:
1262; PWR10LE:       # %bb.0: # %entry
1263; PWR10LE-NEXT:    xxswapd vs0, v2
1264; PWR10LE-NEXT:    xxswapd vs1, v3
1265; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1266; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1267; PWR10LE-NEXT:    xxswapd vs1, v4
1268; PWR10LE-NEXT:    xsmuldp f0, f0, v3
1269; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1270; PWR10LE-NEXT:    xxswapd vs1, v5
1271; PWR10LE-NEXT:    xsmuldp f0, f0, v4
1272; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1273; PWR10LE-NEXT:    xsmuldp f1, f0, v5
1274; PWR10LE-NEXT:    blr
1275;
1276; PWR10BE-LABEL: v8f64:
1277; PWR10BE:       # %bb.0: # %entry
1278; PWR10BE-NEXT:    xxswapd vs0, v2
1279; PWR10BE-NEXT:    xxswapd vs1, v3
1280; PWR10BE-NEXT:    xsmuldp f0, v2, f0
1281; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1282; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1283; PWR10BE-NEXT:    xxswapd vs1, v4
1284; PWR10BE-NEXT:    xsmuldp f0, f0, v4
1285; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1286; PWR10BE-NEXT:    xxswapd vs1, v5
1287; PWR10BE-NEXT:    xsmuldp f0, f0, v5
1288; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1289; PWR10BE-NEXT:    blr
1290entry:
1291  %0 = call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
1292  ret double %0
1293}
1294
; v8f64_b: same reduction as v8f64, but the scalar operand of the intrinsic is
; the function argument %b instead of the constant 1.0, and no fast-math flags
; are set. The expected code is a chain of eight scalar xsmuldp over registers
; v2 through v5: the first reads f1 (presumably the register in which %b
; arrives; confirm against the ABI) and every later multiply consumes the
; previous product in f0.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1295define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 {
1296; PWR9LE-LABEL: v8f64_b:
1297; PWR9LE:       # %bb.0: # %entry
1298; PWR9LE-NEXT:    xxswapd vs0, v2
1299; PWR9LE-NEXT:    xsmuldp f0, f1, f0
1300; PWR9LE-NEXT:    xxswapd vs1, v3
1301; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1302; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1303; PWR9LE-NEXT:    xxswapd vs1, v4
1304; PWR9LE-NEXT:    xsmuldp f0, f0, v3
1305; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1306; PWR9LE-NEXT:    xxswapd vs1, v5
1307; PWR9LE-NEXT:    xsmuldp f0, f0, v4
1308; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1309; PWR9LE-NEXT:    xsmuldp f1, f0, v5
1310; PWR9LE-NEXT:    blr
1311;
1312; PWR9BE-LABEL: v8f64_b:
1313; PWR9BE:       # %bb.0: # %entry
1314; PWR9BE-NEXT:    xsmuldp f0, f1, v2
1315; PWR9BE-NEXT:    xxswapd vs1, v2
1316; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1317; PWR9BE-NEXT:    xxswapd vs1, v3
1318; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1319; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1320; PWR9BE-NEXT:    xxswapd vs1, v4
1321; PWR9BE-NEXT:    xsmuldp f0, f0, v4
1322; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1323; PWR9BE-NEXT:    xxswapd vs1, v5
1324; PWR9BE-NEXT:    xsmuldp f0, f0, v5
1325; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1326; PWR9BE-NEXT:    blr
1327;
1328; PWR10LE-LABEL: v8f64_b:
1329; PWR10LE:       # %bb.0: # %entry
1330; PWR10LE-NEXT:    xxswapd vs0, v2
1331; PWR10LE-NEXT:    xsmuldp f0, f1, f0
1332; PWR10LE-NEXT:    xxswapd vs1, v3
1333; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1334; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1335; PWR10LE-NEXT:    xxswapd vs1, v4
1336; PWR10LE-NEXT:    xsmuldp f0, f0, v3
1337; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1338; PWR10LE-NEXT:    xxswapd vs1, v5
1339; PWR10LE-NEXT:    xsmuldp f0, f0, v4
1340; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1341; PWR10LE-NEXT:    xsmuldp f1, f0, v5
1342; PWR10LE-NEXT:    blr
1343;
1344; PWR10BE-LABEL: v8f64_b:
1345; PWR10BE:       # %bb.0: # %entry
1346; PWR10BE-NEXT:    xsmuldp f0, f1, v2
1347; PWR10BE-NEXT:    xxswapd vs1, v2
1348; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1349; PWR10BE-NEXT:    xxswapd vs1, v3
1350; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1351; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1352; PWR10BE-NEXT:    xxswapd vs1, v4
1353; PWR10BE-NEXT:    xsmuldp f0, f0, v4
1354; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1355; PWR10BE-NEXT:    xxswapd vs1, v5
1356; PWR10BE-NEXT:    xsmuldp f0, f0, v5
1357; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1358; PWR10BE-NEXT:    blr
1359entry:
1360  %0 = call double @llvm.vector.reduce.fmul.v8f64(double %b, <8 x double> %a)
1361  ret double %0
1362}
1363
; v8f64_fast: multiplies together the eight lanes of <8 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and the `fast` flag
; on the call. The expected code combines the four input registers pairwise
; with the vector xvmuldp (v3*v5, v2*v4, then those two products), and finally
; multiplies the remaining vector by its own doubleword-swapped copy. The
; little-endian runs need one more xxswapd to place the result in vs1; the
; big-endian runs have the last xvmuldp write vs1 directly.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1364define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
1365; PWR9LE-LABEL: v8f64_fast:
1366; PWR9LE:       # %bb.0: # %entry
1367; PWR9LE-NEXT:    xvmuldp vs0, v3, v5
1368; PWR9LE-NEXT:    xvmuldp vs1, v2, v4
1369; PWR9LE-NEXT:    xvmuldp vs0, vs1, vs0
1370; PWR9LE-NEXT:    xxswapd vs1, vs0
1371; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
1372; PWR9LE-NEXT:    xxswapd vs1, vs0
1373; PWR9LE-NEXT:    blr
1374;
1375; PWR9BE-LABEL: v8f64_fast:
1376; PWR9BE:       # %bb.0: # %entry
1377; PWR9BE-NEXT:    xvmuldp vs0, v3, v5
1378; PWR9BE-NEXT:    xvmuldp vs1, v2, v4
1379; PWR9BE-NEXT:    xvmuldp vs0, vs1, vs0
1380; PWR9BE-NEXT:    xxswapd vs1, vs0
1381; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
1382; PWR9BE-NEXT:    blr
1383;
1384; PWR10LE-LABEL: v8f64_fast:
1385; PWR10LE:       # %bb.0: # %entry
1386; PWR10LE-NEXT:    xvmuldp vs0, v3, v5
1387; PWR10LE-NEXT:    xvmuldp vs1, v2, v4
1388; PWR10LE-NEXT:    xvmuldp vs0, vs1, vs0
1389; PWR10LE-NEXT:    xxswapd vs1, vs0
1390; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
1391; PWR10LE-NEXT:    xxswapd vs1, vs0
1392; PWR10LE-NEXT:    blr
1393;
1394; PWR10BE-LABEL: v8f64_fast:
1395; PWR10BE:       # %bb.0: # %entry
1396; PWR10BE-NEXT:    xvmuldp vs0, v3, v5
1397; PWR10BE-NEXT:    xvmuldp vs1, v2, v4
1398; PWR10BE-NEXT:    xvmuldp vs0, vs1, vs0
1399; PWR10BE-NEXT:    xxswapd vs1, vs0
1400; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
1401; PWR10BE-NEXT:    blr
1402entry:
1403  %0 = call fast double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
1404  ret double %0
1405}
1406
; v16f64: multiplies together the sixteen lanes of <16 x double> %a through
; llvm.vector.reduce.fmul with 1.0 as the scalar operand and no fast-math
; flags. The expected code is a chain of fifteen scalar xsmuldp over registers
; v2 through v9, with each product kept in f0 and fed to the next multiply;
; xxswapd into vs1 supplies the other doubleword of each register.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1407define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
1408; PWR9LE-LABEL: v16f64:
1409; PWR9LE:       # %bb.0: # %entry
1410; PWR9LE-NEXT:    xxswapd vs0, v2
1411; PWR9LE-NEXT:    xxswapd vs1, v3
1412; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1413; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1414; PWR9LE-NEXT:    xxswapd vs1, v4
1415; PWR9LE-NEXT:    xsmuldp f0, f0, v3
1416; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1417; PWR9LE-NEXT:    xxswapd vs1, v5
1418; PWR9LE-NEXT:    xsmuldp f0, f0, v4
1419; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1420; PWR9LE-NEXT:    xxswapd vs1, v6
1421; PWR9LE-NEXT:    xsmuldp f0, f0, v5
1422; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1423; PWR9LE-NEXT:    xxswapd vs1, v7
1424; PWR9LE-NEXT:    xsmuldp f0, f0, v6
1425; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1426; PWR9LE-NEXT:    xxswapd vs1, v8
1427; PWR9LE-NEXT:    xsmuldp f0, f0, v7
1428; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1429; PWR9LE-NEXT:    xxswapd vs1, v9
1430; PWR9LE-NEXT:    xsmuldp f0, f0, v8
1431; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1432; PWR9LE-NEXT:    xsmuldp f1, f0, v9
1433; PWR9LE-NEXT:    blr
1434;
1435; PWR9BE-LABEL: v16f64:
1436; PWR9BE:       # %bb.0: # %entry
1437; PWR9BE-NEXT:    xxswapd vs0, v2
1438; PWR9BE-NEXT:    xxswapd vs1, v3
1439; PWR9BE-NEXT:    xsmuldp f0, v2, f0
1440; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1441; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1442; PWR9BE-NEXT:    xxswapd vs1, v4
1443; PWR9BE-NEXT:    xsmuldp f0, f0, v4
1444; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1445; PWR9BE-NEXT:    xxswapd vs1, v5
1446; PWR9BE-NEXT:    xsmuldp f0, f0, v5
1447; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1448; PWR9BE-NEXT:    xxswapd vs1, v6
1449; PWR9BE-NEXT:    xsmuldp f0, f0, v6
1450; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1451; PWR9BE-NEXT:    xxswapd vs1, v7
1452; PWR9BE-NEXT:    xsmuldp f0, f0, v7
1453; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1454; PWR9BE-NEXT:    xxswapd vs1, v8
1455; PWR9BE-NEXT:    xsmuldp f0, f0, v8
1456; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1457; PWR9BE-NEXT:    xxswapd vs1, v9
1458; PWR9BE-NEXT:    xsmuldp f0, f0, v9
1459; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1460; PWR9BE-NEXT:    blr
1461;
1462; PWR10LE-LABEL: v16f64:
1463; PWR10LE:       # %bb.0: # %entry
1464; PWR10LE-NEXT:    xxswapd vs0, v2
1465; PWR10LE-NEXT:    xxswapd vs1, v3
1466; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1467; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1468; PWR10LE-NEXT:    xxswapd vs1, v4
1469; PWR10LE-NEXT:    xsmuldp f0, f0, v3
1470; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1471; PWR10LE-NEXT:    xxswapd vs1, v5
1472; PWR10LE-NEXT:    xsmuldp f0, f0, v4
1473; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1474; PWR10LE-NEXT:    xxswapd vs1, v6
1475; PWR10LE-NEXT:    xsmuldp f0, f0, v5
1476; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1477; PWR10LE-NEXT:    xxswapd vs1, v7
1478; PWR10LE-NEXT:    xsmuldp f0, f0, v6
1479; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1480; PWR10LE-NEXT:    xxswapd vs1, v8
1481; PWR10LE-NEXT:    xsmuldp f0, f0, v7
1482; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1483; PWR10LE-NEXT:    xxswapd vs1, v9
1484; PWR10LE-NEXT:    xsmuldp f0, f0, v8
1485; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1486; PWR10LE-NEXT:    xsmuldp f1, f0, v9
1487; PWR10LE-NEXT:    blr
1488;
1489; PWR10BE-LABEL: v16f64:
1490; PWR10BE:       # %bb.0: # %entry
1491; PWR10BE-NEXT:    xxswapd vs0, v2
1492; PWR10BE-NEXT:    xxswapd vs1, v3
1493; PWR10BE-NEXT:    xsmuldp f0, v2, f0
1494; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1495; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1496; PWR10BE-NEXT:    xxswapd vs1, v4
1497; PWR10BE-NEXT:    xsmuldp f0, f0, v4
1498; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1499; PWR10BE-NEXT:    xxswapd vs1, v5
1500; PWR10BE-NEXT:    xsmuldp f0, f0, v5
1501; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1502; PWR10BE-NEXT:    xxswapd vs1, v6
1503; PWR10BE-NEXT:    xsmuldp f0, f0, v6
1504; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1505; PWR10BE-NEXT:    xxswapd vs1, v7
1506; PWR10BE-NEXT:    xsmuldp f0, f0, v7
1507; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1508; PWR10BE-NEXT:    xxswapd vs1, v8
1509; PWR10BE-NEXT:    xsmuldp f0, f0, v8
1510; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1511; PWR10BE-NEXT:    xxswapd vs1, v9
1512; PWR10BE-NEXT:    xsmuldp f0, f0, v9
1513; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1514; PWR10BE-NEXT:    blr
1515entry:
1516  %0 = call double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
1517  ret double %0
1518}
1519
; v16f64_b: same reduction as v16f64, but the scalar operand of the intrinsic
; is the function argument %b instead of the constant 1.0, and no fast-math
; flags are set. The expected code is a chain of sixteen scalar xsmuldp over
; registers v2 through v9: the first reads f1 (presumably the register in
; which %b arrives; confirm against the ABI) and every later multiply consumes
; the previous product in f0.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1520define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 {
1521; PWR9LE-LABEL: v16f64_b:
1522; PWR9LE:       # %bb.0: # %entry
1523; PWR9LE-NEXT:    xxswapd vs0, v2
1524; PWR9LE-NEXT:    xsmuldp f0, f1, f0
1525; PWR9LE-NEXT:    xxswapd vs1, v3
1526; PWR9LE-NEXT:    xsmuldp f0, f0, v2
1527; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1528; PWR9LE-NEXT:    xxswapd vs1, v4
1529; PWR9LE-NEXT:    xsmuldp f0, f0, v3
1530; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1531; PWR9LE-NEXT:    xxswapd vs1, v5
1532; PWR9LE-NEXT:    xsmuldp f0, f0, v4
1533; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1534; PWR9LE-NEXT:    xxswapd vs1, v6
1535; PWR9LE-NEXT:    xsmuldp f0, f0, v5
1536; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1537; PWR9LE-NEXT:    xxswapd vs1, v7
1538; PWR9LE-NEXT:    xsmuldp f0, f0, v6
1539; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1540; PWR9LE-NEXT:    xxswapd vs1, v8
1541; PWR9LE-NEXT:    xsmuldp f0, f0, v7
1542; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1543; PWR9LE-NEXT:    xxswapd vs1, v9
1544; PWR9LE-NEXT:    xsmuldp f0, f0, v8
1545; PWR9LE-NEXT:    xsmuldp f0, f0, f1
1546; PWR9LE-NEXT:    xsmuldp f1, f0, v9
1547; PWR9LE-NEXT:    blr
1548;
1549; PWR9BE-LABEL: v16f64_b:
1550; PWR9BE:       # %bb.0: # %entry
1551; PWR9BE-NEXT:    xsmuldp f0, f1, v2
1552; PWR9BE-NEXT:    xxswapd vs1, v2
1553; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1554; PWR9BE-NEXT:    xxswapd vs1, v3
1555; PWR9BE-NEXT:    xsmuldp f0, f0, v3
1556; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1557; PWR9BE-NEXT:    xxswapd vs1, v4
1558; PWR9BE-NEXT:    xsmuldp f0, f0, v4
1559; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1560; PWR9BE-NEXT:    xxswapd vs1, v5
1561; PWR9BE-NEXT:    xsmuldp f0, f0, v5
1562; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1563; PWR9BE-NEXT:    xxswapd vs1, v6
1564; PWR9BE-NEXT:    xsmuldp f0, f0, v6
1565; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1566; PWR9BE-NEXT:    xxswapd vs1, v7
1567; PWR9BE-NEXT:    xsmuldp f0, f0, v7
1568; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1569; PWR9BE-NEXT:    xxswapd vs1, v8
1570; PWR9BE-NEXT:    xsmuldp f0, f0, v8
1571; PWR9BE-NEXT:    xsmuldp f0, f0, f1
1572; PWR9BE-NEXT:    xxswapd vs1, v9
1573; PWR9BE-NEXT:    xsmuldp f0, f0, v9
1574; PWR9BE-NEXT:    xsmuldp f1, f0, f1
1575; PWR9BE-NEXT:    blr
1576;
1577; PWR10LE-LABEL: v16f64_b:
1578; PWR10LE:       # %bb.0: # %entry
1579; PWR10LE-NEXT:    xxswapd vs0, v2
1580; PWR10LE-NEXT:    xsmuldp f0, f1, f0
1581; PWR10LE-NEXT:    xxswapd vs1, v3
1582; PWR10LE-NEXT:    xsmuldp f0, f0, v2
1583; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1584; PWR10LE-NEXT:    xxswapd vs1, v4
1585; PWR10LE-NEXT:    xsmuldp f0, f0, v3
1586; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1587; PWR10LE-NEXT:    xxswapd vs1, v5
1588; PWR10LE-NEXT:    xsmuldp f0, f0, v4
1589; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1590; PWR10LE-NEXT:    xxswapd vs1, v6
1591; PWR10LE-NEXT:    xsmuldp f0, f0, v5
1592; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1593; PWR10LE-NEXT:    xxswapd vs1, v7
1594; PWR10LE-NEXT:    xsmuldp f0, f0, v6
1595; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1596; PWR10LE-NEXT:    xxswapd vs1, v8
1597; PWR10LE-NEXT:    xsmuldp f0, f0, v7
1598; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1599; PWR10LE-NEXT:    xxswapd vs1, v9
1600; PWR10LE-NEXT:    xsmuldp f0, f0, v8
1601; PWR10LE-NEXT:    xsmuldp f0, f0, f1
1602; PWR10LE-NEXT:    xsmuldp f1, f0, v9
1603; PWR10LE-NEXT:    blr
1604;
1605; PWR10BE-LABEL: v16f64_b:
1606; PWR10BE:       # %bb.0: # %entry
1607; PWR10BE-NEXT:    xsmuldp f0, f1, v2
1608; PWR10BE-NEXT:    xxswapd vs1, v2
1609; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1610; PWR10BE-NEXT:    xxswapd vs1, v3
1611; PWR10BE-NEXT:    xsmuldp f0, f0, v3
1612; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1613; PWR10BE-NEXT:    xxswapd vs1, v4
1614; PWR10BE-NEXT:    xsmuldp f0, f0, v4
1615; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1616; PWR10BE-NEXT:    xxswapd vs1, v5
1617; PWR10BE-NEXT:    xsmuldp f0, f0, v5
1618; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1619; PWR10BE-NEXT:    xxswapd vs1, v6
1620; PWR10BE-NEXT:    xsmuldp f0, f0, v6
1621; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1622; PWR10BE-NEXT:    xxswapd vs1, v7
1623; PWR10BE-NEXT:    xsmuldp f0, f0, v7
1624; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1625; PWR10BE-NEXT:    xxswapd vs1, v8
1626; PWR10BE-NEXT:    xsmuldp f0, f0, v8
1627; PWR10BE-NEXT:    xsmuldp f0, f0, f1
1628; PWR10BE-NEXT:    xxswapd vs1, v9
1629; PWR10BE-NEXT:    xsmuldp f0, f0, v9
1630; PWR10BE-NEXT:    xsmuldp f1, f0, f1
1631; PWR10BE-NEXT:    blr
1632entry:
1633  %0 = call double @llvm.vector.reduce.fmul.v16f64(double %b, <16 x double> %a)
1634  ret double %0
1635}
1636
; v16f64_fast: multiplies together the sixteen lanes of <16 x double> %a
; through llvm.vector.reduce.fmul with 1.0 as the scalar operand and the
; `fast` flag on the call. The expected code uses seven vector xvmuldp to
; combine the eight input registers v2 through v9 into a single vector, then
; multiplies that vector by its own doubleword-swapped copy. The little-endian
; runs need one more xxswapd to place the result in vs1; the big-endian runs
; have the last xvmuldp write vs1 directly.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them instead of editing by hand.
1637define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
1638; PWR9LE-LABEL: v16f64_fast:
1639; PWR9LE:       # %bb.0: # %entry
1640; PWR9LE-NEXT:    xvmuldp vs0, v4, v8
1641; PWR9LE-NEXT:    xvmuldp vs1, v2, v6
1642; PWR9LE-NEXT:    xvmuldp vs2, v5, v9
1643; PWR9LE-NEXT:    xvmuldp vs3, v3, v7
1644; PWR9LE-NEXT:    xvmuldp vs2, vs3, vs2
1645; PWR9LE-NEXT:    xvmuldp vs0, vs1, vs0
1646; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs2
1647; PWR9LE-NEXT:    xxswapd vs1, vs0
1648; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
1649; PWR9LE-NEXT:    xxswapd vs1, vs0
1650; PWR9LE-NEXT:    blr
1651;
1652; PWR9BE-LABEL: v16f64_fast:
1653; PWR9BE:       # %bb.0: # %entry
1654; PWR9BE-NEXT:    xvmuldp vs0, v4, v8
1655; PWR9BE-NEXT:    xvmuldp vs1, v2, v6
1656; PWR9BE-NEXT:    xvmuldp vs2, v5, v9
1657; PWR9BE-NEXT:    xvmuldp vs3, v3, v7
1658; PWR9BE-NEXT:    xvmuldp vs2, vs3, vs2
1659; PWR9BE-NEXT:    xvmuldp vs0, vs1, vs0
1660; PWR9BE-NEXT:    xvmuldp vs0, vs0, vs2
1661; PWR9BE-NEXT:    xxswapd vs1, vs0
1662; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
1663; PWR9BE-NEXT:    blr
1664;
1665; PWR10LE-LABEL: v16f64_fast:
1666; PWR10LE:       # %bb.0: # %entry
1667; PWR10LE-NEXT:    xvmuldp vs0, v4, v8
1668; PWR10LE-NEXT:    xvmuldp vs1, v2, v6
1669; PWR10LE-NEXT:    xvmuldp vs2, v5, v9
1670; PWR10LE-NEXT:    xvmuldp vs3, v3, v7
1671; PWR10LE-NEXT:    xvmuldp vs2, vs3, vs2
1672; PWR10LE-NEXT:    xvmuldp vs0, vs1, vs0
1673; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs2
1674; PWR10LE-NEXT:    xxswapd vs1, vs0
1675; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
1676; PWR10LE-NEXT:    xxswapd vs1, vs0
1677; PWR10LE-NEXT:    blr
1678;
1679; PWR10BE-LABEL: v16f64_fast:
1680; PWR10BE:       # %bb.0: # %entry
1681; PWR10BE-NEXT:    xvmuldp vs0, v4, v8
1682; PWR10BE-NEXT:    xvmuldp vs1, v2, v6
1683; PWR10BE-NEXT:    xvmuldp vs2, v5, v9
1684; PWR10BE-NEXT:    xvmuldp vs3, v3, v7
1685; PWR10BE-NEXT:    xvmuldp vs2, vs3, vs2
1686; PWR10BE-NEXT:    xvmuldp vs0, vs1, vs0
1687; PWR10BE-NEXT:    xvmuldp vs0, vs0, vs2
1688; PWR10BE-NEXT:    xxswapd vs1, vs0
1689; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
1690; PWR10BE-NEXT:    blr
1691entry:
1692  %0 = call fast double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
1693  ret double %0
1694}
1695
; Declarations of the double multiply-reduction intrinsics, one per vector
; width (2, 4, 8 and 16 lanes). Each takes a scalar double followed by the
; vector to reduce and returns a scalar double.
1696declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) #0
1697declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) #0
1698declare double @llvm.vector.reduce.fmul.v8f64(double, <8 x double>) #0
1699declare double @llvm.vector.reduce.fmul.v16f64(double, <16 x double>) #0
1700
; Attribute group #0, referenced by the function definitions and intrinsic
; declarations in this file; it carries only `nounwind`.
1701attributes #0 = { nounwind }
1702