xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll (revision 53c37f300dd1b450671f2aee4cc649c380adb5ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
8; RUN:   FileCheck %s --check-prefix=PWR10LE
9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
11; RUN:   FileCheck %s --check-prefix=PWR10BE
12
13;;
14;; Vectors of f32
15;;
; v2f32: reduces the <2 x float> argument %a to one float with
; llvm.vector.reduce.fmin.v2f32; the call carries no fast-math flags.
; In all four llc configurations the checks expect each lane to be converted
; with xscvspdpn and the two scalars combined by a single xsmindp; no vector
; min instruction appears. The little-endian checks contain one extra
; xxswapd compared with the big-endian ones.
16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
17; PWR9LE-LABEL: v2f32:
18; PWR9LE:       # %bb.0: # %entry
19; PWR9LE-NEXT:    xxswapd vs0, v2
20; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 3
21; PWR9LE-NEXT:    xscvspdpn f0, vs0
22; PWR9LE-NEXT:    xscvspdpn f1, vs1
23; PWR9LE-NEXT:    xsmindp f1, f1, f0
24; PWR9LE-NEXT:    blr
25;
26; PWR9BE-LABEL: v2f32:
27; PWR9BE:       # %bb.0: # %entry
28; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
29; PWR9BE-NEXT:    xscvspdpn f0, v2
30; PWR9BE-NEXT:    xscvspdpn f1, vs1
31; PWR9BE-NEXT:    xsmindp f1, f0, f1
32; PWR9BE-NEXT:    blr
33;
34; PWR10LE-LABEL: v2f32:
35; PWR10LE:       # %bb.0: # %entry
36; PWR10LE-NEXT:    xxswapd vs0, v2
37; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 3
38; PWR10LE-NEXT:    xscvspdpn f0, vs0
39; PWR10LE-NEXT:    xscvspdpn f1, vs1
40; PWR10LE-NEXT:    xsmindp f1, f1, f0
41; PWR10LE-NEXT:    blr
42;
43; PWR10BE-LABEL: v2f32:
44; PWR10BE:       # %bb.0: # %entry
45; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
46; PWR10BE-NEXT:    xscvspdpn f0, v2
47; PWR10BE-NEXT:    xscvspdpn f1, vs1
48; PWR10BE-NEXT:    xsmindp f1, f0, f1
49; PWR10BE-NEXT:    blr
50entry:
51  %0 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
52  ret float %0
53}
54
; v2f32_fast: same reduction as v2f32, but the intrinsic call is marked
; `fast`. The checks expect an xxspltw followed by one vector xvminsp and a
; single xscvspdpn of the result; no scalar xsmindp appears. The
; little-endian checks add an xxsldwi before the conversion, the big-endian
; checks do not.
55define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
56; PWR9LE-LABEL: v2f32_fast:
57; PWR9LE:       # %bb.0: # %entry
58; PWR9LE-NEXT:    xxspltw vs0, v2, 2
59; PWR9LE-NEXT:    xvminsp vs0, v2, vs0
60; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
61; PWR9LE-NEXT:    xscvspdpn f1, vs0
62; PWR9LE-NEXT:    blr
63;
64; PWR9BE-LABEL: v2f32_fast:
65; PWR9BE:       # %bb.0: # %entry
66; PWR9BE-NEXT:    xxspltw vs0, v2, 1
67; PWR9BE-NEXT:    xvminsp vs0, v2, vs0
68; PWR9BE-NEXT:    xscvspdpn f1, vs0
69; PWR9BE-NEXT:    blr
70;
71; PWR10LE-LABEL: v2f32_fast:
72; PWR10LE:       # %bb.0: # %entry
73; PWR10LE-NEXT:    xxspltw vs0, v2, 2
74; PWR10LE-NEXT:    xvminsp vs0, v2, vs0
75; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
76; PWR10LE-NEXT:    xscvspdpn f1, vs0
77; PWR10LE-NEXT:    blr
78;
79; PWR10BE-LABEL: v2f32_fast:
80; PWR10BE:       # %bb.0: # %entry
81; PWR10BE-NEXT:    xxspltw vs0, v2, 1
82; PWR10BE-NEXT:    xvminsp vs0, v2, vs0
83; PWR10BE-NEXT:    xscvspdpn f1, vs0
84; PWR10BE-NEXT:    blr
85entry:
86  %0 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
87  ret float %0
88}
89
; v4f32: reduces the <4 x float> argument %a with
; llvm.vector.reduce.fmin.v4f32; the call carries no fast-math flags.
; Every configuration's checks expect four xscvspdpn conversions combined by
; a chain of three scalar xsmindp instructions. For a given endianness the
; pwr9 and pwr10 checks list the same instructions in a different order.
90define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
91; PWR9LE-LABEL: v4f32:
92; PWR9LE:       # %bb.0: # %entry
93; PWR9LE-NEXT:    xxsldwi vs2, v2, v2, 3
94; PWR9LE-NEXT:    xxswapd vs3, v2
95; PWR9LE-NEXT:    xscvspdpn f0, v2
96; PWR9LE-NEXT:    xscvspdpn f2, vs2
97; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
98; PWR9LE-NEXT:    xscvspdpn f3, vs3
99; PWR9LE-NEXT:    xscvspdpn f1, vs1
100; PWR9LE-NEXT:    xsmindp f2, f2, f3
101; PWR9LE-NEXT:    xsmindp f1, f2, f1
102; PWR9LE-NEXT:    xsmindp f1, f1, f0
103; PWR9LE-NEXT:    blr
104;
105; PWR9BE-LABEL: v4f32:
106; PWR9BE:       # %bb.0: # %entry
107; PWR9BE-NEXT:    xxsldwi vs2, v2, v2, 1
108; PWR9BE-NEXT:    xxswapd vs1, v2
109; PWR9BE-NEXT:    xscvspdpn f3, v2
110; PWR9BE-NEXT:    xscvspdpn f2, vs2
111; PWR9BE-NEXT:    xxsldwi vs0, v2, v2, 3
112; PWR9BE-NEXT:    xscvspdpn f1, vs1
113; PWR9BE-NEXT:    xscvspdpn f0, vs0
114; PWR9BE-NEXT:    xsmindp f2, f3, f2
115; PWR9BE-NEXT:    xsmindp f1, f2, f1
116; PWR9BE-NEXT:    xsmindp f1, f1, f0
117; PWR9BE-NEXT:    blr
118;
119; PWR10LE-LABEL: v4f32:
120; PWR10LE:       # %bb.0: # %entry
121; PWR10LE-NEXT:    xxsldwi vs2, v2, v2, 3
122; PWR10LE-NEXT:    xxswapd vs3, v2
123; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
124; PWR10LE-NEXT:    xscvspdpn f0, v2
125; PWR10LE-NEXT:    xscvspdpn f2, vs2
126; PWR10LE-NEXT:    xscvspdpn f3, vs3
127; PWR10LE-NEXT:    xscvspdpn f1, vs1
128; PWR10LE-NEXT:    xsmindp f2, f2, f3
129; PWR10LE-NEXT:    xsmindp f1, f2, f1
130; PWR10LE-NEXT:    xsmindp f1, f1, f0
131; PWR10LE-NEXT:    blr
132;
133; PWR10BE-LABEL: v4f32:
134; PWR10BE:       # %bb.0: # %entry
135; PWR10BE-NEXT:    xxsldwi vs2, v2, v2, 1
136; PWR10BE-NEXT:    xxswapd vs1, v2
137; PWR10BE-NEXT:    xscvspdpn f3, v2
138; PWR10BE-NEXT:    xxsldwi vs0, v2, v2, 3
139; PWR10BE-NEXT:    xscvspdpn f2, vs2
140; PWR10BE-NEXT:    xscvspdpn f1, vs1
141; PWR10BE-NEXT:    xscvspdpn f0, vs0
142; PWR10BE-NEXT:    xsmindp f2, f3, f2
143; PWR10BE-NEXT:    xsmindp f1, f2, f1
144; PWR10BE-NEXT:    xsmindp f1, f1, f0
145; PWR10BE-NEXT:    blr
146entry:
147  %0 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
148  ret float %0
149}
150
; v4f32_fast: same reduction as v4f32, but the intrinsic call is marked
; `fast`. The checks expect two vector xvminsp steps (one after an xxswapd,
; one after an xxspltw) and a single xscvspdpn of the result; no scalar
; xsmindp appears. The little-endian checks add an xxsldwi before the
; conversion.
151define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
152; PWR9LE-LABEL: v4f32_fast:
153; PWR9LE:       # %bb.0: # %entry
154; PWR9LE-NEXT:    xxswapd v3, v2
155; PWR9LE-NEXT:    xvminsp vs0, v2, v3
156; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
157; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
158; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
159; PWR9LE-NEXT:    xscvspdpn f1, vs0
160; PWR9LE-NEXT:    blr
161;
162; PWR9BE-LABEL: v4f32_fast:
163; PWR9BE:       # %bb.0: # %entry
164; PWR9BE-NEXT:    xxswapd v3, v2
165; PWR9BE-NEXT:    xvminsp vs0, v2, v3
166; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
167; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
168; PWR9BE-NEXT:    xscvspdpn f1, vs0
169; PWR9BE-NEXT:    blr
170;
171; PWR10LE-LABEL: v4f32_fast:
172; PWR10LE:       # %bb.0: # %entry
173; PWR10LE-NEXT:    xxswapd v3, v2
174; PWR10LE-NEXT:    xvminsp vs0, v2, v3
175; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
176; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
177; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
178; PWR10LE-NEXT:    xscvspdpn f1, vs0
179; PWR10LE-NEXT:    blr
180;
181; PWR10BE-LABEL: v4f32_fast:
182; PWR10BE:       # %bb.0: # %entry
183; PWR10BE-NEXT:    xxswapd v3, v2
184; PWR10BE-NEXT:    xvminsp vs0, v2, v3
185; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
186; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
187; PWR10BE-NEXT:    xscvspdpn f1, vs0
188; PWR10BE-NEXT:    blr
189entry:
190  %0 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
191  ret float %0
192}
193
; v8f32: reduces the <8 x float> argument %a with
; llvm.vector.reduce.fmin.v8f32; the call carries no fast-math flags.
; The checks expect one vector xvminsp of v2 and v3 first, after which the
; result is finished with four xscvspdpn conversions and three scalar
; xsmindp instructions. The pwr9 and pwr10 checks are identical for a given
; endianness.
194define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
195; PWR9LE-LABEL: v8f32:
196; PWR9LE:       # %bb.0: # %entry
197; PWR9LE-NEXT:    xvminsp vs0, v2, v3
198; PWR9LE-NEXT:    xxswapd vs1, vs0
199; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
200; PWR9LE-NEXT:    xscvspdpn f1, vs1
201; PWR9LE-NEXT:    xscvspdpn f2, vs2
202; PWR9LE-NEXT:    xsmindp f1, f2, f1
203; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
204; PWR9LE-NEXT:    xscvspdpn f0, vs0
205; PWR9LE-NEXT:    xscvspdpn f2, vs2
206; PWR9LE-NEXT:    xsmindp f1, f1, f2
207; PWR9LE-NEXT:    xsmindp f1, f1, f0
208; PWR9LE-NEXT:    blr
209;
210; PWR9BE-LABEL: v8f32:
211; PWR9BE:       # %bb.0: # %entry
212; PWR9BE-NEXT:    xvminsp vs0, v2, v3
213; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
214; PWR9BE-NEXT:    xscvspdpn f1, vs0
215; PWR9BE-NEXT:    xscvspdpn f2, vs2
216; PWR9BE-NEXT:    xsmindp f1, f1, f2
217; PWR9BE-NEXT:    xxswapd vs2, vs0
218; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
219; PWR9BE-NEXT:    xscvspdpn f2, vs2
220; PWR9BE-NEXT:    xscvspdpn f0, vs0
221; PWR9BE-NEXT:    xsmindp f1, f1, f2
222; PWR9BE-NEXT:    xsmindp f1, f1, f0
223; PWR9BE-NEXT:    blr
224;
225; PWR10LE-LABEL: v8f32:
226; PWR10LE:       # %bb.0: # %entry
227; PWR10LE-NEXT:    xvminsp vs0, v2, v3
228; PWR10LE-NEXT:    xxswapd vs1, vs0
229; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
230; PWR10LE-NEXT:    xscvspdpn f1, vs1
231; PWR10LE-NEXT:    xscvspdpn f2, vs2
232; PWR10LE-NEXT:    xsmindp f1, f2, f1
233; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
234; PWR10LE-NEXT:    xscvspdpn f0, vs0
235; PWR10LE-NEXT:    xscvspdpn f2, vs2
236; PWR10LE-NEXT:    xsmindp f1, f1, f2
237; PWR10LE-NEXT:    xsmindp f1, f1, f0
238; PWR10LE-NEXT:    blr
239;
240; PWR10BE-LABEL: v8f32:
241; PWR10BE:       # %bb.0: # %entry
242; PWR10BE-NEXT:    xvminsp vs0, v2, v3
243; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
244; PWR10BE-NEXT:    xscvspdpn f1, vs0
245; PWR10BE-NEXT:    xscvspdpn f2, vs2
246; PWR10BE-NEXT:    xsmindp f1, f1, f2
247; PWR10BE-NEXT:    xxswapd vs2, vs0
248; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
249; PWR10BE-NEXT:    xscvspdpn f2, vs2
250; PWR10BE-NEXT:    xscvspdpn f0, vs0
251; PWR10BE-NEXT:    xsmindp f1, f1, f2
252; PWR10BE-NEXT:    xsmindp f1, f1, f0
253; PWR10BE-NEXT:    blr
254entry:
255  %0 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
256  ret float %0
257}
258
; v8f32_fast: same reduction as v8f32, but the intrinsic call is marked
; `fast`. The checks expect three vector xvminsp steps (v2 with v3, then
; after an xxswapd, then after an xxspltw) and a single xscvspdpn; no scalar
; xsmindp appears. The little-endian checks add an xxsldwi before the
; conversion.
259define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
260; PWR9LE-LABEL: v8f32_fast:
261; PWR9LE:       # %bb.0: # %entry
262; PWR9LE-NEXT:    xvminsp vs0, v2, v3
263; PWR9LE-NEXT:    xxswapd v2, vs0
264; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
265; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
266; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
267; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
268; PWR9LE-NEXT:    xscvspdpn f1, vs0
269; PWR9LE-NEXT:    blr
270;
271; PWR9BE-LABEL: v8f32_fast:
272; PWR9BE:       # %bb.0: # %entry
273; PWR9BE-NEXT:    xvminsp vs0, v2, v3
274; PWR9BE-NEXT:    xxswapd v2, vs0
275; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
276; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
277; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
278; PWR9BE-NEXT:    xscvspdpn f1, vs0
279; PWR9BE-NEXT:    blr
280;
281; PWR10LE-LABEL: v8f32_fast:
282; PWR10LE:       # %bb.0: # %entry
283; PWR10LE-NEXT:    xvminsp vs0, v2, v3
284; PWR10LE-NEXT:    xxswapd v2, vs0
285; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
286; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
287; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
288; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
289; PWR10LE-NEXT:    xscvspdpn f1, vs0
290; PWR10LE-NEXT:    blr
291;
292; PWR10BE-LABEL: v8f32_fast:
293; PWR10BE:       # %bb.0: # %entry
294; PWR10BE-NEXT:    xvminsp vs0, v2, v3
295; PWR10BE-NEXT:    xxswapd v2, vs0
296; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
297; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
298; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
299; PWR10BE-NEXT:    xscvspdpn f1, vs0
300; PWR10BE-NEXT:    blr
301entry:
302  %0 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
303  ret float %0
304}
305
; v16f32: reduces the <16 x float> argument %a with
; llvm.vector.reduce.fmin.v16f32; the call carries no fast-math flags.
; The checks expect three vector xvminsp instructions that fold v2..v5 into
; one register, followed by four xscvspdpn conversions and three scalar
; xsmindp instructions. The pwr9 and pwr10 checks are identical for a given
; endianness.
306define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
307; PWR9LE-LABEL: v16f32:
308; PWR9LE:       # %bb.0: # %entry
309; PWR9LE-NEXT:    xvminsp vs0, v3, v5
310; PWR9LE-NEXT:    xvminsp vs1, v2, v4
311; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
312; PWR9LE-NEXT:    xxswapd vs1, vs0
313; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
314; PWR9LE-NEXT:    xscvspdpn f1, vs1
315; PWR9LE-NEXT:    xscvspdpn f2, vs2
316; PWR9LE-NEXT:    xsmindp f1, f2, f1
317; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
318; PWR9LE-NEXT:    xscvspdpn f0, vs0
319; PWR9LE-NEXT:    xscvspdpn f2, vs2
320; PWR9LE-NEXT:    xsmindp f1, f1, f2
321; PWR9LE-NEXT:    xsmindp f1, f1, f0
322; PWR9LE-NEXT:    blr
323;
324; PWR9BE-LABEL: v16f32:
325; PWR9BE:       # %bb.0: # %entry
326; PWR9BE-NEXT:    xvminsp vs0, v3, v5
327; PWR9BE-NEXT:    xvminsp vs1, v2, v4
328; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
329; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
330; PWR9BE-NEXT:    xscvspdpn f1, vs0
331; PWR9BE-NEXT:    xscvspdpn f2, vs2
332; PWR9BE-NEXT:    xsmindp f1, f1, f2
333; PWR9BE-NEXT:    xxswapd vs2, vs0
334; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
335; PWR9BE-NEXT:    xscvspdpn f2, vs2
336; PWR9BE-NEXT:    xscvspdpn f0, vs0
337; PWR9BE-NEXT:    xsmindp f1, f1, f2
338; PWR9BE-NEXT:    xsmindp f1, f1, f0
339; PWR9BE-NEXT:    blr
340;
341; PWR10LE-LABEL: v16f32:
342; PWR10LE:       # %bb.0: # %entry
343; PWR10LE-NEXT:    xvminsp vs0, v3, v5
344; PWR10LE-NEXT:    xvminsp vs1, v2, v4
345; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
346; PWR10LE-NEXT:    xxswapd vs1, vs0
347; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
348; PWR10LE-NEXT:    xscvspdpn f1, vs1
349; PWR10LE-NEXT:    xscvspdpn f2, vs2
350; PWR10LE-NEXT:    xsmindp f1, f2, f1
351; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
352; PWR10LE-NEXT:    xscvspdpn f0, vs0
353; PWR10LE-NEXT:    xscvspdpn f2, vs2
354; PWR10LE-NEXT:    xsmindp f1, f1, f2
355; PWR10LE-NEXT:    xsmindp f1, f1, f0
356; PWR10LE-NEXT:    blr
357;
358; PWR10BE-LABEL: v16f32:
359; PWR10BE:       # %bb.0: # %entry
360; PWR10BE-NEXT:    xvminsp vs0, v3, v5
361; PWR10BE-NEXT:    xvminsp vs1, v2, v4
362; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
363; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
364; PWR10BE-NEXT:    xscvspdpn f1, vs0
365; PWR10BE-NEXT:    xscvspdpn f2, vs2
366; PWR10BE-NEXT:    xsmindp f1, f1, f2
367; PWR10BE-NEXT:    xxswapd vs2, vs0
368; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
369; PWR10BE-NEXT:    xscvspdpn f2, vs2
370; PWR10BE-NEXT:    xscvspdpn f0, vs0
371; PWR10BE-NEXT:    xsmindp f1, f1, f2
372; PWR10BE-NEXT:    xsmindp f1, f1, f0
373; PWR10BE-NEXT:    blr
374entry:
375  %0 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
376  ret float %0
377}
378
; v16f32_fast: same reduction as v16f32, but the intrinsic call is marked
; `fast`. The checks expect five vector xvminsp steps in total (three that
; fold v2..v5, one after an xxswapd, one after an xxspltw) and a single
; xscvspdpn; no scalar xsmindp appears. The little-endian checks add an
; xxsldwi before the conversion.
379define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
380; PWR9LE-LABEL: v16f32_fast:
381; PWR9LE:       # %bb.0: # %entry
382; PWR9LE-NEXT:    xvminsp vs0, v3, v5
383; PWR9LE-NEXT:    xvminsp vs1, v2, v4
384; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
385; PWR9LE-NEXT:    xxswapd v2, vs0
386; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
387; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
388; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
389; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
390; PWR9LE-NEXT:    xscvspdpn f1, vs0
391; PWR9LE-NEXT:    blr
392;
393; PWR9BE-LABEL: v16f32_fast:
394; PWR9BE:       # %bb.0: # %entry
395; PWR9BE-NEXT:    xvminsp vs0, v3, v5
396; PWR9BE-NEXT:    xvminsp vs1, v2, v4
397; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
398; PWR9BE-NEXT:    xxswapd v2, vs0
399; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
400; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
401; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
402; PWR9BE-NEXT:    xscvspdpn f1, vs0
403; PWR9BE-NEXT:    blr
404;
405; PWR10LE-LABEL: v16f32_fast:
406; PWR10LE:       # %bb.0: # %entry
407; PWR10LE-NEXT:    xvminsp vs0, v3, v5
408; PWR10LE-NEXT:    xvminsp vs1, v2, v4
409; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
410; PWR10LE-NEXT:    xxswapd v2, vs0
411; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
412; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
413; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
414; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
415; PWR10LE-NEXT:    xscvspdpn f1, vs0
416; PWR10LE-NEXT:    blr
417;
418; PWR10BE-LABEL: v16f32_fast:
419; PWR10BE:       # %bb.0: # %entry
420; PWR10BE-NEXT:    xvminsp vs0, v3, v5
421; PWR10BE-NEXT:    xvminsp vs1, v2, v4
422; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
423; PWR10BE-NEXT:    xxswapd v2, vs0
424; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
425; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
426; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
427; PWR10BE-NEXT:    xscvspdpn f1, vs0
428; PWR10BE-NEXT:    blr
429entry:
430  %0 = call fast float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
431  ret float %0
432}
433
; v32f32: reduces the <32 x float> argument %a with
; llvm.vector.reduce.fmin.v32f32; the call carries no fast-math flags.
; The checks expect seven vector xvminsp instructions that fold v2..v9 into
; one register, followed by four xscvspdpn conversions and three scalar
; xsmindp instructions. The pwr9 and pwr10 checks are identical for a given
; endianness.
434define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 {
435; PWR9LE-LABEL: v32f32:
436; PWR9LE:       # %bb.0: # %entry
437; PWR9LE-NEXT:    xvminsp vs0, v5, v9
438; PWR9LE-NEXT:    xvminsp vs1, v3, v7
439; PWR9LE-NEXT:    xvminsp vs2, v2, v6
440; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
441; PWR9LE-NEXT:    xvminsp vs1, v4, v8
442; PWR9LE-NEXT:    xvminsp vs1, vs2, vs1
443; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
444; PWR9LE-NEXT:    xxswapd vs1, vs0
445; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
446; PWR9LE-NEXT:    xscvspdpn f1, vs1
447; PWR9LE-NEXT:    xscvspdpn f2, vs2
448; PWR9LE-NEXT:    xsmindp f1, f2, f1
449; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
450; PWR9LE-NEXT:    xscvspdpn f0, vs0
451; PWR9LE-NEXT:    xscvspdpn f2, vs2
452; PWR9LE-NEXT:    xsmindp f1, f1, f2
453; PWR9LE-NEXT:    xsmindp f1, f1, f0
454; PWR9LE-NEXT:    blr
455;
456; PWR9BE-LABEL: v32f32:
457; PWR9BE:       # %bb.0: # %entry
458; PWR9BE-NEXT:    xvminsp vs0, v5, v9
459; PWR9BE-NEXT:    xvminsp vs1, v3, v7
460; PWR9BE-NEXT:    xvminsp vs2, v2, v6
461; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
462; PWR9BE-NEXT:    xvminsp vs1, v4, v8
463; PWR9BE-NEXT:    xvminsp vs1, vs2, vs1
464; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
465; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
466; PWR9BE-NEXT:    xscvspdpn f1, vs0
467; PWR9BE-NEXT:    xscvspdpn f2, vs2
468; PWR9BE-NEXT:    xsmindp f1, f1, f2
469; PWR9BE-NEXT:    xxswapd vs2, vs0
470; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
471; PWR9BE-NEXT:    xscvspdpn f2, vs2
472; PWR9BE-NEXT:    xscvspdpn f0, vs0
473; PWR9BE-NEXT:    xsmindp f1, f1, f2
474; PWR9BE-NEXT:    xsmindp f1, f1, f0
475; PWR9BE-NEXT:    blr
476;
477; PWR10LE-LABEL: v32f32:
478; PWR10LE:       # %bb.0: # %entry
479; PWR10LE-NEXT:    xvminsp vs0, v5, v9
480; PWR10LE-NEXT:    xvminsp vs1, v3, v7
481; PWR10LE-NEXT:    xvminsp vs2, v2, v6
482; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
483; PWR10LE-NEXT:    xvminsp vs1, v4, v8
484; PWR10LE-NEXT:    xvminsp vs1, vs2, vs1
485; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
486; PWR10LE-NEXT:    xxswapd vs1, vs0
487; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
488; PWR10LE-NEXT:    xscvspdpn f1, vs1
489; PWR10LE-NEXT:    xscvspdpn f2, vs2
490; PWR10LE-NEXT:    xsmindp f1, f2, f1
491; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
492; PWR10LE-NEXT:    xscvspdpn f0, vs0
493; PWR10LE-NEXT:    xscvspdpn f2, vs2
494; PWR10LE-NEXT:    xsmindp f1, f1, f2
495; PWR10LE-NEXT:    xsmindp f1, f1, f0
496; PWR10LE-NEXT:    blr
497;
498; PWR10BE-LABEL: v32f32:
499; PWR10BE:       # %bb.0: # %entry
500; PWR10BE-NEXT:    xvminsp vs0, v5, v9
501; PWR10BE-NEXT:    xvminsp vs1, v3, v7
502; PWR10BE-NEXT:    xvminsp vs2, v2, v6
503; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
504; PWR10BE-NEXT:    xvminsp vs1, v4, v8
505; PWR10BE-NEXT:    xvminsp vs1, vs2, vs1
506; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
507; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
508; PWR10BE-NEXT:    xscvspdpn f1, vs0
509; PWR10BE-NEXT:    xscvspdpn f2, vs2
510; PWR10BE-NEXT:    xsmindp f1, f1, f2
511; PWR10BE-NEXT:    xxswapd vs2, vs0
512; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
513; PWR10BE-NEXT:    xscvspdpn f2, vs2
514; PWR10BE-NEXT:    xscvspdpn f0, vs0
515; PWR10BE-NEXT:    xsmindp f1, f1, f2
516; PWR10BE-NEXT:    xsmindp f1, f1, f0
517; PWR10BE-NEXT:    blr
518entry:
519  %0 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
520  ret float %0
521}
522
; v32f32_fast: same reduction as v32f32, but the intrinsic call is marked
; `fast`. The checks expect nine vector xvminsp steps in total (seven that
; fold v2..v9, one after an xxswapd, one after an xxspltw) and a single
; xscvspdpn; no scalar xsmindp appears. The little-endian checks add an
; xxsldwi before the conversion.
523define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
524; PWR9LE-LABEL: v32f32_fast:
525; PWR9LE:       # %bb.0: # %entry
526; PWR9LE-NEXT:    xvminsp vs0, v4, v8
527; PWR9LE-NEXT:    xvminsp vs1, v2, v6
528; PWR9LE-NEXT:    xvminsp vs2, v5, v9
529; PWR9LE-NEXT:    xvminsp vs3, v3, v7
530; PWR9LE-NEXT:    xvminsp vs2, vs3, vs2
531; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
532; PWR9LE-NEXT:    xvminsp vs0, vs0, vs2
533; PWR9LE-NEXT:    xxswapd v2, vs0
534; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
535; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
536; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
537; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
538; PWR9LE-NEXT:    xscvspdpn f1, vs0
539; PWR9LE-NEXT:    blr
540;
541; PWR9BE-LABEL: v32f32_fast:
542; PWR9BE:       # %bb.0: # %entry
543; PWR9BE-NEXT:    xvminsp vs0, v4, v8
544; PWR9BE-NEXT:    xvminsp vs1, v2, v6
545; PWR9BE-NEXT:    xvminsp vs2, v5, v9
546; PWR9BE-NEXT:    xvminsp vs3, v3, v7
547; PWR9BE-NEXT:    xvminsp vs2, vs3, vs2
548; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
549; PWR9BE-NEXT:    xvminsp vs0, vs0, vs2
550; PWR9BE-NEXT:    xxswapd v2, vs0
551; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
552; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
553; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
554; PWR9BE-NEXT:    xscvspdpn f1, vs0
555; PWR9BE-NEXT:    blr
556;
557; PWR10LE-LABEL: v32f32_fast:
558; PWR10LE:       # %bb.0: # %entry
559; PWR10LE-NEXT:    xvminsp vs0, v4, v8
560; PWR10LE-NEXT:    xvminsp vs1, v2, v6
561; PWR10LE-NEXT:    xvminsp vs2, v5, v9
562; PWR10LE-NEXT:    xvminsp vs3, v3, v7
563; PWR10LE-NEXT:    xvminsp vs2, vs3, vs2
564; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
565; PWR10LE-NEXT:    xvminsp vs0, vs0, vs2
566; PWR10LE-NEXT:    xxswapd v2, vs0
567; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
568; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
569; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
570; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
571; PWR10LE-NEXT:    xscvspdpn f1, vs0
572; PWR10LE-NEXT:    blr
573;
574; PWR10BE-LABEL: v32f32_fast:
575; PWR10BE:       # %bb.0: # %entry
576; PWR10BE-NEXT:    xvminsp vs0, v4, v8
577; PWR10BE-NEXT:    xvminsp vs1, v2, v6
578; PWR10BE-NEXT:    xvminsp vs2, v5, v9
579; PWR10BE-NEXT:    xvminsp vs3, v3, v7
580; PWR10BE-NEXT:    xvminsp vs2, vs3, vs2
581; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
582; PWR10BE-NEXT:    xvminsp vs0, vs0, vs2
583; PWR10BE-NEXT:    xxswapd v2, vs0
584; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
585; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
586; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
587; PWR10BE-NEXT:    xscvspdpn f1, vs0
588; PWR10BE-NEXT:    blr
589entry:
590  %0 = call fast float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
591  ret float %0
592}
593
; Declarations of the fmin reduction intrinsics called by the f32 tests
; above, one per vector width (2, 4, 8, 16 and 32 lanes). Each takes the
; vector and returns a single float.
; NOTE(review): attribute group #0 is referenced here but not defined in the
; visible part of the file - presumably defined further down; confirm.
594declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) #0
595declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) #0
596declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) #0
597declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) #0
598declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>) #0
599
600;;
601;; Vectors of f64
602;;
; v2f64: reduces the <2 x double> argument %a to one double with
; llvm.vector.reduce.fmin.v2f64; the call carries no fast-math flags.
; The checks expect an xxswapd of v2 followed by a single scalar xsmindp.
; The two xsmindp source operands appear in opposite order in the
; little-endian and big-endian checks.
603define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
604; PWR9LE-LABEL: v2f64:
605; PWR9LE:       # %bb.0: # %entry
606; PWR9LE-NEXT:    xxswapd vs0, v2
607; PWR9LE-NEXT:    xsmindp f1, f0, v2
608; PWR9LE-NEXT:    blr
609;
610; PWR9BE-LABEL: v2f64:
611; PWR9BE:       # %bb.0: # %entry
612; PWR9BE-NEXT:    xxswapd vs0, v2
613; PWR9BE-NEXT:    xsmindp f1, v2, f0
614; PWR9BE-NEXT:    blr
615;
616; PWR10LE-LABEL: v2f64:
617; PWR10LE:       # %bb.0: # %entry
618; PWR10LE-NEXT:    xxswapd vs0, v2
619; PWR10LE-NEXT:    xsmindp f1, f0, v2
620; PWR10LE-NEXT:    blr
621;
622; PWR10BE-LABEL: v2f64:
623; PWR10BE:       # %bb.0: # %entry
624; PWR10BE-NEXT:    xxswapd vs0, v2
625; PWR10BE-NEXT:    xsmindp f1, v2, f0
626; PWR10BE-NEXT:    blr
627entry:
628  %0 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
629  ret double %0
630}
631
; v2f64_fast: same reduction as v2f64, but the intrinsic call is marked
; `fast`. The checks expect an xxswapd followed by one vector xvmindp; no
; scalar xsmindp appears. In the big-endian checks the xvmindp writes vs1
; directly; the little-endian checks write vs0 and then xxswapd it into vs1.
632define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
633; PWR9LE-LABEL: v2f64_fast:
634; PWR9LE:       # %bb.0: # %entry
635; PWR9LE-NEXT:    xxswapd vs0, v2
636; PWR9LE-NEXT:    xvmindp vs0, v2, vs0
637; PWR9LE-NEXT:    xxswapd vs1, vs0
638; PWR9LE-NEXT:    blr
639;
640; PWR9BE-LABEL: v2f64_fast:
641; PWR9BE:       # %bb.0: # %entry
642; PWR9BE-NEXT:    xxswapd vs0, v2
643; PWR9BE-NEXT:    xvmindp vs1, v2, vs0
644; PWR9BE-NEXT:    blr
645;
646; PWR10LE-LABEL: v2f64_fast:
647; PWR10LE:       # %bb.0: # %entry
648; PWR10LE-NEXT:    xxswapd vs0, v2
649; PWR10LE-NEXT:    xvmindp vs0, v2, vs0
650; PWR10LE-NEXT:    xxswapd vs1, vs0
651; PWR10LE-NEXT:    blr
652;
653; PWR10BE-LABEL: v2f64_fast:
654; PWR10BE:       # %bb.0: # %entry
655; PWR10BE-NEXT:    xxswapd vs0, v2
656; PWR10BE-NEXT:    xvmindp vs1, v2, vs0
657; PWR10BE-NEXT:    blr
658entry:
659  %0 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
660  ret double %0
661}
662
; v4f64: reduces the <4 x double> argument %a with
; llvm.vector.reduce.fmin.v4f64; the call carries no fast-math flags.
; The checks expect one vector xvmindp of v2 and v3, an xxswapd of the
; result, and a final scalar xsmindp whose source operands appear in
; opposite order in the little-endian and big-endian checks.
663define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
664; PWR9LE-LABEL: v4f64:
665; PWR9LE:       # %bb.0: # %entry
666; PWR9LE-NEXT:    xvmindp vs0, v2, v3
667; PWR9LE-NEXT:    xxswapd vs1, vs0
668; PWR9LE-NEXT:    xsmindp f1, f1, f0
669; PWR9LE-NEXT:    blr
670;
671; PWR9BE-LABEL: v4f64:
672; PWR9BE:       # %bb.0: # %entry
673; PWR9BE-NEXT:    xvmindp vs0, v2, v3
674; PWR9BE-NEXT:    xxswapd vs1, vs0
675; PWR9BE-NEXT:    xsmindp f1, f0, f1
676; PWR9BE-NEXT:    blr
677;
678; PWR10LE-LABEL: v4f64:
679; PWR10LE:       # %bb.0: # %entry
680; PWR10LE-NEXT:    xvmindp vs0, v2, v3
681; PWR10LE-NEXT:    xxswapd vs1, vs0
682; PWR10LE-NEXT:    xsmindp f1, f1, f0
683; PWR10LE-NEXT:    blr
684;
685; PWR10BE-LABEL: v4f64:
686; PWR10BE:       # %bb.0: # %entry
687; PWR10BE-NEXT:    xvmindp vs0, v2, v3
688; PWR10BE-NEXT:    xxswapd vs1, vs0
689; PWR10BE-NEXT:    xsmindp f1, f0, f1
690; PWR10BE-NEXT:    blr
691entry:
692  %0 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
693  ret double %0
694}
695
; v4f64_fast: same reduction as v4f64, but the intrinsic call is marked
; `fast`. The checks expect two vector xvmindp steps (v2 with v3, then the
; result with its xxswapd) and no scalar xsmindp. In the big-endian checks
; the last xvmindp writes vs1 directly; the little-endian checks end with
; an extra xxswapd into vs1.
696define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
697; PWR9LE-LABEL: v4f64_fast:
698; PWR9LE:       # %bb.0: # %entry
699; PWR9LE-NEXT:    xvmindp vs0, v2, v3
700; PWR9LE-NEXT:    xxswapd vs1, vs0
701; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
702; PWR9LE-NEXT:    xxswapd vs1, vs0
703; PWR9LE-NEXT:    blr
704;
705; PWR9BE-LABEL: v4f64_fast:
706; PWR9BE:       # %bb.0: # %entry
707; PWR9BE-NEXT:    xvmindp vs0, v2, v3
708; PWR9BE-NEXT:    xxswapd vs1, vs0
709; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
710; PWR9BE-NEXT:    blr
711;
712; PWR10LE-LABEL: v4f64_fast:
713; PWR10LE:       # %bb.0: # %entry
714; PWR10LE-NEXT:    xvmindp vs0, v2, v3
715; PWR10LE-NEXT:    xxswapd vs1, vs0
716; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
717; PWR10LE-NEXT:    xxswapd vs1, vs0
718; PWR10LE-NEXT:    blr
719;
720; PWR10BE-LABEL: v4f64_fast:
721; PWR10BE:       # %bb.0: # %entry
722; PWR10BE-NEXT:    xvmindp vs0, v2, v3
723; PWR10BE-NEXT:    xxswapd vs1, vs0
724; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
725; PWR10BE-NEXT:    blr
726entry:
727  %0 = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
728  ret double %0
729}
730
; v8f64: reduces the <8 x double> argument %a with
; llvm.vector.reduce.fmin.v8f64; the call carries no fast-math flags.
; The checks expect three vector xvmindp instructions that fold v2..v5 into
; one register, an xxswapd of the result, and a final scalar xsmindp whose
; source operands appear in opposite order in the little-endian and
; big-endian checks.
731define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
732; PWR9LE-LABEL: v8f64:
733; PWR9LE:       # %bb.0: # %entry
734; PWR9LE-NEXT:    xvmindp vs0, v3, v5
735; PWR9LE-NEXT:    xvmindp vs1, v2, v4
736; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
737; PWR9LE-NEXT:    xxswapd vs1, vs0
738; PWR9LE-NEXT:    xsmindp f1, f1, f0
739; PWR9LE-NEXT:    blr
740;
741; PWR9BE-LABEL: v8f64:
742; PWR9BE:       # %bb.0: # %entry
743; PWR9BE-NEXT:    xvmindp vs0, v3, v5
744; PWR9BE-NEXT:    xvmindp vs1, v2, v4
745; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
746; PWR9BE-NEXT:    xxswapd vs1, vs0
747; PWR9BE-NEXT:    xsmindp f1, f0, f1
748; PWR9BE-NEXT:    blr
749;
750; PWR10LE-LABEL: v8f64:
751; PWR10LE:       # %bb.0: # %entry
752; PWR10LE-NEXT:    xvmindp vs0, v3, v5
753; PWR10LE-NEXT:    xvmindp vs1, v2, v4
754; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
755; PWR10LE-NEXT:    xxswapd vs1, vs0
756; PWR10LE-NEXT:    xsmindp f1, f1, f0
757; PWR10LE-NEXT:    blr
758;
759; PWR10BE-LABEL: v8f64:
760; PWR10BE:       # %bb.0: # %entry
761; PWR10BE-NEXT:    xvmindp vs0, v3, v5
762; PWR10BE-NEXT:    xvmindp vs1, v2, v4
763; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
764; PWR10BE-NEXT:    xxswapd vs1, vs0
765; PWR10BE-NEXT:    xsmindp f1, f0, f1
766; PWR10BE-NEXT:    blr
767entry:
768  %0 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
769  ret double %0
770}
771
; v8f64_fast: same reduction as v8f64, but the intrinsic call is marked
; `fast`. The checks expect four vector xvmindp steps (three that fold
; v2..v5, then one with the xxswapd of the result) and no scalar xsmindp.
; In the big-endian checks the last xvmindp writes vs1 directly; the
; little-endian checks end with an extra xxswapd into vs1.
772define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
773; PWR9LE-LABEL: v8f64_fast:
774; PWR9LE:       # %bb.0: # %entry
775; PWR9LE-NEXT:    xvmindp vs0, v3, v5
776; PWR9LE-NEXT:    xvmindp vs1, v2, v4
777; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
778; PWR9LE-NEXT:    xxswapd vs1, vs0
779; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
780; PWR9LE-NEXT:    xxswapd vs1, vs0
781; PWR9LE-NEXT:    blr
782;
783; PWR9BE-LABEL: v8f64_fast:
784; PWR9BE:       # %bb.0: # %entry
785; PWR9BE-NEXT:    xvmindp vs0, v3, v5
786; PWR9BE-NEXT:    xvmindp vs1, v2, v4
787; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
788; PWR9BE-NEXT:    xxswapd vs1, vs0
789; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
790; PWR9BE-NEXT:    blr
791;
792; PWR10LE-LABEL: v8f64_fast:
793; PWR10LE:       # %bb.0: # %entry
794; PWR10LE-NEXT:    xvmindp vs0, v3, v5
795; PWR10LE-NEXT:    xvmindp vs1, v2, v4
796; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
797; PWR10LE-NEXT:    xxswapd vs1, vs0
798; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
799; PWR10LE-NEXT:    xxswapd vs1, vs0
800; PWR10LE-NEXT:    blr
801;
802; PWR10BE-LABEL: v8f64_fast:
803; PWR10BE:       # %bb.0: # %entry
804; PWR10BE-NEXT:    xvmindp vs0, v3, v5
805; PWR10BE-NEXT:    xvmindp vs1, v2, v4
806; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
807; PWR10BE-NEXT:    xxswapd vs1, vs0
808; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
809; PWR10BE-NEXT:    blr
810entry:
811  %0 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
812  ret double %0
813}
814
; v16f64: reduces the <16 x double> argument %a with
; llvm.vector.reduce.fmin.v16f64; the call carries no fast-math flags.
; The checks expect seven vector xvmindp instructions that fold v2..v9 into
; one register, an xxswapd of the result, and a final scalar xsmindp whose
; source operands appear in opposite order in the little-endian and
; big-endian checks.
815define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
816; PWR9LE-LABEL: v16f64:
817; PWR9LE:       # %bb.0: # %entry
818; PWR9LE-NEXT:    xvmindp vs0, v5, v9
819; PWR9LE-NEXT:    xvmindp vs1, v3, v7
820; PWR9LE-NEXT:    xvmindp vs2, v2, v6
821; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
822; PWR9LE-NEXT:    xvmindp vs1, v4, v8
823; PWR9LE-NEXT:    xvmindp vs1, vs2, vs1
824; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
825; PWR9LE-NEXT:    xxswapd vs1, vs0
826; PWR9LE-NEXT:    xsmindp f1, f1, f0
827; PWR9LE-NEXT:    blr
828;
829; PWR9BE-LABEL: v16f64:
830; PWR9BE:       # %bb.0: # %entry
831; PWR9BE-NEXT:    xvmindp vs0, v5, v9
832; PWR9BE-NEXT:    xvmindp vs1, v3, v7
833; PWR9BE-NEXT:    xvmindp vs2, v2, v6
834; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
835; PWR9BE-NEXT:    xvmindp vs1, v4, v8
836; PWR9BE-NEXT:    xvmindp vs1, vs2, vs1
837; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
838; PWR9BE-NEXT:    xxswapd vs1, vs0
839; PWR9BE-NEXT:    xsmindp f1, f0, f1
840; PWR9BE-NEXT:    blr
841;
842; PWR10LE-LABEL: v16f64:
843; PWR10LE:       # %bb.0: # %entry
844; PWR10LE-NEXT:    xvmindp vs0, v5, v9
845; PWR10LE-NEXT:    xvmindp vs1, v3, v7
846; PWR10LE-NEXT:    xvmindp vs2, v2, v6
847; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
848; PWR10LE-NEXT:    xvmindp vs1, v4, v8
849; PWR10LE-NEXT:    xvmindp vs1, vs2, vs1
850; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
851; PWR10LE-NEXT:    xxswapd vs1, vs0
852; PWR10LE-NEXT:    xsmindp f1, f1, f0
853; PWR10LE-NEXT:    blr
854;
855; PWR10BE-LABEL: v16f64:
856; PWR10BE:       # %bb.0: # %entry
857; PWR10BE-NEXT:    xvmindp vs0, v5, v9
858; PWR10BE-NEXT:    xvmindp vs1, v3, v7
859; PWR10BE-NEXT:    xvmindp vs2, v2, v6
860; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
861; PWR10BE-NEXT:    xvmindp vs1, v4, v8
862; PWR10BE-NEXT:    xvmindp vs1, vs2, vs1
863; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
864; PWR10BE-NEXT:    xxswapd vs1, vs0
865; PWR10BE-NEXT:    xsmindp f1, f0, f1
866; PWR10BE-NEXT:    blr
867entry:
868  %0 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
869  ret double %0
870}
871
; v16f64_fast: same reduction as v16f64, but the intrinsic call is marked
; `fast`. The checks expect eight vector xvmindp steps (seven that fold
; v2..v9, then one with the xxswapd of the result) and no scalar xsmindp.
; In the big-endian checks the last xvmindp writes vs1 directly; the
; little-endian checks end with an extra xxswapd into vs1.
872define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
873; PWR9LE-LABEL: v16f64_fast:
874; PWR9LE:       # %bb.0: # %entry
875; PWR9LE-NEXT:    xvmindp vs0, v4, v8
876; PWR9LE-NEXT:    xvmindp vs1, v2, v6
877; PWR9LE-NEXT:    xvmindp vs2, v5, v9
878; PWR9LE-NEXT:    xvmindp vs3, v3, v7
879; PWR9LE-NEXT:    xvmindp vs2, vs3, vs2
880; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
881; PWR9LE-NEXT:    xvmindp vs0, vs0, vs2
882; PWR9LE-NEXT:    xxswapd vs1, vs0
883; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
884; PWR9LE-NEXT:    xxswapd vs1, vs0
885; PWR9LE-NEXT:    blr
886;
887; PWR9BE-LABEL: v16f64_fast:
888; PWR9BE:       # %bb.0: # %entry
889; PWR9BE-NEXT:    xvmindp vs0, v4, v8
890; PWR9BE-NEXT:    xvmindp vs1, v2, v6
891; PWR9BE-NEXT:    xvmindp vs2, v5, v9
892; PWR9BE-NEXT:    xvmindp vs3, v3, v7
893; PWR9BE-NEXT:    xvmindp vs2, vs3, vs2
894; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
895; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
896; PWR9BE-NEXT:    xxswapd vs1, vs0
897; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
898; PWR9BE-NEXT:    blr
899;
900; PWR10LE-LABEL: v16f64_fast:
901; PWR10LE:       # %bb.0: # %entry
902; PWR10LE-NEXT:    xvmindp vs0, v4, v8
903; PWR10LE-NEXT:    xvmindp vs1, v2, v6
904; PWR10LE-NEXT:    xvmindp vs2, v5, v9
905; PWR10LE-NEXT:    xvmindp vs3, v3, v7
906; PWR10LE-NEXT:    xvmindp vs2, vs3, vs2
907; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
908; PWR10LE-NEXT:    xvmindp vs0, vs0, vs2
909; PWR10LE-NEXT:    xxswapd vs1, vs0
910; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
911; PWR10LE-NEXT:    xxswapd vs1, vs0
912; PWR10LE-NEXT:    blr
913;
914; PWR10BE-LABEL: v16f64_fast:
915; PWR10BE:       # %bb.0: # %entry
916; PWR10BE-NEXT:    xvmindp vs0, v4, v8
917; PWR10BE-NEXT:    xvmindp vs1, v2, v6
918; PWR10BE-NEXT:    xvmindp vs2, v5, v9
919; PWR10BE-NEXT:    xvmindp vs3, v3, v7
920; PWR10BE-NEXT:    xvmindp vs2, vs3, vs2
921; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
922; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
923; PWR10BE-NEXT:    xxswapd vs1, vs0
924; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
925; PWR10BE-NEXT:    blr
926entry:
927  %0 = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
928  ret double %0
929}
930
;; Reduce a <32 x double> to its minimum without fast-math flags. The expected
;; code reads twelve vectors from v2-v13 and loads the remaining four relative
;; to r1 with lxv (offsets 224-272 for little-endian, 240-288 for big-endian),
;; combines them with fifteen xvmindp instructions, and finishes with
;; xxswapd + scalar xsmindp. The xsmindp operand order differs between the
;; little- and big-endian checks. The pwr10 checks differ from the pwr9 ones
;; only in how the lxv loads are interleaved with the xvmindp instructions.
931define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
932; PWR9LE-LABEL: v32f64:
933; PWR9LE:       # %bb.0: # %entry
934; PWR9LE-NEXT:    lxv vs3, 272(r1)
935; PWR9LE-NEXT:    lxv vs2, 240(r1)
936; PWR9LE-NEXT:    xvmindp vs4, v5, v13
937; PWR9LE-NEXT:    lxv vs1, 256(r1)
938; PWR9LE-NEXT:    lxv vs0, 224(r1)
939; PWR9LE-NEXT:    xvmindp vs3, v9, vs3
940; PWR9LE-NEXT:    xvmindp vs2, v7, vs2
941; PWR9LE-NEXT:    xvmindp vs1, v8, vs1
942; PWR9LE-NEXT:    xvmindp vs0, v6, vs0
943; PWR9LE-NEXT:    xvmindp vs3, vs4, vs3
944; PWR9LE-NEXT:    xvmindp vs4, v3, v11
945; PWR9LE-NEXT:    xvmindp vs2, vs4, vs2
946; PWR9LE-NEXT:    xvmindp vs2, vs2, vs3
947; PWR9LE-NEXT:    xvmindp vs3, v4, v12
948; PWR9LE-NEXT:    xvmindp vs1, vs3, vs1
949; PWR9LE-NEXT:    xvmindp vs3, v2, v10
950; PWR9LE-NEXT:    xvmindp vs0, vs3, vs0
951; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
952; PWR9LE-NEXT:    xvmindp vs0, vs0, vs2
953; PWR9LE-NEXT:    xxswapd vs1, vs0
954; PWR9LE-NEXT:    xsmindp f1, f1, f0
955; PWR9LE-NEXT:    blr
956;
957; PWR9BE-LABEL: v32f64:
958; PWR9BE:       # %bb.0: # %entry
959; PWR9BE-NEXT:    lxv vs3, 288(r1)
960; PWR9BE-NEXT:    lxv vs2, 256(r1)
961; PWR9BE-NEXT:    xvmindp vs4, v5, v13
962; PWR9BE-NEXT:    lxv vs1, 272(r1)
963; PWR9BE-NEXT:    lxv vs0, 240(r1)
964; PWR9BE-NEXT:    xvmindp vs3, v9, vs3
965; PWR9BE-NEXT:    xvmindp vs2, v7, vs2
966; PWR9BE-NEXT:    xvmindp vs1, v8, vs1
967; PWR9BE-NEXT:    xvmindp vs0, v6, vs0
968; PWR9BE-NEXT:    xvmindp vs3, vs4, vs3
969; PWR9BE-NEXT:    xvmindp vs4, v3, v11
970; PWR9BE-NEXT:    xvmindp vs2, vs4, vs2
971; PWR9BE-NEXT:    xvmindp vs2, vs2, vs3
972; PWR9BE-NEXT:    xvmindp vs3, v4, v12
973; PWR9BE-NEXT:    xvmindp vs1, vs3, vs1
974; PWR9BE-NEXT:    xvmindp vs3, v2, v10
975; PWR9BE-NEXT:    xvmindp vs0, vs3, vs0
976; PWR9BE-NEXT:    xvmindp vs0, vs0, vs1
977; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
978; PWR9BE-NEXT:    xxswapd vs1, vs0
979; PWR9BE-NEXT:    xsmindp f1, f0, f1
980; PWR9BE-NEXT:    blr
981;
982; PWR10LE-LABEL: v32f64:
983; PWR10LE:       # %bb.0: # %entry
984; PWR10LE-NEXT:    lxv vs3, 272(r1)
985; PWR10LE-NEXT:    lxv vs2, 240(r1)
986; PWR10LE-NEXT:    xvmindp vs4, v5, v13
987; PWR10LE-NEXT:    xvmindp vs3, v9, vs3
988; PWR10LE-NEXT:    lxv vs1, 256(r1)
989; PWR10LE-NEXT:    xvmindp vs2, v7, vs2
990; PWR10LE-NEXT:    lxv vs0, 224(r1)
991; PWR10LE-NEXT:    xvmindp vs1, v8, vs1
992; PWR10LE-NEXT:    xvmindp vs0, v6, vs0
993; PWR10LE-NEXT:    xvmindp vs3, vs4, vs3
994; PWR10LE-NEXT:    xvmindp vs4, v3, v11
995; PWR10LE-NEXT:    xvmindp vs2, vs4, vs2
996; PWR10LE-NEXT:    xvmindp vs2, vs2, vs3
997; PWR10LE-NEXT:    xvmindp vs3, v4, v12
998; PWR10LE-NEXT:    xvmindp vs1, vs3, vs1
999; PWR10LE-NEXT:    xvmindp vs3, v2, v10
1000; PWR10LE-NEXT:    xvmindp vs0, vs3, vs0
1001; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
1002; PWR10LE-NEXT:    xvmindp vs0, vs0, vs2
1003; PWR10LE-NEXT:    xxswapd vs1, vs0
1004; PWR10LE-NEXT:    xsmindp f1, f1, f0
1005; PWR10LE-NEXT:    blr
1006;
1007; PWR10BE-LABEL: v32f64:
1008; PWR10BE:       # %bb.0: # %entry
1009; PWR10BE-NEXT:    lxv vs3, 288(r1)
1010; PWR10BE-NEXT:    lxv vs2, 256(r1)
1011; PWR10BE-NEXT:    xvmindp vs4, v5, v13
1012; PWR10BE-NEXT:    xvmindp vs3, v9, vs3
1013; PWR10BE-NEXT:    lxv vs1, 272(r1)
1014; PWR10BE-NEXT:    xvmindp vs2, v7, vs2
1015; PWR10BE-NEXT:    lxv vs0, 240(r1)
1016; PWR10BE-NEXT:    xvmindp vs1, v8, vs1
1017; PWR10BE-NEXT:    xvmindp vs0, v6, vs0
1018; PWR10BE-NEXT:    xvmindp vs3, vs4, vs3
1019; PWR10BE-NEXT:    xvmindp vs4, v3, v11
1020; PWR10BE-NEXT:    xvmindp vs2, vs4, vs2
1021; PWR10BE-NEXT:    xvmindp vs2, vs2, vs3
1022; PWR10BE-NEXT:    xvmindp vs3, v4, v12
1023; PWR10BE-NEXT:    xvmindp vs1, vs3, vs1
1024; PWR10BE-NEXT:    xvmindp vs3, v2, v10
1025; PWR10BE-NEXT:    xvmindp vs0, vs3, vs0
1026; PWR10BE-NEXT:    xvmindp vs0, vs0, vs1
1027; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
1028; PWR10BE-NEXT:    xxswapd vs1, vs0
1029; PWR10BE-NEXT:    xsmindp f1, f0, f1
1030; PWR10BE-NEXT:    blr
1031entry:
1032  %0 = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
1033  ret double %0
1034}
1035
;; Reduce a <32 x double> to its minimum, with the `fast` flag on the
;; llvm.vector.reduce.fmin call. The expected code reads twelve vectors from
;; v2-v13, loads the remaining four relative to r1 with lxv, and combines them
;; with fifteen xvmindp instructions. It then folds the two doublewords of the
;; result together with xxswapd + xvmindp. The little-endian checks end with
;; one more xxswapd, while the big-endian checks write the final xvmindp
;; straight into vs1. The pwr9 checks issue all four lxv loads first; the pwr10
;; checks issue two of them after the first five xvmindp instructions.
1036define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
1037; PWR9LE-LABEL: v32f64_fast:
1038; PWR9LE:       # %bb.0: # %entry
1039; PWR9LE-NEXT:    lxv vs0, 256(r1)
1040; PWR9LE-NEXT:    lxv vs1, 224(r1)
1041; PWR9LE-NEXT:    lxv vs2, 272(r1)
1042; PWR9LE-NEXT:    lxv vs3, 240(r1)
1043; PWR9LE-NEXT:    xvmindp vs4, v3, v11
1044; PWR9LE-NEXT:    xvmindp vs5, v5, v13
1045; PWR9LE-NEXT:    xvmindp vs6, v2, v10
1046; PWR9LE-NEXT:    xvmindp vs7, v4, v12
1047; PWR9LE-NEXT:    xvmindp vs3, v7, vs3
1048; PWR9LE-NEXT:    xvmindp vs2, v9, vs2
1049; PWR9LE-NEXT:    xvmindp vs1, v6, vs1
1050; PWR9LE-NEXT:    xvmindp vs0, v8, vs0
1051; PWR9LE-NEXT:    xvmindp vs0, vs7, vs0
1052; PWR9LE-NEXT:    xvmindp vs1, vs6, vs1
1053; PWR9LE-NEXT:    xvmindp vs2, vs5, vs2
1054; PWR9LE-NEXT:    xvmindp vs3, vs4, vs3
1055; PWR9LE-NEXT:    xvmindp vs2, vs3, vs2
1056; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
1057; PWR9LE-NEXT:    xvmindp vs0, vs0, vs2
1058; PWR9LE-NEXT:    xxswapd vs1, vs0
1059; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
1060; PWR9LE-NEXT:    xxswapd vs1, vs0
1061; PWR9LE-NEXT:    blr
1062;
1063; PWR9BE-LABEL: v32f64_fast:
1064; PWR9BE:       # %bb.0: # %entry
1065; PWR9BE-NEXT:    lxv vs0, 272(r1)
1066; PWR9BE-NEXT:    lxv vs1, 240(r1)
1067; PWR9BE-NEXT:    lxv vs2, 288(r1)
1068; PWR9BE-NEXT:    lxv vs3, 256(r1)
1069; PWR9BE-NEXT:    xvmindp vs4, v3, v11
1070; PWR9BE-NEXT:    xvmindp vs5, v5, v13
1071; PWR9BE-NEXT:    xvmindp vs6, v2, v10
1072; PWR9BE-NEXT:    xvmindp vs7, v4, v12
1073; PWR9BE-NEXT:    xvmindp vs3, v7, vs3
1074; PWR9BE-NEXT:    xvmindp vs2, v9, vs2
1075; PWR9BE-NEXT:    xvmindp vs1, v6, vs1
1076; PWR9BE-NEXT:    xvmindp vs0, v8, vs0
1077; PWR9BE-NEXT:    xvmindp vs0, vs7, vs0
1078; PWR9BE-NEXT:    xvmindp vs1, vs6, vs1
1079; PWR9BE-NEXT:    xvmindp vs2, vs5, vs2
1080; PWR9BE-NEXT:    xvmindp vs3, vs4, vs3
1081; PWR9BE-NEXT:    xvmindp vs2, vs3, vs2
1082; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
1083; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
1084; PWR9BE-NEXT:    xxswapd vs1, vs0
1085; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
1086; PWR9BE-NEXT:    blr
1087;
1088; PWR10LE-LABEL: v32f64_fast:
1089; PWR10LE:       # %bb.0: # %entry
1090; PWR10LE-NEXT:    lxv vs0, 256(r1)
1091; PWR10LE-NEXT:    lxv vs1, 224(r1)
1092; PWR10LE-NEXT:    xvmindp vs4, v3, v11
1093; PWR10LE-NEXT:    xvmindp vs5, v5, v13
1094; PWR10LE-NEXT:    xvmindp vs6, v2, v10
1095; PWR10LE-NEXT:    xvmindp vs7, v4, v12
1096; PWR10LE-NEXT:    xvmindp vs1, v6, vs1
1097; PWR10LE-NEXT:    lxv vs2, 272(r1)
1098; PWR10LE-NEXT:    lxv vs3, 240(r1)
1099; PWR10LE-NEXT:    xvmindp vs3, v7, vs3
1100; PWR10LE-NEXT:    xvmindp vs2, v9, vs2
1101; PWR10LE-NEXT:    xvmindp vs0, v8, vs0
1102; PWR10LE-NEXT:    xvmindp vs0, vs7, vs0
1103; PWR10LE-NEXT:    xvmindp vs1, vs6, vs1
1104; PWR10LE-NEXT:    xvmindp vs2, vs5, vs2
1105; PWR10LE-NEXT:    xvmindp vs3, vs4, vs3
1106; PWR10LE-NEXT:    xvmindp vs2, vs3, vs2
1107; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
1108; PWR10LE-NEXT:    xvmindp vs0, vs0, vs2
1109; PWR10LE-NEXT:    xxswapd vs1, vs0
1110; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
1111; PWR10LE-NEXT:    xxswapd vs1, vs0
1112; PWR10LE-NEXT:    blr
1113;
1114; PWR10BE-LABEL: v32f64_fast:
1115; PWR10BE:       # %bb.0: # %entry
1116; PWR10BE-NEXT:    lxv vs0, 272(r1)
1117; PWR10BE-NEXT:    lxv vs1, 240(r1)
1118; PWR10BE-NEXT:    xvmindp vs4, v3, v11
1119; PWR10BE-NEXT:    xvmindp vs5, v5, v13
1120; PWR10BE-NEXT:    xvmindp vs6, v2, v10
1121; PWR10BE-NEXT:    xvmindp vs7, v4, v12
1122; PWR10BE-NEXT:    xvmindp vs1, v6, vs1
1123; PWR10BE-NEXT:    lxv vs2, 288(r1)
1124; PWR10BE-NEXT:    lxv vs3, 256(r1)
1125; PWR10BE-NEXT:    xvmindp vs3, v7, vs3
1126; PWR10BE-NEXT:    xvmindp vs2, v9, vs2
1127; PWR10BE-NEXT:    xvmindp vs0, v8, vs0
1128; PWR10BE-NEXT:    xvmindp vs0, vs7, vs0
1129; PWR10BE-NEXT:    xvmindp vs1, vs6, vs1
1130; PWR10BE-NEXT:    xvmindp vs2, vs5, vs2
1131; PWR10BE-NEXT:    xvmindp vs3, vs4, vs3
1132; PWR10BE-NEXT:    xvmindp vs2, vs3, vs2
1133; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
1134; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
1135; PWR10BE-NEXT:    xxswapd vs1, vs0
1136; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
1137; PWR10BE-NEXT:    blr
1138entry:
1139  %0 = call fast double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
1140  ret double %0
1141}
1142
;; Declarations of the f64 fmin reduction intrinsic, one overload per vector
;; width from 2 to 32 elements. Each takes a vector of doubles and returns a
;; single double.
1143declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) #0
1144declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) #0
1145declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) #0
1146declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) #0
1147declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) #0
1148
;; Attribute group #0, referenced by the declarations above: nounwind only.
1149attributes #0 = { nounwind }
1150