xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-umin.ll (revision e9d12c248013b2d2b9880436727857e0ec8a7085)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
8; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
10
11;;
12;; Vectors of type i8
13;;
; Unsigned-min reduction of a <2 x i8> argument: calls
; @llvm.vector.reduce.umin.v2i8 on %a and returns the i8 result.
; The assertions expect a single vspltb + vminub step followed by a byte
; extract (vextubrx on the little-endian runs, vextublx on the big-endian runs).
14define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 {
15; PWR9LE-LABEL: v2i8:
16; PWR9LE:       # %bb.0: # %entry
17; PWR9LE-NEXT:    vspltb v3, v2, 14
18; PWR9LE-NEXT:    li r3, 0
19; PWR9LE-NEXT:    vminub v2, v2, v3
20; PWR9LE-NEXT:    vextubrx r3, r3, v2
21; PWR9LE-NEXT:    blr
22;
23; PWR9BE-LABEL: v2i8:
24; PWR9BE:       # %bb.0: # %entry
25; PWR9BE-NEXT:    vspltb v3, v2, 1
26; PWR9BE-NEXT:    li r3, 0
27; PWR9BE-NEXT:    vminub v2, v2, v3
28; PWR9BE-NEXT:    vextublx r3, r3, v2
29; PWR9BE-NEXT:    blr
30;
31; PWR10LE-LABEL: v2i8:
32; PWR10LE:       # %bb.0: # %entry
33; PWR10LE-NEXT:    vspltb v3, v2, 14
34; PWR10LE-NEXT:    li r3, 0
35; PWR10LE-NEXT:    vminub v2, v2, v3
36; PWR10LE-NEXT:    vextubrx r3, r3, v2
37; PWR10LE-NEXT:    blr
38;
39; PWR10BE-LABEL: v2i8:
40; PWR10BE:       # %bb.0: # %entry
41; PWR10BE-NEXT:    vspltb v3, v2, 1
42; PWR10BE-NEXT:    li r3, 0
43; PWR10BE-NEXT:    vminub v2, v2, v3
44; PWR10BE-NEXT:    vextublx r3, r3, v2
45; PWR10BE-NEXT:    blr
46entry:
47  %0 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a)
48  ret i8 %0
49}
50
; Unsigned-min reduction of a <4 x i8> argument: calls
; @llvm.vector.reduce.umin.v4i8 on %a and returns the i8 result.
; The assertions expect two splat + vminub steps (vsplth, then vspltb)
; followed by a byte extract (vextubrx little-endian, vextublx big-endian).
51define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 {
52; PWR9LE-LABEL: v4i8:
53; PWR9LE:       # %bb.0: # %entry
54; PWR9LE-NEXT:    vsplth v3, v2, 6
55; PWR9LE-NEXT:    li r3, 0
56; PWR9LE-NEXT:    vminub v2, v2, v3
57; PWR9LE-NEXT:    vspltb v3, v2, 14
58; PWR9LE-NEXT:    vminub v2, v2, v3
59; PWR9LE-NEXT:    vextubrx r3, r3, v2
60; PWR9LE-NEXT:    blr
61;
62; PWR9BE-LABEL: v4i8:
63; PWR9BE:       # %bb.0: # %entry
64; PWR9BE-NEXT:    vsplth v3, v2, 1
65; PWR9BE-NEXT:    li r3, 0
66; PWR9BE-NEXT:    vminub v2, v2, v3
67; PWR9BE-NEXT:    vspltb v3, v2, 1
68; PWR9BE-NEXT:    vminub v2, v2, v3
69; PWR9BE-NEXT:    vextublx r3, r3, v2
70; PWR9BE-NEXT:    blr
71;
72; PWR10LE-LABEL: v4i8:
73; PWR10LE:       # %bb.0: # %entry
74; PWR10LE-NEXT:    vsplth v3, v2, 6
75; PWR10LE-NEXT:    li r3, 0
76; PWR10LE-NEXT:    vminub v2, v2, v3
77; PWR10LE-NEXT:    vspltb v3, v2, 14
78; PWR10LE-NEXT:    vminub v2, v2, v3
79; PWR10LE-NEXT:    vextubrx r3, r3, v2
80; PWR10LE-NEXT:    blr
81;
82; PWR10BE-LABEL: v4i8:
83; PWR10BE:       # %bb.0: # %entry
84; PWR10BE-NEXT:    vsplth v3, v2, 1
85; PWR10BE-NEXT:    li r3, 0
86; PWR10BE-NEXT:    vminub v2, v2, v3
87; PWR10BE-NEXT:    vspltb v3, v2, 1
88; PWR10BE-NEXT:    vminub v2, v2, v3
89; PWR10BE-NEXT:    vextublx r3, r3, v2
90; PWR10BE-NEXT:    blr
91entry:
92  %0 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a)
93  ret i8 %0
94}
95
; Unsigned-min reduction of an <8 x i8> argument: calls
; @llvm.vector.reduce.umin.v8i8 on %a and returns the i8 result.
; The assertions expect three splat + vminub steps (xxspltw, vsplth, vspltb)
; followed by a byte extract (vextubrx little-endian, vextublx big-endian).
96define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 {
97; PWR9LE-LABEL: v8i8:
98; PWR9LE:       # %bb.0: # %entry
99; PWR9LE-NEXT:    xxspltw v3, v2, 2
100; PWR9LE-NEXT:    li r3, 0
101; PWR9LE-NEXT:    vminub v2, v2, v3
102; PWR9LE-NEXT:    vsplth v3, v2, 6
103; PWR9LE-NEXT:    vminub v2, v2, v3
104; PWR9LE-NEXT:    vspltb v3, v2, 14
105; PWR9LE-NEXT:    vminub v2, v2, v3
106; PWR9LE-NEXT:    vextubrx r3, r3, v2
107; PWR9LE-NEXT:    blr
108;
109; PWR9BE-LABEL: v8i8:
110; PWR9BE:       # %bb.0: # %entry
111; PWR9BE-NEXT:    xxspltw v3, v2, 1
112; PWR9BE-NEXT:    li r3, 0
113; PWR9BE-NEXT:    vminub v2, v2, v3
114; PWR9BE-NEXT:    vsplth v3, v2, 1
115; PWR9BE-NEXT:    vminub v2, v2, v3
116; PWR9BE-NEXT:    vspltb v3, v2, 1
117; PWR9BE-NEXT:    vminub v2, v2, v3
118; PWR9BE-NEXT:    vextublx r3, r3, v2
119; PWR9BE-NEXT:    blr
120;
121; PWR10LE-LABEL: v8i8:
122; PWR10LE:       # %bb.0: # %entry
123; PWR10LE-NEXT:    xxspltw v3, v2, 2
124; PWR10LE-NEXT:    li r3, 0
125; PWR10LE-NEXT:    vminub v2, v2, v3
126; PWR10LE-NEXT:    vsplth v3, v2, 6
127; PWR10LE-NEXT:    vminub v2, v2, v3
128; PWR10LE-NEXT:    vspltb v3, v2, 14
129; PWR10LE-NEXT:    vminub v2, v2, v3
130; PWR10LE-NEXT:    vextubrx r3, r3, v2
131; PWR10LE-NEXT:    blr
132;
133; PWR10BE-LABEL: v8i8:
134; PWR10BE:       # %bb.0: # %entry
135; PWR10BE-NEXT:    xxspltw v3, v2, 1
136; PWR10BE-NEXT:    li r3, 0
137; PWR10BE-NEXT:    vminub v2, v2, v3
138; PWR10BE-NEXT:    vsplth v3, v2, 1
139; PWR10BE-NEXT:    vminub v2, v2, v3
140; PWR10BE-NEXT:    vspltb v3, v2, 1
141; PWR10BE-NEXT:    vminub v2, v2, v3
142; PWR10BE-NEXT:    vextublx r3, r3, v2
143; PWR10BE-NEXT:    blr
144entry:
145  %0 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
146  ret i8 %0
147}
148
; Unsigned-min reduction of a <16 x i8> argument: calls
; @llvm.vector.reduce.umin.v16i8 on %a and returns the i8 result.
; The assertions expect four shuffle + vminub steps (xxswapd, xxspltw,
; vsplth, vspltb) followed by a byte extract (vextubrx little-endian,
; vextublx big-endian).
149define dso_local i8 @v16i8(<16 x i8> %a) local_unnamed_addr #0 {
150; PWR9LE-LABEL: v16i8:
151; PWR9LE:       # %bb.0: # %entry
152; PWR9LE-NEXT:    xxswapd v3, v2
153; PWR9LE-NEXT:    li r3, 0
154; PWR9LE-NEXT:    vminub v2, v2, v3
155; PWR9LE-NEXT:    xxspltw v3, v2, 2
156; PWR9LE-NEXT:    vminub v2, v2, v3
157; PWR9LE-NEXT:    vsplth v3, v2, 6
158; PWR9LE-NEXT:    vminub v2, v2, v3
159; PWR9LE-NEXT:    vspltb v3, v2, 14
160; PWR9LE-NEXT:    vminub v2, v2, v3
161; PWR9LE-NEXT:    vextubrx r3, r3, v2
162; PWR9LE-NEXT:    blr
163;
164; PWR9BE-LABEL: v16i8:
165; PWR9BE:       # %bb.0: # %entry
166; PWR9BE-NEXT:    xxswapd v3, v2
167; PWR9BE-NEXT:    li r3, 0
168; PWR9BE-NEXT:    vminub v2, v2, v3
169; PWR9BE-NEXT:    xxspltw v3, v2, 1
170; PWR9BE-NEXT:    vminub v2, v2, v3
171; PWR9BE-NEXT:    vsplth v3, v2, 1
172; PWR9BE-NEXT:    vminub v2, v2, v3
173; PWR9BE-NEXT:    vspltb v3, v2, 1
174; PWR9BE-NEXT:    vminub v2, v2, v3
175; PWR9BE-NEXT:    vextublx r3, r3, v2
176; PWR9BE-NEXT:    blr
177;
178; PWR10LE-LABEL: v16i8:
179; PWR10LE:       # %bb.0: # %entry
180; PWR10LE-NEXT:    xxswapd v3, v2
181; PWR10LE-NEXT:    li r3, 0
182; PWR10LE-NEXT:    vminub v2, v2, v3
183; PWR10LE-NEXT:    xxspltw v3, v2, 2
184; PWR10LE-NEXT:    vminub v2, v2, v3
185; PWR10LE-NEXT:    vsplth v3, v2, 6
186; PWR10LE-NEXT:    vminub v2, v2, v3
187; PWR10LE-NEXT:    vspltb v3, v2, 14
188; PWR10LE-NEXT:    vminub v2, v2, v3
189; PWR10LE-NEXT:    vextubrx r3, r3, v2
190; PWR10LE-NEXT:    blr
191;
192; PWR10BE-LABEL: v16i8:
193; PWR10BE:       # %bb.0: # %entry
194; PWR10BE-NEXT:    xxswapd v3, v2
195; PWR10BE-NEXT:    li r3, 0
196; PWR10BE-NEXT:    vminub v2, v2, v3
197; PWR10BE-NEXT:    xxspltw v3, v2, 1
198; PWR10BE-NEXT:    vminub v2, v2, v3
199; PWR10BE-NEXT:    vsplth v3, v2, 1
200; PWR10BE-NEXT:    vminub v2, v2, v3
201; PWR10BE-NEXT:    vspltb v3, v2, 1
202; PWR10BE-NEXT:    vminub v2, v2, v3
203; PWR10BE-NEXT:    vextublx r3, r3, v2
204; PWR10BE-NEXT:    blr
205entry:
206  %0 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
207  ret i8 %0
208}
209
; Declarations of the i8 unsigned-min reduction intrinsics called by the
; i8 test functions in this section (2-, 4-, 8- and 16-element vectors).
210declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) #0
211declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) #0
212declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) #0
213declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) #0
214
215;;
216;; Vectors of type i16
217;;
; Unsigned-min reduction of a <2 x i16> argument: calls
; @llvm.vector.reduce.umin.v2i16 on %a and returns the i16 result.
; The assertions expect a single vsplth + vminuh step followed by a halfword
; extract (vextuhrx little-endian, vextuhlx big-endian).
218define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 {
219; PWR9LE-LABEL: v2i16:
220; PWR9LE:       # %bb.0: # %entry
221; PWR9LE-NEXT:    vsplth v3, v2, 6
222; PWR9LE-NEXT:    li r3, 0
223; PWR9LE-NEXT:    vminuh v2, v2, v3
224; PWR9LE-NEXT:    vextuhrx r3, r3, v2
225; PWR9LE-NEXT:    blr
226;
227; PWR9BE-LABEL: v2i16:
228; PWR9BE:       # %bb.0: # %entry
229; PWR9BE-NEXT:    vsplth v3, v2, 1
230; PWR9BE-NEXT:    li r3, 0
231; PWR9BE-NEXT:    vminuh v2, v2, v3
232; PWR9BE-NEXT:    vextuhlx r3, r3, v2
233; PWR9BE-NEXT:    blr
234;
235; PWR10LE-LABEL: v2i16:
236; PWR10LE:       # %bb.0: # %entry
237; PWR10LE-NEXT:    vsplth v3, v2, 6
238; PWR10LE-NEXT:    li r3, 0
239; PWR10LE-NEXT:    vminuh v2, v2, v3
240; PWR10LE-NEXT:    vextuhrx r3, r3, v2
241; PWR10LE-NEXT:    blr
242;
243; PWR10BE-LABEL: v2i16:
244; PWR10BE:       # %bb.0: # %entry
245; PWR10BE-NEXT:    vsplth v3, v2, 1
246; PWR10BE-NEXT:    li r3, 0
247; PWR10BE-NEXT:    vminuh v2, v2, v3
248; PWR10BE-NEXT:    vextuhlx r3, r3, v2
249; PWR10BE-NEXT:    blr
250entry:
251  %0 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a)
252  ret i16 %0
253}
254
; Unsigned-min reduction of a <4 x i16> argument: calls
; @llvm.vector.reduce.umin.v4i16 on %a and returns the i16 result.
; The assertions expect two splat + vminuh steps (xxspltw, then vsplth)
; followed by a halfword extract (vextuhrx little-endian, vextuhlx big-endian).
255define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 {
256; PWR9LE-LABEL: v4i16:
257; PWR9LE:       # %bb.0: # %entry
258; PWR9LE-NEXT:    xxspltw v3, v2, 2
259; PWR9LE-NEXT:    li r3, 0
260; PWR9LE-NEXT:    vminuh v2, v2, v3
261; PWR9LE-NEXT:    vsplth v3, v2, 6
262; PWR9LE-NEXT:    vminuh v2, v2, v3
263; PWR9LE-NEXT:    vextuhrx r3, r3, v2
264; PWR9LE-NEXT:    blr
265;
266; PWR9BE-LABEL: v4i16:
267; PWR9BE:       # %bb.0: # %entry
268; PWR9BE-NEXT:    xxspltw v3, v2, 1
269; PWR9BE-NEXT:    li r3, 0
270; PWR9BE-NEXT:    vminuh v2, v2, v3
271; PWR9BE-NEXT:    vsplth v3, v2, 1
272; PWR9BE-NEXT:    vminuh v2, v2, v3
273; PWR9BE-NEXT:    vextuhlx r3, r3, v2
274; PWR9BE-NEXT:    blr
275;
276; PWR10LE-LABEL: v4i16:
277; PWR10LE:       # %bb.0: # %entry
278; PWR10LE-NEXT:    xxspltw v3, v2, 2
279; PWR10LE-NEXT:    li r3, 0
280; PWR10LE-NEXT:    vminuh v2, v2, v3
281; PWR10LE-NEXT:    vsplth v3, v2, 6
282; PWR10LE-NEXT:    vminuh v2, v2, v3
283; PWR10LE-NEXT:    vextuhrx r3, r3, v2
284; PWR10LE-NEXT:    blr
285;
286; PWR10BE-LABEL: v4i16:
287; PWR10BE:       # %bb.0: # %entry
288; PWR10BE-NEXT:    xxspltw v3, v2, 1
289; PWR10BE-NEXT:    li r3, 0
290; PWR10BE-NEXT:    vminuh v2, v2, v3
291; PWR10BE-NEXT:    vsplth v3, v2, 1
292; PWR10BE-NEXT:    vminuh v2, v2, v3
293; PWR10BE-NEXT:    vextuhlx r3, r3, v2
294; PWR10BE-NEXT:    blr
295entry:
296  %0 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
297  ret i16 %0
298}
299
; Unsigned-min reduction of an <8 x i16> argument: calls
; @llvm.vector.reduce.umin.v8i16 on %a and returns the i16 result.
; The assertions expect three shuffle + vminuh steps (xxswapd, xxspltw,
; vsplth) followed by a halfword extract (vextuhrx little-endian,
; vextuhlx big-endian).
300define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 {
301; PWR9LE-LABEL: v8i16:
302; PWR9LE:       # %bb.0: # %entry
303; PWR9LE-NEXT:    xxswapd v3, v2
304; PWR9LE-NEXT:    li r3, 0
305; PWR9LE-NEXT:    vminuh v2, v2, v3
306; PWR9LE-NEXT:    xxspltw v3, v2, 2
307; PWR9LE-NEXT:    vminuh v2, v2, v3
308; PWR9LE-NEXT:    vsplth v3, v2, 6
309; PWR9LE-NEXT:    vminuh v2, v2, v3
310; PWR9LE-NEXT:    vextuhrx r3, r3, v2
311; PWR9LE-NEXT:    blr
312;
313; PWR9BE-LABEL: v8i16:
314; PWR9BE:       # %bb.0: # %entry
315; PWR9BE-NEXT:    xxswapd v3, v2
316; PWR9BE-NEXT:    li r3, 0
317; PWR9BE-NEXT:    vminuh v2, v2, v3
318; PWR9BE-NEXT:    xxspltw v3, v2, 1
319; PWR9BE-NEXT:    vminuh v2, v2, v3
320; PWR9BE-NEXT:    vsplth v3, v2, 1
321; PWR9BE-NEXT:    vminuh v2, v2, v3
322; PWR9BE-NEXT:    vextuhlx r3, r3, v2
323; PWR9BE-NEXT:    blr
324;
325; PWR10LE-LABEL: v8i16:
326; PWR10LE:       # %bb.0: # %entry
327; PWR10LE-NEXT:    xxswapd v3, v2
328; PWR10LE-NEXT:    li r3, 0
329; PWR10LE-NEXT:    vminuh v2, v2, v3
330; PWR10LE-NEXT:    xxspltw v3, v2, 2
331; PWR10LE-NEXT:    vminuh v2, v2, v3
332; PWR10LE-NEXT:    vsplth v3, v2, 6
333; PWR10LE-NEXT:    vminuh v2, v2, v3
334; PWR10LE-NEXT:    vextuhrx r3, r3, v2
335; PWR10LE-NEXT:    blr
336;
337; PWR10BE-LABEL: v8i16:
338; PWR10BE:       # %bb.0: # %entry
339; PWR10BE-NEXT:    xxswapd v3, v2
340; PWR10BE-NEXT:    li r3, 0
341; PWR10BE-NEXT:    vminuh v2, v2, v3
342; PWR10BE-NEXT:    xxspltw v3, v2, 1
343; PWR10BE-NEXT:    vminuh v2, v2, v3
344; PWR10BE-NEXT:    vsplth v3, v2, 1
345; PWR10BE-NEXT:    vminuh v2, v2, v3
346; PWR10BE-NEXT:    vextuhlx r3, r3, v2
347; PWR10BE-NEXT:    blr
348entry:
349  %0 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
350  ret i16 %0
351}
352
; Unsigned-min reduction of a <16 x i16> argument: calls
; @llvm.vector.reduce.umin.v16i16 on %a and returns the i16 result.
; The assertions expect an initial vminuh combining v2 and v3, then three
; shuffle + vminuh steps (xxswapd, xxspltw, vsplth) and a halfword extract
; (vextuhrx little-endian, vextuhlx big-endian).
353define dso_local i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 {
354; PWR9LE-LABEL: v16i16:
355; PWR9LE:       # %bb.0: # %entry
356; PWR9LE-NEXT:    vminuh v2, v2, v3
357; PWR9LE-NEXT:    li r3, 0
358; PWR9LE-NEXT:    xxswapd v3, v2
359; PWR9LE-NEXT:    vminuh v2, v2, v3
360; PWR9LE-NEXT:    xxspltw v3, v2, 2
361; PWR9LE-NEXT:    vminuh v2, v2, v3
362; PWR9LE-NEXT:    vsplth v3, v2, 6
363; PWR9LE-NEXT:    vminuh v2, v2, v3
364; PWR9LE-NEXT:    vextuhrx r3, r3, v2
365; PWR9LE-NEXT:    blr
366;
367; PWR9BE-LABEL: v16i16:
368; PWR9BE:       # %bb.0: # %entry
369; PWR9BE-NEXT:    vminuh v2, v2, v3
370; PWR9BE-NEXT:    li r3, 0
371; PWR9BE-NEXT:    xxswapd v3, v2
372; PWR9BE-NEXT:    vminuh v2, v2, v3
373; PWR9BE-NEXT:    xxspltw v3, v2, 1
374; PWR9BE-NEXT:    vminuh v2, v2, v3
375; PWR9BE-NEXT:    vsplth v3, v2, 1
376; PWR9BE-NEXT:    vminuh v2, v2, v3
377; PWR9BE-NEXT:    vextuhlx r3, r3, v2
378; PWR9BE-NEXT:    blr
379;
380; PWR10LE-LABEL: v16i16:
381; PWR10LE:       # %bb.0: # %entry
382; PWR10LE-NEXT:    vminuh v2, v2, v3
383; PWR10LE-NEXT:    li r3, 0
384; PWR10LE-NEXT:    xxswapd v3, v2
385; PWR10LE-NEXT:    vminuh v2, v2, v3
386; PWR10LE-NEXT:    xxspltw v3, v2, 2
387; PWR10LE-NEXT:    vminuh v2, v2, v3
388; PWR10LE-NEXT:    vsplth v3, v2, 6
389; PWR10LE-NEXT:    vminuh v2, v2, v3
390; PWR10LE-NEXT:    vextuhrx r3, r3, v2
391; PWR10LE-NEXT:    blr
392;
393; PWR10BE-LABEL: v16i16:
394; PWR10BE:       # %bb.0: # %entry
395; PWR10BE-NEXT:    vminuh v2, v2, v3
396; PWR10BE-NEXT:    li r3, 0
397; PWR10BE-NEXT:    xxswapd v3, v2
398; PWR10BE-NEXT:    vminuh v2, v2, v3
399; PWR10BE-NEXT:    xxspltw v3, v2, 1
400; PWR10BE-NEXT:    vminuh v2, v2, v3
401; PWR10BE-NEXT:    vsplth v3, v2, 1
402; PWR10BE-NEXT:    vminuh v2, v2, v3
403; PWR10BE-NEXT:    vextuhlx r3, r3, v2
404; PWR10BE-NEXT:    blr
405entry:
406  %0 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a)
407  ret i16 %0
408}
409
; Declarations of the i16 unsigned-min reduction intrinsics called by the
; i16 test functions in this section (2-, 4-, 8- and 16-element vectors).
410declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) #0
411declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) #0
412declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) #0
413declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) #0
414
415;;
416;; Vectors of type i32
417;;
; Unsigned-min reduction of a <2 x i32> argument: calls
; @llvm.vector.reduce.umin.v2i32 on %a and returns the i32 result.
; The assertions expect a single xxspltw + vminuw step followed by a word
; extract (vextuwrx little-endian, vextuwlx big-endian).
418define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
419; PWR9LE-LABEL: v2i32:
420; PWR9LE:       # %bb.0: # %entry
421; PWR9LE-NEXT:    xxspltw v3, v2, 2
422; PWR9LE-NEXT:    li r3, 0
423; PWR9LE-NEXT:    vminuw v2, v2, v3
424; PWR9LE-NEXT:    vextuwrx r3, r3, v2
425; PWR9LE-NEXT:    blr
426;
427; PWR9BE-LABEL: v2i32:
428; PWR9BE:       # %bb.0: # %entry
429; PWR9BE-NEXT:    xxspltw v3, v2, 1
430; PWR9BE-NEXT:    li r3, 0
431; PWR9BE-NEXT:    vminuw v2, v2, v3
432; PWR9BE-NEXT:    vextuwlx r3, r3, v2
433; PWR9BE-NEXT:    blr
434;
435; PWR10LE-LABEL: v2i32:
436; PWR10LE:       # %bb.0: # %entry
437; PWR10LE-NEXT:    xxspltw v3, v2, 2
438; PWR10LE-NEXT:    li r3, 0
439; PWR10LE-NEXT:    vminuw v2, v2, v3
440; PWR10LE-NEXT:    vextuwrx r3, r3, v2
441; PWR10LE-NEXT:    blr
442;
443; PWR10BE-LABEL: v2i32:
444; PWR10BE:       # %bb.0: # %entry
445; PWR10BE-NEXT:    xxspltw v3, v2, 1
446; PWR10BE-NEXT:    li r3, 0
447; PWR10BE-NEXT:    vminuw v2, v2, v3
448; PWR10BE-NEXT:    vextuwlx r3, r3, v2
449; PWR10BE-NEXT:    blr
450entry:
451  %0 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
452  ret i32 %0
453}
454
; Unsigned-min reduction of a <4 x i32> argument: calls
; @llvm.vector.reduce.umin.v4i32 on %a and returns the i32 result.
; The assertions expect two shuffle + vminuw steps (xxswapd, then xxspltw)
; followed by a word extract (vextuwrx little-endian, vextuwlx big-endian).
455define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
456; PWR9LE-LABEL: v4i32:
457; PWR9LE:       # %bb.0: # %entry
458; PWR9LE-NEXT:    xxswapd v3, v2
459; PWR9LE-NEXT:    li r3, 0
460; PWR9LE-NEXT:    vminuw v2, v2, v3
461; PWR9LE-NEXT:    xxspltw v3, v2, 2
462; PWR9LE-NEXT:    vminuw v2, v2, v3
463; PWR9LE-NEXT:    vextuwrx r3, r3, v2
464; PWR9LE-NEXT:    blr
465;
466; PWR9BE-LABEL: v4i32:
467; PWR9BE:       # %bb.0: # %entry
468; PWR9BE-NEXT:    xxswapd v3, v2
469; PWR9BE-NEXT:    li r3, 0
470; PWR9BE-NEXT:    vminuw v2, v2, v3
471; PWR9BE-NEXT:    xxspltw v3, v2, 1
472; PWR9BE-NEXT:    vminuw v2, v2, v3
473; PWR9BE-NEXT:    vextuwlx r3, r3, v2
474; PWR9BE-NEXT:    blr
475;
476; PWR10LE-LABEL: v4i32:
477; PWR10LE:       # %bb.0: # %entry
478; PWR10LE-NEXT:    xxswapd v3, v2
479; PWR10LE-NEXT:    li r3, 0
480; PWR10LE-NEXT:    vminuw v2, v2, v3
481; PWR10LE-NEXT:    xxspltw v3, v2, 2
482; PWR10LE-NEXT:    vminuw v2, v2, v3
483; PWR10LE-NEXT:    vextuwrx r3, r3, v2
484; PWR10LE-NEXT:    blr
485;
486; PWR10BE-LABEL: v4i32:
487; PWR10BE:       # %bb.0: # %entry
488; PWR10BE-NEXT:    xxswapd v3, v2
489; PWR10BE-NEXT:    li r3, 0
490; PWR10BE-NEXT:    vminuw v2, v2, v3
491; PWR10BE-NEXT:    xxspltw v3, v2, 1
492; PWR10BE-NEXT:    vminuw v2, v2, v3
493; PWR10BE-NEXT:    vextuwlx r3, r3, v2
494; PWR10BE-NEXT:    blr
495entry:
496  %0 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
497  ret i32 %0
498}
499
; Unsigned-min reduction of an <8 x i32> argument: calls
; @llvm.vector.reduce.umin.v8i32 on %a and returns the i32 result.
; The assertions expect an initial vminuw combining v2 and v3, then two
; shuffle + vminuw steps (xxswapd, xxspltw) and a word extract
; (vextuwrx little-endian, vextuwlx big-endian).
500define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
501; PWR9LE-LABEL: v8i32:
502; PWR9LE:       # %bb.0: # %entry
503; PWR9LE-NEXT:    vminuw v2, v2, v3
504; PWR9LE-NEXT:    li r3, 0
505; PWR9LE-NEXT:    xxswapd v3, v2
506; PWR9LE-NEXT:    vminuw v2, v2, v3
507; PWR9LE-NEXT:    xxspltw v3, v2, 2
508; PWR9LE-NEXT:    vminuw v2, v2, v3
509; PWR9LE-NEXT:    vextuwrx r3, r3, v2
510; PWR9LE-NEXT:    blr
511;
512; PWR9BE-LABEL: v8i32:
513; PWR9BE:       # %bb.0: # %entry
514; PWR9BE-NEXT:    vminuw v2, v2, v3
515; PWR9BE-NEXT:    li r3, 0
516; PWR9BE-NEXT:    xxswapd v3, v2
517; PWR9BE-NEXT:    vminuw v2, v2, v3
518; PWR9BE-NEXT:    xxspltw v3, v2, 1
519; PWR9BE-NEXT:    vminuw v2, v2, v3
520; PWR9BE-NEXT:    vextuwlx r3, r3, v2
521; PWR9BE-NEXT:    blr
522;
523; PWR10LE-LABEL: v8i32:
524; PWR10LE:       # %bb.0: # %entry
525; PWR10LE-NEXT:    vminuw v2, v2, v3
526; PWR10LE-NEXT:    li r3, 0
527; PWR10LE-NEXT:    xxswapd v3, v2
528; PWR10LE-NEXT:    vminuw v2, v2, v3
529; PWR10LE-NEXT:    xxspltw v3, v2, 2
530; PWR10LE-NEXT:    vminuw v2, v2, v3
531; PWR10LE-NEXT:    vextuwrx r3, r3, v2
532; PWR10LE-NEXT:    blr
533;
534; PWR10BE-LABEL: v8i32:
535; PWR10BE:       # %bb.0: # %entry
536; PWR10BE-NEXT:    vminuw v2, v2, v3
537; PWR10BE-NEXT:    li r3, 0
538; PWR10BE-NEXT:    xxswapd v3, v2
539; PWR10BE-NEXT:    vminuw v2, v2, v3
540; PWR10BE-NEXT:    xxspltw v3, v2, 1
541; PWR10BE-NEXT:    vminuw v2, v2, v3
542; PWR10BE-NEXT:    vextuwlx r3, r3, v2
543; PWR10BE-NEXT:    blr
544entry:
545  %0 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
546  ret i32 %0
547}
548
; Unsigned-min reduction of a <16 x i32> argument: calls
; @llvm.vector.reduce.umin.v16i32 on %a and returns the i32 result.
; The assertions expect vminuw to combine v3 with v5 and v2 with v4, then
; v2 with v3, followed by two shuffle + vminuw steps (xxswapd, xxspltw) and
; a word extract (vextuwrx little-endian, vextuwlx big-endian).
549define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
550; PWR9LE-LABEL: v16i32:
551; PWR9LE:       # %bb.0: # %entry
552; PWR9LE-NEXT:    vminuw v3, v3, v5
553; PWR9LE-NEXT:    vminuw v2, v2, v4
554; PWR9LE-NEXT:    li r3, 0
555; PWR9LE-NEXT:    vminuw v2, v2, v3
556; PWR9LE-NEXT:    xxswapd v3, v2
557; PWR9LE-NEXT:    vminuw v2, v2, v3
558; PWR9LE-NEXT:    xxspltw v3, v2, 2
559; PWR9LE-NEXT:    vminuw v2, v2, v3
560; PWR9LE-NEXT:    vextuwrx r3, r3, v2
561; PWR9LE-NEXT:    blr
562;
563; PWR9BE-LABEL: v16i32:
564; PWR9BE:       # %bb.0: # %entry
565; PWR9BE-NEXT:    vminuw v3, v3, v5
566; PWR9BE-NEXT:    vminuw v2, v2, v4
567; PWR9BE-NEXT:    li r3, 0
568; PWR9BE-NEXT:    vminuw v2, v2, v3
569; PWR9BE-NEXT:    xxswapd v3, v2
570; PWR9BE-NEXT:    vminuw v2, v2, v3
571; PWR9BE-NEXT:    xxspltw v3, v2, 1
572; PWR9BE-NEXT:    vminuw v2, v2, v3
573; PWR9BE-NEXT:    vextuwlx r3, r3, v2
574; PWR9BE-NEXT:    blr
575;
576; PWR10LE-LABEL: v16i32:
577; PWR10LE:       # %bb.0: # %entry
578; PWR10LE-NEXT:    vminuw v3, v3, v5
579; PWR10LE-NEXT:    vminuw v2, v2, v4
580; PWR10LE-NEXT:    li r3, 0
581; PWR10LE-NEXT:    vminuw v2, v2, v3
582; PWR10LE-NEXT:    xxswapd v3, v2
583; PWR10LE-NEXT:    vminuw v2, v2, v3
584; PWR10LE-NEXT:    xxspltw v3, v2, 2
585; PWR10LE-NEXT:    vminuw v2, v2, v3
586; PWR10LE-NEXT:    vextuwrx r3, r3, v2
587; PWR10LE-NEXT:    blr
588;
589; PWR10BE-LABEL: v16i32:
590; PWR10BE:       # %bb.0: # %entry
591; PWR10BE-NEXT:    vminuw v3, v3, v5
592; PWR10BE-NEXT:    vminuw v2, v2, v4
593; PWR10BE-NEXT:    li r3, 0
594; PWR10BE-NEXT:    vminuw v2, v2, v3
595; PWR10BE-NEXT:    xxswapd v3, v2
596; PWR10BE-NEXT:    vminuw v2, v2, v3
597; PWR10BE-NEXT:    xxspltw v3, v2, 1
598; PWR10BE-NEXT:    vminuw v2, v2, v3
599; PWR10BE-NEXT:    vextuwlx r3, r3, v2
600; PWR10BE-NEXT:    blr
601entry:
602  %0 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %a)
603  ret i32 %0
604}
605
; Declarations of the i32 unsigned-min reduction intrinsics called by the
; i32 test functions in this section (2-, 4-, 8- and 16-element vectors).
606declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) #0
607declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) #0
608declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) #0
609declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) #0
610
611;;
612;; Vectors of type i64
613;;
; Unsigned-min reduction of a <2 x i64> argument: calls
; @llvm.vector.reduce.umin.v2i64 on %a and returns the i64 result.
; The assertions expect a single xxswapd + vminud step, then a move of the
; result to r3 (mfvsrld on the little-endian runs, mfvsrd on big-endian).
614define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
615; PWR9LE-LABEL: v2i64:
616; PWR9LE:       # %bb.0: # %entry
617; PWR9LE-NEXT:    xxswapd v3, v2
618; PWR9LE-NEXT:    vminud v2, v2, v3
619; PWR9LE-NEXT:    mfvsrld r3, v2
620; PWR9LE-NEXT:    blr
621;
622; PWR9BE-LABEL: v2i64:
623; PWR9BE:       # %bb.0: # %entry
624; PWR9BE-NEXT:    xxswapd v3, v2
625; PWR9BE-NEXT:    vminud v2, v2, v3
626; PWR9BE-NEXT:    mfvsrd r3, v2
627; PWR9BE-NEXT:    blr
628;
629; PWR10LE-LABEL: v2i64:
630; PWR10LE:       # %bb.0: # %entry
631; PWR10LE-NEXT:    xxswapd v3, v2
632; PWR10LE-NEXT:    vminud v2, v2, v3
633; PWR10LE-NEXT:    mfvsrld r3, v2
634; PWR10LE-NEXT:    blr
635;
636; PWR10BE-LABEL: v2i64:
637; PWR10BE:       # %bb.0: # %entry
638; PWR10BE-NEXT:    xxswapd v3, v2
639; PWR10BE-NEXT:    vminud v2, v2, v3
640; PWR10BE-NEXT:    mfvsrd r3, v2
641; PWR10BE-NEXT:    blr
642entry:
643  %0 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
644  ret i64 %0
645}
646
; Unsigned-min reduction of a <4 x i64> argument: calls
; @llvm.vector.reduce.umin.v4i64 on %a and returns the i64 result.
; The assertions expect an initial vminud combining v2 and v3, then one
; xxswapd + vminud step and a move of the result to r3 (mfvsrld
; little-endian, mfvsrd big-endian).
647define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
648; PWR9LE-LABEL: v4i64:
649; PWR9LE:       # %bb.0: # %entry
650; PWR9LE-NEXT:    vminud v2, v2, v3
651; PWR9LE-NEXT:    xxswapd v3, v2
652; PWR9LE-NEXT:    vminud v2, v2, v3
653; PWR9LE-NEXT:    mfvsrld r3, v2
654; PWR9LE-NEXT:    blr
655;
656; PWR9BE-LABEL: v4i64:
657; PWR9BE:       # %bb.0: # %entry
658; PWR9BE-NEXT:    vminud v2, v2, v3
659; PWR9BE-NEXT:    xxswapd v3, v2
660; PWR9BE-NEXT:    vminud v2, v2, v3
661; PWR9BE-NEXT:    mfvsrd r3, v2
662; PWR9BE-NEXT:    blr
663;
664; PWR10LE-LABEL: v4i64:
665; PWR10LE:       # %bb.0: # %entry
666; PWR10LE-NEXT:    vminud v2, v2, v3
667; PWR10LE-NEXT:    xxswapd v3, v2
668; PWR10LE-NEXT:    vminud v2, v2, v3
669; PWR10LE-NEXT:    mfvsrld r3, v2
670; PWR10LE-NEXT:    blr
671;
672; PWR10BE-LABEL: v4i64:
673; PWR10BE:       # %bb.0: # %entry
674; PWR10BE-NEXT:    vminud v2, v2, v3
675; PWR10BE-NEXT:    xxswapd v3, v2
676; PWR10BE-NEXT:    vminud v2, v2, v3
677; PWR10BE-NEXT:    mfvsrd r3, v2
678; PWR10BE-NEXT:    blr
679entry:
680  %0 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a)
681  ret i64 %0
682}
683
; Unsigned-min reduction of an <8 x i64> argument: calls
; @llvm.vector.reduce.umin.v8i64 on %a and returns the i64 result.
; The assertions expect vminud to combine v2 with v4 and v3 with v5, then
; v2 with v3, followed by one xxswapd + vminud step and a move of the result
; to r3 (mfvsrld little-endian, mfvsrd big-endian).
684define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
685; PWR9LE-LABEL: v8i64:
686; PWR9LE:       # %bb.0: # %entry
687; PWR9LE-NEXT:    vminud v2, v2, v4
688; PWR9LE-NEXT:    vminud v3, v3, v5
689; PWR9LE-NEXT:    vminud v2, v2, v3
690; PWR9LE-NEXT:    xxswapd v3, v2
691; PWR9LE-NEXT:    vminud v2, v2, v3
692; PWR9LE-NEXT:    mfvsrld r3, v2
693; PWR9LE-NEXT:    blr
694;
695; PWR9BE-LABEL: v8i64:
696; PWR9BE:       # %bb.0: # %entry
697; PWR9BE-NEXT:    vminud v2, v2, v4
698; PWR9BE-NEXT:    vminud v3, v3, v5
699; PWR9BE-NEXT:    vminud v2, v2, v3
700; PWR9BE-NEXT:    xxswapd v3, v2
701; PWR9BE-NEXT:    vminud v2, v2, v3
702; PWR9BE-NEXT:    mfvsrd r3, v2
703; PWR9BE-NEXT:    blr
704;
705; PWR10LE-LABEL: v8i64:
706; PWR10LE:       # %bb.0: # %entry
707; PWR10LE-NEXT:    vminud v2, v2, v4
708; PWR10LE-NEXT:    vminud v3, v3, v5
709; PWR10LE-NEXT:    vminud v2, v2, v3
710; PWR10LE-NEXT:    xxswapd v3, v2
711; PWR10LE-NEXT:    vminud v2, v2, v3
712; PWR10LE-NEXT:    mfvsrld r3, v2
713; PWR10LE-NEXT:    blr
714;
715; PWR10BE-LABEL: v8i64:
716; PWR10BE:       # %bb.0: # %entry
717; PWR10BE-NEXT:    vminud v2, v2, v4
718; PWR10BE-NEXT:    vminud v3, v3, v5
719; PWR10BE-NEXT:    vminud v2, v2, v3
720; PWR10BE-NEXT:    xxswapd v3, v2
721; PWR10BE-NEXT:    vminud v2, v2, v3
722; PWR10BE-NEXT:    mfvsrd r3, v2
723; PWR10BE-NEXT:    blr
724entry:
725  %0 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %a)
726  ret i64 %0
727}
728
; Unsigned-min reduction of a <16 x i64> argument: calls
; @llvm.vector.reduce.umin.v16i64 on %a and returns the i64 result.
; The assertions expect vminud to fold v2..v9 pairwise (v3/v7, v5/v9, v2/v6,
; v4/v8, then v2/v4, v3/v5, then v2/v3), followed by one xxswapd + vminud
; step and a move of the result to r3 (mfvsrld little-endian, mfvsrd
; big-endian).
729define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
730; PWR9LE-LABEL: v16i64:
731; PWR9LE:       # %bb.0: # %entry
732; PWR9LE-NEXT:    vminud v3, v3, v7
733; PWR9LE-NEXT:    vminud v5, v5, v9
734; PWR9LE-NEXT:    vminud v2, v2, v6
735; PWR9LE-NEXT:    vminud v4, v4, v8
736; PWR9LE-NEXT:    vminud v2, v2, v4
737; PWR9LE-NEXT:    vminud v3, v3, v5
738; PWR9LE-NEXT:    vminud v2, v2, v3
739; PWR9LE-NEXT:    xxswapd v3, v2
740; PWR9LE-NEXT:    vminud v2, v2, v3
741; PWR9LE-NEXT:    mfvsrld r3, v2
742; PWR9LE-NEXT:    blr
743;
744; PWR9BE-LABEL: v16i64:
745; PWR9BE:       # %bb.0: # %entry
746; PWR9BE-NEXT:    vminud v3, v3, v7
747; PWR9BE-NEXT:    vminud v5, v5, v9
748; PWR9BE-NEXT:    vminud v2, v2, v6
749; PWR9BE-NEXT:    vminud v4, v4, v8
750; PWR9BE-NEXT:    vminud v2, v2, v4
751; PWR9BE-NEXT:    vminud v3, v3, v5
752; PWR9BE-NEXT:    vminud v2, v2, v3
753; PWR9BE-NEXT:    xxswapd v3, v2
754; PWR9BE-NEXT:    vminud v2, v2, v3
755; PWR9BE-NEXT:    mfvsrd r3, v2
756; PWR9BE-NEXT:    blr
757;
758; PWR10LE-LABEL: v16i64:
759; PWR10LE:       # %bb.0: # %entry
760; PWR10LE-NEXT:    vminud v3, v3, v7
761; PWR10LE-NEXT:    vminud v5, v5, v9
762; PWR10LE-NEXT:    vminud v2, v2, v6
763; PWR10LE-NEXT:    vminud v4, v4, v8
764; PWR10LE-NEXT:    vminud v2, v2, v4
765; PWR10LE-NEXT:    vminud v3, v3, v5
766; PWR10LE-NEXT:    vminud v2, v2, v3
767; PWR10LE-NEXT:    xxswapd v3, v2
768; PWR10LE-NEXT:    vminud v2, v2, v3
769; PWR10LE-NEXT:    mfvsrld r3, v2
770; PWR10LE-NEXT:    blr
771;
772; PWR10BE-LABEL: v16i64:
773; PWR10BE:       # %bb.0: # %entry
774; PWR10BE-NEXT:    vminud v3, v3, v7
775; PWR10BE-NEXT:    vminud v5, v5, v9
776; PWR10BE-NEXT:    vminud v2, v2, v6
777; PWR10BE-NEXT:    vminud v4, v4, v8
778; PWR10BE-NEXT:    vminud v2, v2, v4
779; PWR10BE-NEXT:    vminud v3, v3, v5
780; PWR10BE-NEXT:    vminud v2, v2, v3
781; PWR10BE-NEXT:    xxswapd v3, v2
782; PWR10BE-NEXT:    vminud v2, v2, v3
783; PWR10BE-NEXT:    mfvsrd r3, v2
784; PWR10BE-NEXT:    blr
785entry:
786  %0 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %a)
787  ret i64 %0
788}
789
; Declarations of the i64 unsigned-min reduction intrinsics called by the
; i64 test functions in this section (2-, 4-, 8- and 16-element vectors).
790declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) #0
791declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) #0
792declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>) #0
793declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>) #0
794
795
; Attribute group #0 (nounwind), referenced by the test functions and the
; intrinsic declarations in this file.
796attributes #0 = { nounwind }
797