xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll (revision 2c7786e94a1058bd4f96794a1d4f70dcb86e5cc5)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,VI %s

5define half @reduction_half4(<4 x half> %a) {
6; GCN-LABEL: @reduction_half4(
7; GCN-NEXT:  entry:
8; GCN-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[A:%.*]])
9; GCN-NEXT:    ret half [[TMP0]]
10;
11entry:
12  %elt0 = extractelement <4 x half> %a, i64 0
13  %elt1 = extractelement <4 x half> %a, i64 1
14  %elt2 = extractelement <4 x half> %a, i64 2
15  %elt3 = extractelement <4 x half> %a, i64 3
16
17  %add1 = fadd fast half %elt1, %elt0
18  %add2 = fadd fast half %elt2, %add1
19  %add3 = fadd fast half %elt3, %add2
20
21  ret half %add3
22}
23
; Same fadd-chain pattern widened to <8 x half>; both targets vectorize to a
; single v8f16 reduction intrinsic.
define half @reduction_half8(<8 x half> %vec8) {
; GCN-LABEL: @reduction_half8(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8:%.*]])
; GCN-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6

  ret half %add7
}

; <16 x half> fadd chain: gfx900 reduces it as one v16f16 intrinsic, while
; fiji splits into two v8f16 reductions combined by a final fadd (see the
; per-prefix CHECK lines).
define half @reduction_half16(<16 x half> %vec16) {
; GFX9-LABEL: @reduction_half16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16:%.*]])
; GFX9-NEXT:    ret half [[TMP0]]
;
; VI-LABEL: @reduction_half16(
; VI-NEXT:  entry:
; VI-NEXT:    [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VI-NEXT:    [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[TMP0]])
; VI-NEXT:    [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; VI-NEXT:    [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[TMP2]])
; VI-NEXT:    [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
; VI-NEXT:    ret half [[OP_RDX]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14

  ret half %add15
}

; FIXME: support vectorization;
; Negative test today: an fsub chain is not recognized as a reduction, so the
; scalar extract/fsub sequence is expected to survive unchanged.
define half @reduction_sub_half4(<4 x half> %a) {
; GCN-LABEL: @reduction_sub_half4(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fsub fast half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %a, i64 0
  %elt1 = extractelement <4 x half> %a, i64 1
  %elt2 = extractelement <4 x half> %a, i64 2
  %elt3 = extractelement <4 x half> %a, i64 3

  %add1 = fsub fast half %elt1, %elt0
  %add2 = fsub fast half %elt2, %add1
  %add3 = fsub fast half %elt3, %add2

  ret half %add3
}

; Integer add chain over <4 x i16>; both targets form llvm.vector.reduce.add.
define i16 @reduction_v4i16(<4 x i16> %a) {
; GCN-LABEL: @reduction_v4i16(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]])
; GCN-NEXT:    ret i16 [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x i16> %a, i64 0
  %elt1 = extractelement <4 x i16> %a, i64 1
  %elt2 = extractelement <4 x i16> %a, i64 2
  %elt3 = extractelement <4 x i16> %a, i64 3

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2

  ret i16 %add3
}

; Integer add chain over <8 x i16>; both targets form llvm.vector.reduce.add.
define i16 @reduction_v8i16(<8 x i16> %vec8) {
; GCN-LABEL: @reduction_v8i16(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]])
; GCN-NEXT:    ret i16 [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2
  %add4 = add i16 %elt4, %add3
  %add5 = add i16 %elt5, %add4
  %add6 = add i16 %elt6, %add5
  %add7 = add i16 %elt7, %add6

  ret i16 %add7
}

; icmp-ult/select min chain over <4 x i16>: gfx900 forms
; llvm.vector.reduce.umin; fiji keeps the scalar chain.
define i16 @reduction_umin_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umin_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umin_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    ret i16 [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp2, i16 %elt2, i16 %min1
  %cmp3 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp3, i16 %elt3, i16 %min2

  ret i16 %min3
}

; Same umin pattern widened to <8 x i16>: reduced to an intrinsic on gfx900,
; left scalar on fiji.
define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) {
; GFX9-LABEL: @reduction_icmp_v8i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[VEC8:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_icmp_v8i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
; VI-NEXT:    [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    ret i16 [[MIN7]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %cmp0 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp ult i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp ult i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp ult i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp ult i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  ret i16 %min7
}

; Signed-min chain over <16 x i16>: reduced to llvm.vector.reduce.smin on
; gfx900; fiji keeps the full scalar icmp/select chain.
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
; GFX9-LABEL: @reduction_smin_v16i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> [[VEC16:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smin_v16i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7
; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8
; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9
; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10
; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11
; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12
; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13
; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14
; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15
; VI-NEXT:    [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]]
; VI-NEXT:    [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]]
; VI-NEXT:    [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]]
; VI-NEXT:    [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]]
; VI-NEXT:    [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]]
; VI-NEXT:    [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]]
; VI-NEXT:    [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]]
; VI-NEXT:    [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]]
; VI-NEXT:    [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]]
; VI-NEXT:    [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]]
; VI-NEXT:    [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]]
; VI-NEXT:    [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]]
; VI-NEXT:    [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]]
; VI-NEXT:    [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]]
; VI-NEXT:    [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]]
; VI-NEXT:    [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]]
; VI-NEXT:    ret i16 [[MIN15]]
;
entry:
  %elt0 = extractelement <16 x i16> %vec16, i64 0
  %elt1 = extractelement <16 x i16> %vec16, i64 1
  %elt2 = extractelement <16 x i16> %vec16, i64 2
  %elt3 = extractelement <16 x i16> %vec16, i64 3
  %elt4 = extractelement <16 x i16> %vec16, i64 4
  %elt5 = extractelement <16 x i16> %vec16, i64 5
  %elt6 = extractelement <16 x i16> %vec16, i64 6
  %elt7 = extractelement <16 x i16> %vec16, i64 7

  %elt8 = extractelement <16 x i16> %vec16, i64 8
  %elt9 = extractelement <16 x i16> %vec16, i64 9
  %elt10 = extractelement <16 x i16> %vec16, i64 10
  %elt11 = extractelement <16 x i16> %vec16, i64 11
  %elt12 = extractelement <16 x i16> %vec16, i64 12
  %elt13 = extractelement <16 x i16> %vec16, i64 13
  %elt14 = extractelement <16 x i16> %vec16, i64 14
  %elt15 = extractelement <16 x i16> %vec16, i64 15

  %cmp0 = icmp slt i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp slt i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp slt i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp slt i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp slt i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp slt i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp slt i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  %cmp7 = icmp slt i16 %elt8, %min7
  %min8 = select i1 %cmp7, i16 %elt8, i16 %min7
  %cmp8 = icmp slt i16 %elt9, %min8
  %min9 = select i1 %cmp8, i16 %elt9, i16 %min8

  %cmp9 = icmp slt i16 %elt10, %min9
  %min10 = select i1 %cmp9, i16 %elt10, i16 %min9
  %cmp10 = icmp slt i16 %elt11, %min10
  %min11 = select i1 %cmp10, i16 %elt11, i16 %min10

  %cmp11 = icmp slt i16 %elt12, %min11
  %min12 = select i1 %cmp11, i16 %elt12, i16 %min11
  %cmp12 = icmp slt i16 %elt13, %min12
  %min13 = select i1 %cmp12, i16 %elt13, i16 %min12

  %cmp13 = icmp slt i16 %elt14, %min13
  %min14 = select i1 %cmp13, i16 %elt14, i16 %min13
  %cmp14 = icmp slt i16 %elt15, %min14
  %min15 = select i1 %cmp14, i16 %elt15, i16 %min14


  ret i16 %min15
}

; Unsigned-max chain over <4 x i16>: llvm.vector.reduce.umax on gfx900,
; scalar chain kept on fiji.
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ugt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ugt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp ugt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; Signed-max chain over <4 x i16>: llvm.vector.reduce.smax on gfx900,
; scalar chain kept on fiji.
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_smax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp sgt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp sgt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp sgt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
; Negative test today: the fcmp/select fmax chain is not vectorized on either
; target (scalar chain expected in the output).
define half @reduction_fmax_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmax_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]]
; GCN-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]]
; GCN-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]]
; GCN-NEXT:    ret half [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast ogt half %elt1, %elt0
  %max1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast ogt half %elt2, %max1
  %max2 = select i1 %cmp2, half %elt2, half %max1
  %cmp3 = fcmp fast ogt half %elt3, %max2
  %max3 = select i1 %cmp3, half %elt3, half %max2

  ret half %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
; Negative test today: the fcmp/select fmin chain is not vectorized on either
; target (scalar chain expected in the output).
define half @reduction_fmin_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmin_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]]
; GCN-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]]
; GCN-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]]
; GCN-NEXT:    ret half [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast olt half %elt1, %elt0
  %min1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast olt half %elt2, %min1
  %min2 = select i1 %cmp2, half %elt2, half %min1
  %cmp3 = fcmp fast olt half %elt3, %min2
  %min3 = select i1 %cmp3, half %elt3, half %min2

  ret half %min3
}

; Tests to make sure reduction does not kick in. vega does not support packed math for types larger than 16 bits.
define float @reduction_v4float(<4 x float> %a) {
; GCN-LABEL: @reduction_v4float(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %a, i64 0
  %elt1 = extractelement <4 x float> %a, i64 1
  %elt2 = extractelement <4 x float> %a, i64 2
  %elt3 = extractelement <4 x float> %a, i64 3

  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2

  ret float %add3
}
