; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll (revision 5921295dcaa1ad514d79e0ee824b9df1c077a2d0)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FULLFP16

; 2-element fast-math half reduction: per the checks, both RUN configs keep
; the scalar extract/extract/fadd chain (no vectorization).
define half @reduce_fast_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_fast_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd fast half %elt1, %elt0
  ret half %add1
}

; 2-element strict (no fast-math) half reduction: stays scalar per the checks.
define half @reduce_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd half %elt1, %elt0
  ret half %add1
}

; 4-element fast-math half reduction: vectorized to
; @llvm.vector.reduce.fadd.v4f16 under both attribute sets (shared CHECK).
define half @reduce_fast_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_fast_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
; CHECK-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  ret half %add3
}

; 4-element strict half reduction: stays fully scalar per the checks
; (no fast-math flags, so no reassociation into a reduction intrinsic).
define half @reduce_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  ret half %add3
}

; 8-element fast-math half reduction: the two RUN configs diverge, so the
; checks split. NOFP16: two v4f16 reductions combined by a scalar fadd.
; FULLFP16: one v8f16 reduction.
define half @reduce_fast_half8(<8 x half> %vec8) {
; NOFP16-LABEL: define half @reduce_fast_half8(
; NOFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; NOFP16-NEXT:  [[ENTRY:.*:]]
; NOFP16-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NOFP16-NEXT:    [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
; NOFP16-NEXT:    [[TMP2:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NOFP16-NEXT:    [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]])
; NOFP16-NEXT:    [[OP_RDX3:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
; NOFP16-NEXT:    ret half [[OP_RDX3]]
;
; FULLFP16-LABEL: define half @reduce_fast_half8(
; FULLFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; FULLFP16-NEXT:  [[ENTRY:.*:]]
; FULLFP16-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8]])
; FULLFP16-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  ret half %add7
}

; 8-element strict half reduction: stays fully scalar in both configs.
define half @reduce_half8(<8 x half> %vec8) {
; CHECK-LABEL: define half @reduce_half8(
; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT:    ret half [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  ret half %add7
}

; 16-element fast-math half reduction: vectorized to a single
; @llvm.vector.reduce.fadd.v16f16 under both attribute sets.
define half @reduce_fast_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_fast_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]])
; CHECK-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14
  ret half %add15
}

; 16-element strict half reduction: stays fully scalar in both configs.
define half @reduce_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <16 x half> [[VEC16]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
; CHECK-NEXT:    [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
; CHECK-NEXT:    [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
; CHECK-NEXT:    [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
; CHECK-NEXT:    [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
; CHECK-NEXT:    [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
; CHECK-NEXT:    [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
; CHECK-NEXT:    [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
; CHECK-NEXT:    [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT:    [[ADD8:%.*]] = fadd half [[ELT8]], [[ADD7]]
; CHECK-NEXT:    [[ADD9:%.*]] = fadd half [[ELT9]], [[ADD8]]
; CHECK-NEXT:    [[ADD10:%.*]] = fadd half [[ELT10]], [[ADD9]]
; CHECK-NEXT:    [[ADD11:%.*]] = fadd half [[ELT11]], [[ADD10]]
; CHECK-NEXT:    [[ADD12:%.*]] = fadd half [[ELT12]], [[ADD11]]
; CHECK-NEXT:    [[ADD13:%.*]] = fadd half [[ELT13]], [[ADD12]]
; CHECK-NEXT:    [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
; CHECK-NEXT:    [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
; CHECK-NEXT:    ret half [[ADD15]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  %add8 = fadd half %elt8, %add7
  %add9 = fadd half %elt9, %add8
  %add10 = fadd half %elt10, %add9
  %add11 = fadd half %elt11, %add10
  %add12 = fadd half %elt12, %add11
  %add13 = fadd half %elt13, %add12
  %add14 = fadd half %elt14, %add13
  %add15 = fadd half %elt15, %add14
  ret half %add15
}

; 2-element fast-math float reduction: stays scalar per the checks.
define float @reduce_fast_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_fast_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd fast float %elt1, %elt0
  ret float %add1
}

; 2-element strict float reduction: stays scalar per the checks.
define float @reduce_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd float %elt1, %elt0
  ret float %add1
}

; 4-element fast-math float reduction: vectorized to
; @llvm.vector.reduce.fadd.v4f32 in both configs.
define float @reduce_fast_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_fast_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  ret float %add3
}

; 4-element strict float reduction: stays fully scalar per the checks.
define float @reduce_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  ret float %add3
}

; 8-element fast-math float reduction: vectorized to
; @llvm.vector.reduce.fadd.v8f32 in both configs.
define float @reduce_fast_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_fast_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  %add4 = fadd fast float %elt4, %add3
  %add5 = fadd fast float %elt5, %add4
  %add6 = fadd fast float %elt6, %add5
  %add7 = fadd fast float %elt7, %add6
  ret float %add7
}

; 8-element strict float reduction: stays fully scalar per the checks.
define float @reduce_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <8 x float> [[VEC8]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <8 x float> [[VEC8]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <8 x float> [[VEC8]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <8 x float> [[VEC8]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <8 x float> [[VEC8]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <8 x float> [[VEC8]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <8 x float> [[VEC8]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <8 x float> [[VEC8]], i64 7
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd float [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  %add4 = fadd float %elt4, %add3
  %add5 = fadd float %elt5, %add4
  %add6 = fadd float %elt6, %add5
  %add7 = fadd float %elt7, %add6
  ret float %add7
}

; 2-element fast-math double reduction: stays scalar per the checks.
define double @reduce_fast_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_fast_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd fast double %elt1, %elt0
  ret double %add1
}

; 2-element strict double reduction: stays scalar per the checks.
define double @reduce_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd double %elt1, %elt0
  ret double %add1
}

; 4-element fast-math double reduction: vectorized to
; @llvm.vector.reduce.fadd.v4f64 in both configs.
define double @reduce_fast_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_fast_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd fast double %elt1, %elt0
  %add2 = fadd fast double %elt2, %add1
  %add3 = fadd fast double %elt3, %add2
  ret double %add3
}

; 4-element strict double reduction: stays fully scalar per the checks.
define double @reduce_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x double> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x double> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x double> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x double> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret double [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd double %elt1, %elt0
  %add2 = fadd double %elt2, %add1
  %add3 = fadd double %elt3, %add2
  ret double %add3
}

; Fixed iteration count. sum += a[i]
; Per the checks: the first four loads become one <4 x float> load feeding a
; v4f32 reduction; the fifth element stays a scalar load added at the end.
define float @reduce_fast_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[TMP1]], [[LOAD4]]
; CHECK-NEXT:    ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd fast float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd fast float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd fast float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd fast float %load4, %add3
  ret float %add4
}

; Fixed iteration count. sum += a[i]
; Strict-FP variant of the case above: per the checks, nothing is vectorized.
define float @reduce_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[LOAD1]], [[LOAD]]
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[LOAD2]], [[ADD1]]
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[LOAD3]], [[ADD2]]
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
; CHECK-NEXT:    ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd float %load4, %add3
  ret float %add4
}

; Reduction needs a shuffle. See add2 and add3.
; Per the checks, with fast-math the whole thing still folds into one
; v8f32 reduction built from the two <4 x float> loads.
define float @reduce_fast_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_fast_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[TMP0]], i64 4)
; CHECK-NEXT:    [[RED3:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT:    ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd fast float %loada, %loadb
  %add1 = fadd fast float %loada1, %loadb1
  %add2 = fadd fast float %loada3, %loadb2
  %add3 = fadd fast float %loada2, %loadb3
  %red1 = fadd fast float %add, %add1
  %red2 = fadd fast float %add2, %red1
  %red3 = fadd fast float %add3, %red2
  ret float %red3
}

; Reduction needs a shuffle. See add2 and add3.
; Strict-FP variant: per the checks, only the first pair of element-wise adds
; becomes a <2 x float> fadd; the shuffled adds and the reduction stay scalar.
define float @reduce_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT:    [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
; CHECK-NEXT:    [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4
; CHECK-NEXT:    [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4
; CHECK-NEXT:    [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4
; CHECK-NEXT:    [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[LOADA3]], [[LOADB2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[LOADA2]], [[LOADB3]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT:    [[RED1:%.*]] = fadd float [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
; CHECK-NEXT:    [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
; CHECK-NEXT:    ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd float %loada, %loadb
  %add1 = fadd float %loada1, %loadb1
  %add2 = fadd float %loada3, %loadb2
  %add3 = fadd float %loada2, %loadb3
  %red1 = fadd float %add, %add1
  %red2 = fadd float %add2, %red1
  %red3 = fadd float %add3, %red2
  ret float %red3
}


; Addition of log.
; Eight contiguous float loads feed llvm.log.f32 calls whose results are
; summed by a sequential chain of `fadd fast`.  The autogenerated CHECK
; lines expect the function to survive SLP unchanged (fully scalar) --
; presumably vectorizing the log calls is unprofitable on this target;
; NOTE(review): confirm against the AArch64 cost model.
define float @reduce_fast_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT:    [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT:    [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT:    [[LOG:%.*]] = call fast float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT:    [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT:    [[LOG2:%.*]] = call fast float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT:    [[LOG3:%.*]] = call fast float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT:    [[LOG4:%.*]] = call fast float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT:    [[LOG5:%.*]] = call fast float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT:    [[LOG6:%.*]] = call fast float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT:    [[LOG7:%.*]] = call fast float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[LOG]], [[LOG1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd fast float [[ADD1]], [[LOG2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd fast float [[ADD2]], [[LOG3]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[ADD3]], [[LOG4]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[LOG5]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  ; Addresses of a[1]..a[7]; a[0] is loaded through %a directly.
  %gep1 = getelementptr inbounds float, ptr %a, i32 1
  %gep2 = getelementptr inbounds float, ptr %a, i32 2
  %gep3 = getelementptr inbounds float, ptr %a, i32 3
  %gep4 = getelementptr inbounds float, ptr %a, i32 4
  %gep5 = getelementptr inbounds float, ptr %a, i32 5
  %gep6 = getelementptr inbounds float, ptr %a, i32 6
  %gep7 = getelementptr inbounds float, ptr %a, i32 7
  %load = load float, ptr %a
  %load1 = load float, ptr %gep1
  %load2 = load float, ptr %gep2
  %load3 = load float, ptr %gep3
  %load4 = load float, ptr %gep4
  %load5 = load float, ptr %gep5
  %load6 = load float, ptr %gep6
  %load7 = load float, ptr %gep7
  ; Per-element log, all calls marked fast.
  %log = call fast float @llvm.log.f32(float %load)
  %log1 = call fast float @llvm.log.f32(float %load1)
  %log2 = call fast float @llvm.log.f32(float %load2)
  %log3 = call fast float @llvm.log.f32(float %load3)
  %log4 = call fast float @llvm.log.f32(float %load4)
  %log5 = call fast float @llvm.log.f32(float %load5)
  %log6 = call fast float @llvm.log.f32(float %load6)
  %log7 = call fast float @llvm.log.f32(float %log7)
  ; In-order accumulation chain; `fast` would allow reassociation.
  %add1 = fadd fast float %log, %log1
  %add2 = fadd fast float %add1, %log2
  %add3 = fadd fast float %add2, %log3
  %add4 = fadd fast float %add3, %log4
  %add5 = fadd fast float %add4, %log5
  %add6 = fadd fast float %add5, %log6
  %add7 = fadd fast float %add6, %log7
  ret float %add7
}
706
; Strict (no fast-math) fadd reduction over llvm.log.f32 results; the CHECK
; lines below likewise expect it to remain fully scalar after SLP.
; Same shape as reduce_fast_float_case3 but with no fast-math flags on the
; log calls or the fadd chain.  The autogenerated CHECK lines expect the
; function to survive SLP unchanged (fully scalar).
define float @reduce_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT:    [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT:    [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT:    [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT:    [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT:    [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT:    [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT:    [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT:    [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT:    [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT:    [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  ; Addresses of a[1]..a[7]; a[0] is loaded through %a directly.
  %gep1 = getelementptr inbounds float, ptr %a, i32 1
  %gep2 = getelementptr inbounds float, ptr %a, i32 2
  %gep3 = getelementptr inbounds float, ptr %a, i32 3
  %gep4 = getelementptr inbounds float, ptr %a, i32 4
  %gep5 = getelementptr inbounds float, ptr %a, i32 5
  %gep6 = getelementptr inbounds float, ptr %a, i32 6
  %gep7 = getelementptr inbounds float, ptr %a, i32 7
  %load = load float, ptr %a
  %load1 = load float, ptr %gep1
  %load2 = load float, ptr %gep2
  %load3 = load float, ptr %gep3
  %load4 = load float, ptr %gep4
  %load5 = load float, ptr %gep5
  %load6 = load float, ptr %gep6
  %load7 = load float, ptr %gep7
  ; Per-element log with strict FP semantics (no fast-math flags).
  %log = call float @llvm.log.f32(float %load)
  %log1 = call float @llvm.log.f32(float %load1)
  %log2 = call float @llvm.log.f32(float %load2)
  %log3 = call float @llvm.log.f32(float %load3)
  %log4 = call float @llvm.log.f32(float %load4)
  %log5 = call float @llvm.log.f32(float %load5)
  %log6 = call float @llvm.log.f32(float %load6)
  %log7 = call float @llvm.log.f32(float %load7)
  ; Strictly ordered accumulation chain; reassociation is not permitted.
  %add1 = fadd float %log, %log1
  %add2 = fadd float %add1, %log2
  %add3 = fadd float %add2, %log3
  %add4 = fadd float %add3, %log4
  %add5 = fadd float %add4, %log5
  %add6 = fadd float %add5, %log6
  %add7 = fadd float %add6, %log7
  ret float %add7
}
777
; Pairwise (tree-shaped, out-of-order) `fadd fast` reduction of the four
; lanes of a <4 x half>.  Because the fast flag permits reassociation, the
; CHECK lines expect SLP to fold the whole tree into a single
; llvm.vector.reduce.fadd intrinsic; the CHECK is shared, so the result is
; the same with -fullfp16 and +fullfp16.
define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_unordered_fast_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
; CHECK-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  ; Reduction tree: (elt1+elt0) + (elt2+elt3), not a sequential chain.
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %elt3
  %add3 = fadd fast half %add1, %add2
  ret half %add3
}
795
; The same pairwise tree without fast-math flags: reassociation is not
; allowed, so the CHECK lines expect only partial vectorization -- the two
; independent adds become one <2 x half> fadd fed by shuffles, followed by a
; scalar fadd combining the two lanes, computing exactly the original
; (elt1+elt0) + (elt2+elt3).
define half @reduce_unordered_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_unordered_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x half> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x half> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
; CHECK-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  ; Reduction tree: (elt1+elt0) + (elt2+elt3), strict FP ordering.
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %elt3
  %add3 = fadd half %add1, %add2
  ret half %add3
}
818