xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll (revision 8b56da5e9f3ba737a5ff4bf5dee654416849042f)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -O2 -mattr=avx < %s | opt -expand-reductions -mattr=avx -S | FileCheck %s
3
4; Test if SLP vector reduction patterns are recognized
5; and optionally converted to reduction intrinsics and
6; back to raw IR.
7
; Module-level target description: an x86-64 triple with the vendor and OS
; fields left empty, plus the data layout string used for this module.
8target triple = "x86_64--"
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10
; add_v4i32: integer add reduction over four consecutive i32 values at %p.
; The input is a scalar counted loop (i = 0 .. 3) whose accumulator %r.0
; starts at 0. The autogenerated assertions below expect a single <4 x i32>
; load followed by a two-step shuffle/add reduction and an extract of lane 0.
11define i32 @add_v4i32(ptr %p) #0 {
12; CHECK-LABEL: @add_v4i32(
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
15; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
16; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
17; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
18; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
19; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
20; CHECK-NEXT:    ret i32 [[TMP2]]
21;
22entry:
23  br label %for.cond
24
; Loop header: %r.0 is the running sum, %i.0 the induction variable.
25for.cond:
26  %r.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
27  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
28  %cmp = icmp slt i32 %i.0, 4
29  br i1 %cmp, label %for.body, label %for.cond.cleanup
30
31for.cond.cleanup:
32  br label %for.end
33
; Loop body: load p[i] and add it to the running sum.
34for.body:
35  %idxprom = sext i32 %i.0 to i64
36  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
37  %0 = load i32, ptr %arrayidx, align 4, !tbaa !3
38  %add = add nsw i32 %r.0, %0
39  br label %for.inc
40
41for.inc:
42  %inc = add nsw i32 %i.0, 1
43  br label %for.cond
44
45for.end:
46  ret i32 %r.0
47}
48
; mul_v8i16: multiply reduction over eight consecutive i16 values at %p.
; The scalar loop (i = 0 .. 7) keeps an i16 accumulator %r.0 starting at 1;
; each step sign-extends both operands to i32, multiplies, and truncates the
; product back to i16. The assertions below expect a single <8 x i16> load
; and a three-step shuffle/mul reduction performed directly on i16 lanes.
49define signext i16 @mul_v8i16(ptr %p) #0 {
50; CHECK-LABEL: @mul_v8i16(
51; CHECK-NEXT:  entry:
52; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2, !tbaa [[TBAA4:![0-9]+]]
53; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
54; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
55; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
56; CHECK-NEXT:    [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
57; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
58; CHECK-NEXT:    [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
59; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
60; CHECK-NEXT:    ret i16 [[TMP2]]
61;
62entry:
63  br label %for.cond
64
; Loop header: %r.0 is the running product, %i.0 the induction variable.
65for.cond:
66  %r.0 = phi i16 [ 1, %entry ], [ %conv2, %for.inc ]
67  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
68  %cmp = icmp slt i32 %i.0, 8
69  br i1 %cmp, label %for.body, label %for.cond.cleanup
70
71for.cond.cleanup:
72  br label %for.end
73
; Loop body: widen p[i] and the accumulator to i32, multiply, narrow to i16.
74for.body:
75  %idxprom = sext i32 %i.0 to i64
76  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %idxprom
77  %0 = load i16, ptr %arrayidx, align 2, !tbaa !7
78  %conv = sext i16 %0 to i32
79  %conv1 = sext i16 %r.0 to i32
80  %mul = mul nsw i32 %conv1, %conv
81  %conv2 = trunc i32 %mul to i16
82  br label %for.inc
83
84for.inc:
85  %inc = add nsw i32 %i.0, 1
86  br label %for.cond
87
88for.end:
89  ret i16 %r.0
90}
91
; or_v16i8: bitwise-or reduction over sixteen consecutive i8 values at %p.
; The scalar loop (i = 0 .. 15) keeps an i8 accumulator %r.0 starting at 0;
; each step sign-extends both operands to i32, ors them, and truncates the
; result back to i8. The assertions below expect a single <16 x i8> load and
; a four-step shuffle/or reduction performed directly on i8 lanes.
92define signext i8 @or_v16i8(ptr %p) #0 {
93; CHECK-LABEL: @or_v16i8(
94; CHECK-NEXT:  entry:
95; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1, !tbaa [[TBAA6:![0-9]+]]
96; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
97; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
98; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
99; CHECK-NEXT:    [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
100; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
101; CHECK-NEXT:    [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
102; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
103; CHECK-NEXT:    [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
104; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
105; CHECK-NEXT:    ret i8 [[TMP2]]
106;
107entry:
108  br label %for.cond
109
; Loop header: %r.0 is the running or-value, %i.0 the induction variable.
110for.cond:
111  %r.0 = phi i8 [ 0, %entry ], [ %conv2, %for.inc ]
112  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
113  %cmp = icmp slt i32 %i.0, 16
114  br i1 %cmp, label %for.body, label %for.cond.cleanup
115
116for.cond.cleanup:
117  br label %for.end
118
; Loop body: widen p[i] and the accumulator to i32, or them, narrow to i8.
119for.body:
120  %idxprom = sext i32 %i.0 to i64
121  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %idxprom
122  %0 = load i8, ptr %arrayidx, align 1, !tbaa !9
123  %conv = sext i8 %0 to i32
124  %conv1 = sext i8 %r.0 to i32
125  %or = or i32 %conv1, %conv
126  %conv2 = trunc i32 %or to i8
127  br label %for.inc
128
129for.inc:
130  %inc = add nsw i32 %i.0, 1
131  br label %for.cond
132
133for.end:
134  ret i8 %r.0
135}
136
; smin_v4i32: signed-minimum reduction over four consecutive i32 values at %p.
; The scalar loop (i = 0 .. 3) starts the accumulator %r.0 at 2147483647 and
; replaces it with p[i] whenever p[i] is signed-less-than the accumulator.
; The selection is written as a branch diamond that reloads p[i] on the taken
; side. The assertions below expect a single <4 x i32> load and a two-step
; shuffle/@llvm.smin.v4i32 reduction.
137define i32 @smin_v4i32(ptr %p) #0 {
138; CHECK-LABEL: @smin_v4i32(
139; CHECK-NEXT:  entry:
140; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
141; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
142; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]])
143; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
144; CHECK-NEXT:    [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]])
145; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0
146; CHECK-NEXT:    ret i32 [[TMP2]]
147;
148entry:
149  br label %for.cond
150
; Loop header: %r.0 is the running minimum, %i.0 the induction variable.
151for.cond:
152  %r.0 = phi i32 [ 2147483647, %entry ], [ %cond, %for.inc ]
153  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
154  %cmp = icmp slt i32 %i.0, 4
155  br i1 %cmp, label %for.body, label %for.cond.cleanup
156
157for.cond.cleanup:
158  br label %for.end
159
; Loop body: compare p[i] against the running minimum (signed).
160for.body:
161  %idxprom = sext i32 %i.0 to i64
162  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
163  %0 = load i32, ptr %arrayidx, align 4, !tbaa !3
164  %cmp1 = icmp slt i32 %0, %r.0
165  br i1 %cmp1, label %cond.true, label %cond.false
166
; Taken when p[i] is smaller: p[i] is loaded a second time from the same
; address rather than reusing %0.
167cond.true:
168  %idxprom2 = sext i32 %i.0 to i64
169  %arrayidx3 = getelementptr inbounds i32, ptr %p, i64 %idxprom2
170  %1 = load i32, ptr %arrayidx3, align 4, !tbaa !3
171  br label %cond.end
172
173cond.false:
174  br label %cond.end
175
; Merge: new minimum is either the reloaded p[i] or the previous accumulator.
176cond.end:
177  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
178  br label %for.inc
179
180for.inc:
181  %inc = add nsw i32 %i.0, 1
182  br label %for.cond
183
184for.end:
185  ret i32 %r.0
186}
187
; umax_v4i32: unsigned-maximum reduction over four consecutive i32 values at
; %p. The scalar loop (i = 0 .. 3) starts the accumulator %r.0 at 0 and
; replaces it with p[i] whenever p[i] is unsigned-greater-than the
; accumulator, using a branch diamond that reloads p[i] on the taken side.
; The assertions below expect a single <4 x i32> load and a two-step
; shuffle/@llvm.umax.v4i32 reduction.
188define i32 @umax_v4i32(ptr %p) #0 {
189; CHECK-LABEL: @umax_v4i32(
190; CHECK-NEXT:  entry:
191; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
192; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
193; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]])
194; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
195; CHECK-NEXT:    [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]])
196; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0
197; CHECK-NEXT:    ret i32 [[TMP2]]
198;
199entry:
200  br label %for.cond
201
; Loop header: %r.0 is the running maximum, %i.0 the induction variable.
202for.cond:
203  %r.0 = phi i32 [ 0, %entry ], [ %cond, %for.inc ]
204  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
205  %cmp = icmp slt i32 %i.0, 4
206  br i1 %cmp, label %for.body, label %for.cond.cleanup
207
208for.cond.cleanup:
209  br label %for.end
210
; Loop body: compare p[i] against the running maximum (unsigned).
211for.body:
212  %idxprom = sext i32 %i.0 to i64
213  %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
214  %0 = load i32, ptr %arrayidx, align 4, !tbaa !3
215  %cmp1 = icmp ugt i32 %0, %r.0
216  br i1 %cmp1, label %cond.true, label %cond.false
217
; Taken when p[i] is larger: p[i] is loaded a second time from the same
; address rather than reusing %0.
218cond.true:
219  %idxprom2 = sext i32 %i.0 to i64
220  %arrayidx3 = getelementptr inbounds i32, ptr %p, i64 %idxprom2
221  %1 = load i32, ptr %arrayidx3, align 4, !tbaa !3
222  br label %cond.end
223
224cond.false:
225  br label %cond.end
226
; Merge: new maximum is either the reloaded p[i] or the previous accumulator.
227cond.end:
228  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
229  br label %for.inc
230
231for.inc:
232  %inc = add nsw i32 %i.0, 1
233  br label %for.cond
234
235for.end:
236  ret i32 %r.0
237}
238
; fadd_v4i32: fast-math floating-point add reduction over four consecutive
; float values at %p. Despite the "v4i32" in the name, the element type is
; float. The scalar loop (i = 0 .. 3) starts the accumulator %r.0 at 42.0.
; The assertions below expect a single <4 x float> load, a two-step
; shuffle/fadd reduction, and a final scalar fadd that folds in the 42.0
; start value.
239define float @fadd_v4i32(ptr %p) #0 {
240; CHECK-LABEL: @fadd_v4i32(
241; CHECK-NEXT:  entry:
242; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7:![0-9]+]]
243; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
244; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
245; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
246; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
247; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
248; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]]
249; CHECK-NEXT:    ret float [[BIN_RDX5]]
250;
251entry:
252  br label %for.cond
253
; Loop header: %r.0 is the running sum, %i.0 the induction variable.
254for.cond:
255  %r.0 = phi float [ 4.200000e+01, %entry ], [ %add, %for.inc ]
256  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
257  %cmp = icmp slt i32 %i.0, 4
258  br i1 %cmp, label %for.body, label %for.cond.cleanup
259
260for.cond.cleanup:
261  br label %for.end
262
; Loop body: load p[i] and add it to the running sum with fast-math flags.
263for.body:
264  %idxprom = sext i32 %i.0 to i64
265  %arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
266  %0 = load float, ptr %arrayidx, align 4, !tbaa !10
267  %add = fadd fast float %r.0, %0
268  br label %for.inc
269
270for.inc:
271  %inc = add nsw i32 %i.0, 1
272  br label %for.cond
273
274for.end:
275  ret float %r.0
276}
277
; fmul_v4i32: fast-math floating-point multiply reduction over four
; consecutive float values at %p. Despite the "v4i32" in the name, the
; element type is float. The scalar loop (i = 0 .. 3) starts the accumulator
; %r.0 at 42.0. The assertions below expect a single <4 x float> load, a
; two-step shuffle/fmul reduction, a scalar fmul by 1.0, and a final scalar
; fmul by the 42.0 start value.
278define float @fmul_v4i32(ptr %p) #0 {
279; CHECK-LABEL: @fmul_v4i32(
280; CHECK-NEXT:  entry:
281; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
282; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
283; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
284; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
285; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
286; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
287; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]
288; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fmul fast float [[BIN_RDX5]], 4.200000e+01
289; CHECK-NEXT:    ret float [[OP_EXTRA]]
290;
291entry:
292  br label %for.cond
293
; Loop header: %r.0 is the running product, %i.0 the induction variable.
294for.cond:
295  %r.0 = phi float [ 4.200000e+01, %entry ], [ %mul, %for.inc ]
296  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
297  %cmp = icmp slt i32 %i.0, 4
298  br i1 %cmp, label %for.body, label %for.cond.cleanup
299
300for.cond.cleanup:
301  br label %for.end
302
; Loop body: load p[i] and multiply it into the running product (fast-math).
303for.body:
304  %idxprom = sext i32 %i.0 to i64
305  %arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
306  %0 = load float, ptr %arrayidx, align 4, !tbaa !10
307  %mul = fmul fast float %r.0, %0
308  br label %for.inc
309
310for.inc:
311  %inc = add nsw i32 %i.0, 1
312  br label %for.cond
313
314for.end:
315  ret float %r.0
316}
317
; fmin_v4f32: fast-math floating-point minimum reduction over four
; consecutive float values at %p. The scalar loop (i = 0 .. 3) starts the
; accumulator %r.0 at 0x47EFFFFFE0000000 (presumably the largest finite float
; value; verify) and replaces it with p[i] whenever p[i] compares
; ordered-less-than the accumulator, using a branch diamond that reloads p[i]
; on the taken side. The assertions below expect a single <4 x float> load
; and a two-step shuffle + fcmp olt/select reduction carrying fast flags.
318define float @fmin_v4f32(ptr %p) #0 {
319; CHECK-LABEL: @fmin_v4f32(
320; CHECK-NEXT:  entry:
321; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
322; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
323; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
324; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
325; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
326; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
327; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]]
328; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0
329; CHECK-NEXT:    ret float [[TMP2]]
330;
331entry:
332  br label %for.cond
333
; Loop header: %r.0 is the running minimum, %i.0 the induction variable.
334for.cond:
335  %r.0 = phi float [  0x47EFFFFFE0000000, %entry ], [ %cond, %for.inc ]
336  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
337  %cmp = icmp slt i32 %i.0, 4
338  br i1 %cmp, label %for.body, label %for.cond.cleanup
339
340for.cond.cleanup:
341  br label %for.end
342
; Loop body: compare p[i] against the running minimum (ordered, fast-math).
343for.body:
344  %idxprom = sext i32 %i.0 to i64
345  %arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
346  %0 = load float, ptr %arrayidx, align 4, !tbaa !10
347  %cmp1 = fcmp fast olt float %0, %r.0
348  br i1 %cmp1, label %cond.true, label %cond.false
349
; Taken when p[i] is smaller: p[i] is loaded a second time from the same
; address rather than reusing %0.
350cond.true:
351  %idxprom2 = sext i32 %i.0 to i64
352  %arrayidx3 = getelementptr inbounds float, ptr %p, i64 %idxprom2
353  %1 = load float, ptr %arrayidx3, align 4, !tbaa !10
354  br label %cond.end
355
356cond.false:
357  br label %cond.end
358
; Merge: the phi itself carries the fast flags that the expected select keeps.
359cond.end:
360  %cond = phi fast float [ %1, %cond.true ], [ %r.0, %cond.false ]
361  br label %for.inc
362
363for.inc:
364  %inc = add nsw i32 %i.0, 1
365  br label %for.cond
366
367for.end:
368  ret float %r.0
369}
370
; max: scalar helper returning %a when %a compares ordered-greater-than %b
; (with nnan ninf nsz flags) and %b otherwise. Both arguments are spilled to
; stack slots and reloaded before use. The definition has
; available_externally linkage and carries no expected-output assertions of
; its own; within this file it is only called from @findMax.
371define available_externally float @max(float %a, float %b) {
372entry:
373  %a.addr = alloca float, align 4
374  %b.addr = alloca float, align 4
375  store float %a, ptr %a.addr, align 4
376  store float %b, ptr %b.addr, align 4
377  %0 = load float, ptr %a.addr, align 4
378  %1 = load float, ptr %b.addr, align 4
379  %cmp = fcmp nnan ninf nsz ogt float %0, %1
380  br i1 %cmp, label %cond.true, label %cond.false
381
; %a is the larger value: reload it from its stack slot.
382cond.true:                                        ; preds = %entry
383  %2 = load float, ptr %a.addr, align 4
384  br label %cond.end
385
; Otherwise: reload %b from its stack slot.
386cond.false:                                       ; preds = %entry
387  %3 = load float, ptr %b.addr, align 4
388  br label %cond.end
389
390cond.end:                                         ; preds = %cond.false, %cond.true
391  %cond = phi nnan ninf nsz float [ %2, %cond.true ], [ %3, %cond.false ]
392  ret float %cond
393}
394
395; PR23116
396
; findMax: maximum of the eight lanes of a <8 x float> passed by value
; through memory (byval, align 16). The vector is copied into a local stack
; slot, then each lane is extracted from a fresh reload of that slot and fed
; into a left-to-right chain of calls to @max. The assertions below expect
; the calls and the stack copy to disappear, leaving a single <8 x float>
; load and a three-step shuffle + fcmp ogt/select reduction with
; nnan ninf nsz flags.
; NOTE(review): the vector loads and the store use !tbaa !3, whose type node
; is named "int", although the data is float. Looks carried over from the
; original reproducer; confirm it is intentional.
397define float @findMax(ptr byval(<8 x float>) align 16 %0) {
398; CHECK-LABEL: @findMax(
399; CHECK-NEXT:  entry:
400; CHECK-NEXT:    [[V:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
401; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
402; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
403; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
404; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
405; CHECK-NEXT:    [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]]
406; CHECK-NEXT:    [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]]
407; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
408; CHECK-NEXT:    [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]]
409; CHECK-NEXT:    [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]]
410; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0
411; CHECK-NEXT:    ret float [[TMP1]]
412;
413entry:
; Copy the incoming vector into a local, 32-byte-aligned stack slot.
414  %v.addr = alloca <8 x float>, align 32
415  %v = load <8 x float>, ptr %0, align 16, !tbaa !3
416  store <8 x float> %v, ptr %v.addr, align 32, !tbaa !3
; Lanes 0 and 1 seed the chain; every later lane is combined with the
; previous call's result.
417  %1 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
418  %vecext = extractelement <8 x float> %1, i32 0
419  %2 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
420  %vecext1 = extractelement <8 x float> %2, i32 1
421  %call = call nnan ninf nsz float @max(float %vecext, float %vecext1)
422  %3 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
423  %vecext2 = extractelement <8 x float> %3, i32 2
424  %call3 = call nnan ninf nsz float @max(float %call, float %vecext2)
425  %4 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
426  %vecext4 = extractelement <8 x float> %4, i32 3
427  %call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)
428  %5 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
429  %vecext6 = extractelement <8 x float> %5, i32 4
430  %call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)
431  %6 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
432  %vecext8 = extractelement <8 x float> %6, i32 5
433  %call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)
434  %7 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
435  %vecext10 = extractelement <8 x float> %7, i32 6
436  %call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)
437  %8 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
438  %vecext12 = extractelement <8 x float> %8, i32 7
439  %call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)
440  ret float %call13
441}
442
; Attribute group #0 is attached to the eight loop-based reduction functions
; in this file (not to @max or @findMax). It enables the no-infs, no-nans,
; no-signed-zeros and unsafe-fp-math string attributes and requests the
; "penryn" CPU with +avx added to the target feature list.
443attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }
444
; NOTE(review): !0, !1 and !2 have the shape of module-flag and ident entries,
; but no named metadata referencing them is visible in this file. Confirm
; whether they are still needed.
445!0 = !{i32 1, !"wchar_size", i32 4}
446!1 = !{i32 7, !"PIC Level", i32 2}
447!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
; TBAA access tags used by the loads and stores above:
;   !3  -> "int"   (type node !4)
;   !7  -> "short" (type node !8)
;   !9  -> "omnipotent char" (type node !5)
;   !10 -> "float" (type node !11)
; All type nodes descend from the "Simple C/C++ TBAA" root !6 via !5.
448!3 = !{!4, !4, i64 0}
449!4 = !{!"int", !5, i64 0}
450!5 = !{!"omnipotent char", !6, i64 0}
451!6 = !{!"Simple C/C++ TBAA"}
452!7 = !{!8, !8, i64 0}
453!8 = !{!"short", !5, i64 0}
454!9 = !{!5, !5, i64 0}
455!10 = !{!11, !11, i64 0}
456!11 = !{!"float", !5, i64 0}
457