; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE4
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
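; The first four RUN lines differ only in target CPU (SSE2 baseline, x86-64-v2,
; corei7-avx, core-avx2) and share the CHECK/DEFAULT prefixes; the skx run also
; passes -slp-threshold=-100, which should bias the SLP cost model far enough to
; vectorize reductions that would otherwise be rejected as unprofitable, and its
; output is matched under the THRESH prefix. To regenerate the CHECK lines after
; editing the IR below, rerun the script named in the NOTE above, e.g.
; (paths illustrative):
;   llvm/utils/update_test_checks.py --opt-binary=<build>/bin/opt \
;     llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll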

@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
@arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16
@arrp = local_unnamed_addr global [32 x ptr] zeroinitializer, align 16
@var = global i32 zeroinitializer, align 8

declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
declare i64 @llvm.umax.i64(i64, i64)
declare i8 @llvm.umin.i8(i8, i8)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)

define i32 @maxi8(i32) {
; CHECK-LABEL: @maxi8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = load i32, ptr @arr, align 16
  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  %4 = icmp sgt i32 %2, %3
  %5 = select i1 %4, i32 %2, i32 %3
  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  %7 = icmp sgt i32 %5, %6
  %8 = select i1 %7, i32 %5, i32 %6
  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  %10 = icmp sgt i32 %8, %9
  %11 = select i1 %10, i32 %8, i32 %9
  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  %13 = icmp sgt i32 %11, %12
  %14 = select i1 %13, i32 %11, i32 %12
  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  %16 = icmp sgt i32 %14, %15
  %17 = select i1 %16, i32 %14, i32 %15
  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  %19 = icmp sgt i32 %17, %18
  %20 = select i1 %19, i32 %17, i32 %18
  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  %22 = icmp sgt i32 %20, %21
  %23 = select i1 %22, i32 %20, i32 %21
  ret i32 %23
}

define i32 @maxi8_store_in(i32) {
; CHECK-LABEL: @maxi8_store_in(
; CHECK-NEXT:    store i32 0, ptr @var, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = load i32, ptr @arr, align 16
  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  %4 = icmp sgt i32 %2, %3
  %5 = select i1 %4, i32 %2, i32 %3
  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  %7 = icmp sgt i32 %5, %6
  %8 = select i1 %7, i32 %5, i32 %6
  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  %10 = icmp sgt i32 %8, %9
  %11 = select i1 %10, i32 %8, i32 %9
  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  %13 = icmp sgt i32 %11, %12
  %14 = select i1 %13, i32 %11, i32 %12
  store i32 0, ptr @var, align 8
  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  %16 = icmp sgt i32 %14, %15
  %17 = select i1 %16, i32 %14, i32 %15
  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  %19 = icmp sgt i32 %17, %18
  %20 = select i1 %19, i32 %17, i32 %18
  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  %22 = icmp sgt i32 %20, %21
  %23 = select i1 %22, i32 %20, i32 %21
  ret i32 %23
}

define i32 @maxi16(i32) {
; CHECK-LABEL: @maxi16(
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = load i32, ptr @arr, align 16
  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  %4 = icmp sgt i32 %2, %3
  %5 = select i1 %4, i32 %2, i32 %3
  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  %7 = icmp sgt i32 %5, %6
  %8 = select i1 %7, i32 %5, i32 %6
  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  %10 = icmp sgt i32 %8, %9
  %11 = select i1 %10, i32 %8, i32 %9
  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  %13 = icmp sgt i32 %11, %12
  %14 = select i1 %13, i32 %11, i32 %12
  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  %16 = icmp sgt i32 %14, %15
  %17 = select i1 %16, i32 %14, i32 %15
  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  %19 = icmp sgt i32 %17, %18
  %20 = select i1 %19, i32 %17, i32 %18
  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  %22 = icmp sgt i32 %20, %21
  %23 = select i1 %22, i32 %20, i32 %21
  %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
  %25 = icmp sgt i32 %23, %24
  %26 = select i1 %25, i32 %23, i32 %24
  %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
  %28 = icmp sgt i32 %26, %27
  %29 = select i1 %28, i32 %26, i32 %27
  %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
  %31 = icmp sgt i32 %29, %30
  %32 = select i1 %31, i32 %29, i32 %30
  %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
  %34 = icmp sgt i32 %32, %33
  %35 = select i1 %34, i32 %32, i32 %33
  %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
  %37 = icmp sgt i32 %35, %36
  %38 = select i1 %37, i32 %35, i32 %36
  %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
  %40 = icmp sgt i32 %38, %39
  %41 = select i1 %40, i32 %38, i32 %39
  %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
  %43 = icmp sgt i32 %41, %42
  %44 = select i1 %43, i32 %41, i32 %42
  %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
  %46 = icmp sgt i32 %44, %45
  %47 = select i1 %46, i32 %44, i32 %45
  ret i32 %47
}

define i32 @maxi32(i32) {
; CHECK-LABEL: @maxi32(
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, ptr @arr, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = load i32, ptr @arr, align 16
  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
  %4 = icmp sgt i32 %2, %3
  %5 = select i1 %4, i32 %2, i32 %3
  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
  %7 = icmp sgt i32 %5, %6
  %8 = select i1 %7, i32 %5, i32 %6
  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
  %10 = icmp sgt i32 %8, %9
  %11 = select i1 %10, i32 %8, i32 %9
  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
  %13 = icmp sgt i32 %11, %12
  %14 = select i1 %13, i32 %11, i32 %12
  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
  %16 = icmp sgt i32 %14, %15
  %17 = select i1 %16, i32 %14, i32 %15
  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
  %19 = icmp sgt i32 %17, %18
  %20 = select i1 %19, i32 %17, i32 %18
  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
  %22 = icmp sgt i32 %20, %21
  %23 = select i1 %22, i32 %20, i32 %21
  %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
  %25 = icmp sgt i32 %23, %24
  %26 = select i1 %25, i32 %23, i32 %24
  %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
  %28 = icmp sgt i32 %26, %27
  %29 = select i1 %28, i32 %26, i32 %27
  %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
  %31 = icmp sgt i32 %29, %30
  %32 = select i1 %31, i32 %29, i32 %30
  %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
  %34 = icmp sgt i32 %32, %33
  %35 = select i1 %34, i32 %32, i32 %33
  %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
  %37 = icmp sgt i32 %35, %36
  %38 = select i1 %37, i32 %35, i32 %36
  %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
  %40 = icmp sgt i32 %38, %39
  %41 = select i1 %40, i32 %38, i32 %39
  %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
  %43 = icmp sgt i32 %41, %42
  %44 = select i1 %43, i32 %41, i32 %42
  %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
  %46 = icmp sgt i32 %44, %45
  %47 = select i1 %46, i32 %44, i32 %45
  %48 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 16), align 16
  %49 = icmp sgt i32 %47, %48
  %50 = select i1 %49, i32 %47, i32 %48
  %51 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 17), align 4
  %52 = icmp sgt i32 %50, %51
  %53 = select i1 %52, i32 %50, i32 %51
  %54 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 18), align 8
  %55 = icmp sgt i32 %53, %54
  %56 = select i1 %55, i32 %53, i32 %54
  %57 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 19), align 4
  %58 = icmp sgt i32 %56, %57
  %59 = select i1 %58, i32 %56, i32 %57
  %60 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 20), align 16
  %61 = icmp sgt i32 %59, %60
  %62 = select i1 %61, i32 %59, i32 %60
  %63 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 21), align 4
  %64 = icmp sgt i32 %62, %63
  %65 = select i1 %64, i32 %62, i32 %63
  %66 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 22), align 8
  %67 = icmp sgt i32 %65, %66
  %68 = select i1 %67, i32 %65, i32 %66
  %69 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 23), align 4
  %70 = icmp sgt i32 %68, %69
  %71 = select i1 %70, i32 %68, i32 %69
  %72 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 24), align 16
  %73 = icmp sgt i32 %71, %72
  %74 = select i1 %73, i32 %71, i32 %72
  %75 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 25), align 4
  %76 = icmp sgt i32 %74, %75
  %77 = select i1 %76, i32 %74, i32 %75
  %78 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 26), align 8
  %79 = icmp sgt i32 %77, %78
  %80 = select i1 %79, i32 %77, i32 %78
  %81 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 27), align 4
  %82 = icmp sgt i32 %80, %81
  %83 = select i1 %82, i32 %80, i32 %81
  %84 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 28), align 16
  %85 = icmp sgt i32 %83, %84
  %86 = select i1 %85, i32 %83, i32 %84
  %87 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 29), align 4
  %88 = icmp sgt i32 %86, %87
  %89 = select i1 %88, i32 %86, i32 %87
  %90 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 30), align 8
  %91 = icmp sgt i32 %89, %90
  %92 = select i1 %91, i32 %89, i32 %90
  %93 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 31), align 4
  %94 = icmp sgt i32 %92, %93
  %95 = select i1 %94, i32 %92, i32 %93
  ret i32 %95
}

; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
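; For illustration only (not checked by this test): with fast-math on the select,
; InstCombine would fold the scalar max idiom used below, e.g.
;   %cmp = fcmp fast ogt float %a, %b
;   %max = select fast i1 %cmp, float %a, float %b
; into a call to the max intrinsic:
;   %max = call fast float @llvm.maxnum.f32(float %a, float %b)
; The selects in these functions carry no fast-math flags, so the raw
; fcmp+select chains are kept as written here.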

define float @maxf8(float) {
; DEFAULT-LABEL: @maxf8(
; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT:    ret float [[TMP23]]
;
; THRESH-LABEL: @maxf8(
; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT:    ret float [[TMP24]]
;
  %2 = load float, ptr @arr1, align 16
  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
  %4 = fcmp fast ogt float %2, %3
  %5 = select i1 %4, float %2, float %3
  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
  %7 = fcmp fast ogt float %5, %6
  %8 = select i1 %7, float %5, float %6
  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
  %10 = fcmp fast ogt float %8, %9
  %11 = select i1 %10, float %8, float %9
  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
  %13 = fcmp fast ogt float %11, %12
  %14 = select i1 %13, float %11, float %12
  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
  %16 = fcmp fast ogt float %14, %15
  %17 = select i1 %16, float %14, float %15
  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
  %19 = fcmp fast ogt float %17, %18
  %20 = select i1 %19, float %17, float %18
  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
  %22 = fcmp fast ogt float %20, %21
  %23 = select i1 %22, float %20, float %21
  ret float %23
}

; Note: legacy test - maxnum intrinsics match what InstCombine creates for fcmp+select with fastmath on the select.

define float @maxf16(float) {
; DEFAULT-LABEL: @maxf16(
; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
; DEFAULT-NEXT:    ret float [[TMP47]]
;
; THRESH-LABEL: @maxf16(
; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
; THRESH-NEXT:    ret float [[TMP48]]
;
  %2 = load float, ptr @arr1, align 16
  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
  %4 = fcmp fast ogt float %2, %3
  %5 = select i1 %4, float %2, float %3
  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
  %7 = fcmp fast ogt float %5, %6
  %8 = select i1 %7, float %5, float %6
  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
  %10 = fcmp fast ogt float %8, %9
  %11 = select i1 %10, float %8, float %9
  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
  %13 = fcmp fast ogt float %11, %12
  %14 = select i1 %13, float %11, float %12
  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
  %16 = fcmp fast ogt float %14, %15
  %17 = select i1 %16, float %14, float %15
  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
  %19 = fcmp fast ogt float %17, %18
  %20 = select i1 %19, float %17, float %18
  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
  %22 = fcmp fast ogt float %20, %21
  %23 = select i1 %22, float %20, float %21
  %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
  %25 = fcmp fast ogt float %23, %24
  %26 = select i1 %25, float %23, float %24
  %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
  %28 = fcmp fast ogt float %26, %27
  %29 = select i1 %28, float %26, float %27
  %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
  %31 = fcmp fast ogt float %29, %30
  %32 = select i1 %31, float %29, float %30
  %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
  %34 = fcmp fast ogt float %32, %33
  %35 = select i1 %34, float %32, float %33
  %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
  %37 = fcmp fast ogt float %35, %36
  %38 = select i1 %37, float %35, float %36
  %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
  %40 = fcmp fast ogt float %38, %39
  %41 = select i1 %40, float %38, float %39
  %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
  %43 = fcmp fast ogt float %41, %42
  %44 = select i1 %43, float %41, float %42
  %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
  %46 = fcmp fast ogt float %44, %45
  %47 = select i1 %46, float %44, float %45
  ret float %47
}

; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.

define float @maxf32(float) {
; DEFAULT-LABEL: @maxf32(
; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
; DEFAULT-NEXT:    [[TMP48:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
; DEFAULT-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]]
; DEFAULT-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]]
; DEFAULT-NEXT:    [[TMP51:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
; DEFAULT-NEXT:    [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]]
; DEFAULT-NEXT:    [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]]
; DEFAULT-NEXT:    [[TMP54:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
; DEFAULT-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]]
; DEFAULT-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]]
; DEFAULT-NEXT:    [[TMP57:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
; DEFAULT-NEXT:    [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]]
; DEFAULT-NEXT:    [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]]
; DEFAULT-NEXT:    [[TMP60:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
; DEFAULT-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]]
; DEFAULT-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]]
; DEFAULT-NEXT:    [[TMP63:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
; DEFAULT-NEXT:    [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]]
; DEFAULT-NEXT:    [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]]
; DEFAULT-NEXT:    [[TMP66:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
; DEFAULT-NEXT:    [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]]
; DEFAULT-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]]
; DEFAULT-NEXT:    [[TMP69:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
; DEFAULT-NEXT:    [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]]
; DEFAULT-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]]
; DEFAULT-NEXT:    [[TMP72:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
; DEFAULT-NEXT:    [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]]
; DEFAULT-NEXT:    [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]]
; DEFAULT-NEXT:    [[TMP75:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
; DEFAULT-NEXT:    [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]]
; DEFAULT-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]]
; DEFAULT-NEXT:    [[TMP78:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
; DEFAULT-NEXT:    [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]]
; DEFAULT-NEXT:    [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]]
; DEFAULT-NEXT:    [[TMP81:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
; DEFAULT-NEXT:    [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]]
; DEFAULT-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]]
; DEFAULT-NEXT:    [[TMP84:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
; DEFAULT-NEXT:    [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]]
; DEFAULT-NEXT:    [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]]
; DEFAULT-NEXT:    [[TMP87:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
; DEFAULT-NEXT:    [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]]
; DEFAULT-NEXT:    [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]]
; DEFAULT-NEXT:    [[TMP90:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
; DEFAULT-NEXT:    [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]]
; DEFAULT-NEXT:    [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]]
; DEFAULT-NEXT:    [[TMP93:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
; DEFAULT-NEXT:    [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]]
; DEFAULT-NEXT:    [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]]
; DEFAULT-NEXT:    ret float [[TMP95]]
;
; THRESH-LABEL: @maxf32(
; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
; THRESH-NEXT:    [[TMP49:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
; THRESH-NEXT:    [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]]
; THRESH-NEXT:    [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]]
; THRESH-NEXT:    [[TMP52:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
; THRESH-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]]
; THRESH-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]]
; THRESH-NEXT:    [[TMP55:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
; THRESH-NEXT:    [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]]
; THRESH-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]]
; THRESH-NEXT:    [[TMP58:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
; THRESH-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]]
; THRESH-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]]
; THRESH-NEXT:    [[TMP61:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
; THRESH-NEXT:    [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]]
; THRESH-NEXT:    [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]]
; THRESH-NEXT:    [[TMP64:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
; THRESH-NEXT:    [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]]
; THRESH-NEXT:    [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]]
; THRESH-NEXT:    [[TMP67:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
; THRESH-NEXT:    [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]]
; THRESH-NEXT:    [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]]
; THRESH-NEXT:    [[TMP70:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
; THRESH-NEXT:    [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]]
; THRESH-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]]
; THRESH-NEXT:    [[TMP73:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
; THRESH-NEXT:    [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]]
; THRESH-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]]
; THRESH-NEXT:    [[TMP76:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
; THRESH-NEXT:    [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]]
; THRESH-NEXT:    [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]]
; THRESH-NEXT:    [[TMP79:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
; THRESH-NEXT:    [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]]
; THRESH-NEXT:    [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]]
; THRESH-NEXT:    [[TMP82:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
; THRESH-NEXT:    [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]]
; THRESH-NEXT:    [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]]
; THRESH-NEXT:    [[TMP85:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
; THRESH-NEXT:    [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]]
; THRESH-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]]
; THRESH-NEXT:    [[TMP88:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
; THRESH-NEXT:    [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]]
; THRESH-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]]
; THRESH-NEXT:    [[TMP91:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
; THRESH-NEXT:    [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]]
; THRESH-NEXT:    [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]]
; THRESH-NEXT:    [[TMP94:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
; THRESH-NEXT:    [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]]
; THRESH-NEXT:    [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]]
; THRESH-NEXT:    ret float [[TMP96]]
;
  %2 = load float, ptr @arr1, align 16
  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
  %4 = fcmp fast ogt float %2, %3
  %5 = select i1 %4, float %2, float %3
  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
  %7 = fcmp fast ogt float %5, %6
  %8 = select i1 %7, float %5, float %6
  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
  %10 = fcmp fast ogt float %8, %9
  %11 = select i1 %10, float %8, float %9
  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
  %13 = fcmp fast ogt float %11, %12
  %14 = select i1 %13, float %11, float %12
  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
  %16 = fcmp fast ogt float %14, %15
  %17 = select i1 %16, float %14, float %15
  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
  %19 = fcmp fast ogt float %17, %18
  %20 = select i1 %19, float %17, float %18
  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
  %22 = fcmp fast ogt float %20, %21
  %23 = select i1 %22, float %20, float %21
  %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
  %25 = fcmp fast ogt float %23, %24
  %26 = select i1 %25, float %23, float %24
  %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
  %28 = fcmp fast ogt float %26, %27
  %29 = select i1 %28, float %26, float %27
  %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
  %31 = fcmp fast ogt float %29, %30
  %32 = select i1 %31, float %29, float %30
  %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
  %34 = fcmp fast ogt float %32, %33
  %35 = select i1 %34, float %32, float %33
  %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
  %37 = fcmp fast ogt float %35, %36
  %38 = select i1 %37, float %35, float %36
  %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
  %40 = fcmp fast ogt float %38, %39
  %41 = select i1 %40, float %38, float %39
  %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
  %43 = fcmp fast ogt float %41, %42
  %44 = select i1 %43, float %41, float %42
  %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
  %46 = fcmp fast ogt float %44, %45
  %47 = select i1 %46, float %44, float %45
  %48 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
  %49 = fcmp fast ogt float %47, %48
  %50 = select i1 %49, float %47, float %48
  %51 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
  %52 = fcmp fast ogt float %50, %51
  %53 = select i1 %52, float %50, float %51
  %54 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
  %55 = fcmp fast ogt float %53, %54
  %56 = select i1 %55, float %53, float %54
  %57 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
  %58 = fcmp fast ogt float %56, %57
  %59 = select i1 %58, float %56, float %57
  %60 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
  %61 = fcmp fast ogt float %59, %60
  %62 = select i1 %61, float %59, float %60
  %63 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
  %64 = fcmp fast ogt float %62, %63
  %65 = select i1 %64, float %62, float %63
  %66 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
  %67 = fcmp fast ogt float %65, %66
  %68 = select i1 %67, float %65, float %66
  %69 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
  %70 = fcmp fast ogt float %68, %69
  %71 = select i1 %70, float %68, float %69
  %72 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
  %73 = fcmp fast ogt float %71, %72
  %74 = select i1 %73, float %71, float %72
  %75 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
  %76 = fcmp fast ogt float %74, %75
  %77 = select i1 %76, float %74, float %75
  %78 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
  %79 = fcmp fast ogt float %77, %78
  %80 = select i1 %79, float %77, float %78
  %81 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
  %82 = fcmp fast ogt float %80, %81
  %83 = select i1 %82, float %80, float %81
  %84 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
  %85 = fcmp fast ogt float %83, %84
  %86 = select i1 %85, float %83, float %84
  %87 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
  %88 = fcmp fast ogt float %86, %87
  %89 = select i1 %88, float %86, float %87
  %90 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
  %91 = fcmp fast ogt float %89, %90
  %92 = select i1 %91, float %89, float %90
  %93 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
  %94 = fcmp fast ogt float %92, %93
  %95 = select i1 %94, float %92, float %93
  ret float %95
}

define i32 @maxi8_mutiple_uses(i32) {
; SSE2-LABEL: @maxi8_mutiple_uses(
; SSE2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
; SSE2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
; SSE2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE2-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
; SSE2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; SSE2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; SSE2-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
; SSE2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; SSE2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; SSE2-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
; SSE2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; SSE2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; SSE2-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
; SSE2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; SSE2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; SSE2-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
; SSE2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; SSE2-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
; SSE2-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
; SSE2-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; SSE2-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
; SSE2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; SSE2-NEXT:    store i32 [[TMP24]], ptr @var, align 8
; SSE2-NEXT:    ret i32 [[TMP23]]
;
; SSE4-LABEL: @maxi8_mutiple_uses(
; SSE4-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
; SSE4-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
; SSE4-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE4-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE4-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
; SSE4-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
; SSE4-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
; SSE4-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; SSE4-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; SSE4-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
; SSE4-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
; SSE4-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
; SSE4-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
; SSE4-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
; SSE4-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; SSE4-NEXT:    store i32 [[TMP10]], ptr @var, align 8
; SSE4-NEXT:    ret i32 [[OP_RDX5]]
;
; AVX-LABEL: @maxi8_mutiple_uses(
; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
; AVX-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
; AVX-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
; AVX-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
; AVX-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
829; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
830; AVX-NEXT:    store i32 [[TMP10]], ptr @var, align 8
831; AVX-NEXT:    ret i32 [[OP_RDX5]]
832;
833; THRESH-LABEL: @maxi8_mutiple_uses(
834; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
835; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
836; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
837; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
838; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
839; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
840; THRESH-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
841; THRESH-NEXT:    [[TMP9:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
842; THRESH-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i32 0
843; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP6]], i32 1
844; THRESH-NEXT:    [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP10]], [[TMP11]]
845; THRESH-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]]
846; THRESH-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
847; THRESH-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
848; THRESH-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
849; THRESH-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP14]], i32 [[TMP15]]
850; THRESH-NEXT:    [[TMP16:%.*]] = select i1 [[TMP5]], i32 3, i32 4
851; THRESH-NEXT:    store i32 [[TMP16]], ptr @var, align 8
852; THRESH-NEXT:    ret i32 [[OP_RDX5]]
853;
854  %2 = load i32, ptr @arr, align 16
855  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
856  %4 = icmp sgt i32 %2, %3
857  %5 = select i1 %4, i32 %2, i32 %3
858  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
859  %7 = icmp sgt i32 %5, %6
860  %8 = select i1 %7, i32 %5, i32 %6
861  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
862  %10 = icmp sgt i32 %8, %9
863  %11 = select i1 %10, i32 %8, i32 %9
864  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
865  %13 = icmp sgt i32 %11, %12
866  %14 = select i1 %13, i32 %11, i32 %12
867  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
868  %16 = icmp sgt i32 %14, %15
869  %17 = select i1 %16, i32 %14, i32 %15
870  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
871  %19 = icmp sgt i32 %17, %18
872  %20 = select i1 %19, i32 %17, i32 %18
873  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
874  %22 = icmp sgt i32 %20, %21
875  %23 = select i1 %22, i32 %20, i32 %21
876  %24 = select i1 %4, i32 3, i32 4
877  store i32 %24, ptr @var, align 8
878  ret i32 %23
879}
880
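; Variant of the previous test where the reused compare is an intermediate
; one: %10 compares the running max of arr[0..2] against arr[3] and also
; drives the value stored to @var.  That compare only exists in the scalar
; form of the chain, so the six-element reduction is not vectorized at all by
; DEFAULT, and THRESH merely merges the loads of arr[0] and arr[1] into a
; <2 x i32>.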
881define i32 @maxi8_mutiple_uses2(i32) {
882; DEFAULT-LABEL: @maxi8_mutiple_uses2(
883; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
884; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
885; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
886; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
887; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
888; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
889; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
890; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
891; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
892; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
893; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
894; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
895; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
896; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
897; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
898; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
899; DEFAULT-NEXT:    [[TMP18:%.*]] = select i1 [[TMP10]], i32 3, i32 4
900; DEFAULT-NEXT:    store i32 [[TMP18]], ptr @var, align 8
901; DEFAULT-NEXT:    ret i32 [[TMP17]]
902;
903; THRESH-LABEL: @maxi8_mutiple_uses2(
904; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
905; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
906; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
907; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
908; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
909; THRESH-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
910; THRESH-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
911; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
912; THRESH-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
913; THRESH-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
914; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
915; THRESH-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
916; THRESH-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
917; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]]
918; THRESH-NEXT:    [[TMP16:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
919; THRESH-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
920; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]]
921; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP11]], i32 3, i32 4
922; THRESH-NEXT:    store i32 [[TMP19]], ptr @var, align 8
923; THRESH-NEXT:    ret i32 [[TMP18]]
924;
925  %2 = load i32, ptr @arr, align 16
926  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
927  %4 = icmp sgt i32 %2, %3
928  %5 = select i1 %4, i32 %2, i32 %3
929  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
930  %7 = icmp sgt i32 %5, %6
931  %8 = select i1 %7, i32 %5, i32 %6
932  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
933  %10 = icmp sgt i32 %8, %9
934  %11 = select i1 %10, i32 %8, i32 %9
935  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
936  %13 = icmp sgt i32 %11, %12
937  %14 = select i1 %13, i32 %11, i32 %12
938  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
939  %16 = icmp sgt i32 %14, %15
940  %17 = select i1 %16, i32 %14, i32 %15
941  %18 = select i1 %10, i32 3, i32 4
942  store i32 %18, ptr @var, align 8
943  ret i32 %17
944}
945
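; The first compare is emitted in the entry block while the select that
; consumes it (and the rest of the max chain) lives in %pp, i.e. the
; reduction is split across two basic blocks.  The CHECK lines show SSE2
; staying scalar, AVX vectorizing the middle four elements, and SSE4/THRESH
; reassembling all eight elements with @llvm.vector.insert before a single
; @llvm.vector.reduce.smax.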
946define i32 @maxi8_wrong_parent(i32) {
947; SSE2-LABEL: @maxi8_wrong_parent(
948; SSE2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
949; SSE2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
950; SSE2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
951; SSE2-NEXT:    br label [[PP:%.*]]
952; SSE2:       pp:
953; SSE2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
954; SSE2-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
955; SSE2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
956; SSE2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
957; SSE2-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
958; SSE2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
959; SSE2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
960; SSE2-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
961; SSE2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
962; SSE2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
963; SSE2-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
964; SSE2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
965; SSE2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
966; SSE2-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
967; SSE2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
968; SSE2-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
969; SSE2-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
970; SSE2-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
971; SSE2-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
972; SSE2-NEXT:    ret i32 [[TMP23]]
973;
974; SSE4-LABEL: @maxi8_wrong_parent(
975; SSE4-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
976; SSE4-NEXT:    br label [[PP:%.*]]
977; SSE4:       pp:
978; SSE4-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
979; SSE4-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
980; SSE4-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP4]], i64 0)
981; SSE4-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP8]], i64 4)
982; SSE4-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6)
983; SSE4-NEXT:    [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]])
984; SSE4-NEXT:    ret i32 [[OP_RDX7]]
985;
986; AVX-LABEL: @maxi8_wrong_parent(
987; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
988; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
989; AVX-NEXT:    br label [[PP:%.*]]
990; AVX:       pp:
991; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
992; AVX-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
993; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
994; AVX-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
995; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP5]]
996; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP5]]
997; AVX-NEXT:    [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP6]], [[TMP2]]
998; AVX-NEXT:    [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP6]], i32 [[TMP2]]
999; AVX-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
1000; AVX-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
1001; AVX-NEXT:    [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]]
1002; AVX-NEXT:    [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[TMP3]]
1003; AVX-NEXT:    ret i32 [[OP_RDX7]]
1004;
1005; THRESH-LABEL: @maxi8_wrong_parent(
1006; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
1007; THRESH-NEXT:    br label [[PP:%.*]]
1008; THRESH:       pp:
1009; THRESH-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
1010; THRESH-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
1011; THRESH-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
1012; THRESH-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP4]], i64 4)
1013; THRESH-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6)
1014; THRESH-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]])
1015; THRESH-NEXT:    ret i32 [[TMP8]]
1016;
1017  %2 = load i32, ptr @arr, align 16
1018  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
1019  %4 = icmp sgt i32 %2, %3
1020  br label %pp
1021
1022pp:
1023  %5 = select i1 %4, i32 %2, i32 %3
1024  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
1025  %7 = icmp sgt i32 %5, %6
1026  %8 = select i1 %7, i32 %5, i32 %6
1027  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
1028  %10 = icmp sgt i32 %8, %9
1029  %11 = select i1 %10, i32 %8, i32 %9
1030  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
1031  %13 = icmp sgt i32 %11, %12
1032  %14 = select i1 %13, i32 %11, i32 %12
1033  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
1034  %16 = icmp sgt i32 %14, %15
1035  %17 = select i1 %16, i32 %14, i32 %15
1036  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
1037  %19 = icmp sgt i32 %17, %18
1038  %20 = select i1 %19, i32 %17, i32 %18
1039  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
1040  %22 = icmp sgt i32 %20, %21
1041  %23 = select i1 %22, i32 %20, i32 %21
1042  ret i32 %23
1043}
1044
1045; PR38191 - We don't handle array-of-pointer reductions.
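; The chain below is the same eight-element max pattern as @maxi8, but over
; ptr values compared with icmp ugt, presumably left alone because the
; @llvm.vector.reduce.{s,u}max intrinsics do not take pointer elements.
; DEFAULT is expected to stay fully scalar; only THRESH merges the first two
; loads into a <2 x ptr>.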
1046define ptr @maxp8(i32) {
1047; DEFAULT-LABEL: @maxp8(
1048; DEFAULT-NEXT:    [[TMP2:%.*]] = load ptr, ptr @arrp, align 16
1049; DEFAULT-NEXT:    [[TMP3:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
1050; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp ugt ptr [[TMP2]], [[TMP3]]
1051; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], ptr [[TMP2]], ptr [[TMP3]]
1052; DEFAULT-NEXT:    [[TMP6:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1053; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[TMP5]], [[TMP6]]
1054; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], ptr [[TMP5]], ptr [[TMP6]]
1055; DEFAULT-NEXT:    [[TMP9:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1056; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp ugt ptr [[TMP8]], [[TMP9]]
1057; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], ptr [[TMP8]], ptr [[TMP9]]
1058; DEFAULT-NEXT:    [[TMP12:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1059; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp ugt ptr [[TMP11]], [[TMP12]]
1060; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], ptr [[TMP11]], ptr [[TMP12]]
1061; DEFAULT-NEXT:    [[TMP15:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1062; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp ugt ptr [[TMP14]], [[TMP15]]
1063; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], ptr [[TMP14]], ptr [[TMP15]]
1064; DEFAULT-NEXT:    [[TMP18:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1065; DEFAULT-NEXT:    [[TMP19:%.*]] = icmp ugt ptr [[TMP17]], [[TMP18]]
1066; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], ptr [[TMP17]], ptr [[TMP18]]
1067; DEFAULT-NEXT:    [[TMP21:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1068; DEFAULT-NEXT:    [[TMP22:%.*]] = icmp ugt ptr [[TMP20]], [[TMP21]]
1069; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], ptr [[TMP20]], ptr [[TMP21]]
1070; DEFAULT-NEXT:    ret ptr [[TMP23]]
1071;
1072; THRESH-LABEL: @maxp8(
1073; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr @arrp, align 16
1074; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
1075; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
1076; THRESH-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[TMP3]], [[TMP4]]
1077; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], ptr [[TMP3]], ptr [[TMP4]]
1078; THRESH-NEXT:    [[TMP7:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1079; THRESH-NEXT:    [[TMP8:%.*]] = icmp ugt ptr [[TMP6]], [[TMP7]]
1080; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], ptr [[TMP6]], ptr [[TMP7]]
1081; THRESH-NEXT:    [[TMP10:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1082; THRESH-NEXT:    [[TMP11:%.*]] = icmp ugt ptr [[TMP9]], [[TMP10]]
1083; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], ptr [[TMP9]], ptr [[TMP10]]
1084; THRESH-NEXT:    [[TMP13:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1085; THRESH-NEXT:    [[TMP14:%.*]] = icmp ugt ptr [[TMP12]], [[TMP13]]
1086; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], ptr [[TMP12]], ptr [[TMP13]]
1087; THRESH-NEXT:    [[TMP16:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1088; THRESH-NEXT:    [[TMP17:%.*]] = icmp ugt ptr [[TMP15]], [[TMP16]]
1089; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], ptr [[TMP15]], ptr [[TMP16]]
1090; THRESH-NEXT:    [[TMP19:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1091; THRESH-NEXT:    [[TMP20:%.*]] = icmp ugt ptr [[TMP18]], [[TMP19]]
1092; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], ptr [[TMP18]], ptr [[TMP19]]
1093; THRESH-NEXT:    [[TMP22:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1094; THRESH-NEXT:    [[TMP23:%.*]] = icmp ugt ptr [[TMP21]], [[TMP22]]
1095; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], ptr [[TMP21]], ptr [[TMP22]]
1096; THRESH-NEXT:    ret ptr [[TMP24]]
1097;
1098  %2 = load ptr, ptr @arrp, align 16
1099  %3 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
1100  %4 = icmp ugt ptr %2, %3
1101  %5 = select i1 %4, ptr %2, ptr %3
1102  %6 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
1103  %7 = icmp ugt ptr %5, %6
1104  %8 = select i1 %7, ptr %5, ptr %6
1105  %9 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
1106  %10 = icmp ugt ptr %8, %9
1107  %11 = select i1 %10, ptr %8, ptr %9
1108  %12 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
1109  %13 = icmp ugt ptr %11, %12
1110  %14 = select i1 %13, ptr %11, ptr %12
1111  %15 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
1112  %16 = icmp ugt ptr %14, %15
1113  %17 = select i1 %16, ptr %14, ptr %15
1114  %18 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
1115  %19 = icmp ugt ptr %17, %18
1116  %20 = select i1 %19, ptr %17, ptr %18
1117  %21 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
1118  %22 = icmp ugt ptr %20, %21
1119  %23 = select i1 %22, ptr %20, ptr %21
1120  ret ptr %23
1121}
1122
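; The next four tests build the reduction from min/max intrinsic calls
; arranged as a pairwise tree instead of an icmp/select chain.  Here the
; eight i32 loads are contiguous from %p0, so the whole tree is expected to
; collapse into one <8 x i32> load plus @llvm.vector.reduce.smax on every
; target.
;
; A hypothetical C sketch of the pairwise tree (smax() stands in for
; @llvm.smax.i32 and is not part of the test):
;
;   int smax_rdx_v8i32_c(const int *p) {
;     int m10 = smax(p[1], p[0]), m32 = smax(p[3], p[2]);
;     int m54 = smax(p[5], p[4]), m76 = smax(p[7], p[6]);
;     return smax(smax(m76, m54), smax(m32, m10));
;   }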
1123define i32 @smax_intrinsic_rdx_v8i32(ptr %p0) {
1124; CHECK-LABEL: @smax_intrinsic_rdx_v8i32(
1125; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P0:%.*]], align 4
1126; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]])
1127; CHECK-NEXT:    ret i32 [[TMP2]]
1128;
1129  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
1130  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
1131  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
1132  %p4 = getelementptr inbounds i32, ptr %p0, i64 4
1133  %p5 = getelementptr inbounds i32, ptr %p0, i64 5
1134  %p6 = getelementptr inbounds i32, ptr %p0, i64 6
1135  %p7 = getelementptr inbounds i32, ptr %p0, i64 7
1136  %t0 = load i32, ptr %p0, align 4
1137  %t1 = load i32, ptr %p1, align 4
1138  %t2 = load i32, ptr %p2, align 4
1139  %t3 = load i32, ptr %p3, align 4
1140  %t4 = load i32, ptr %p4, align 4
1141  %t5 = load i32, ptr %p5, align 4
1142  %t6 = load i32, ptr %p6, align 4
1143  %t7 = load i32, ptr %p7, align 4
1144  %m10 = tail call i32 @llvm.smax.i32(i32 %t1, i32 %t0)
1145  %m32 = tail call i32 @llvm.smax.i32(i32 %t3, i32 %t2)
1146  %m54 = tail call i32 @llvm.smax.i32(i32 %t5, i32 %t4)
1147  %m76 = tail call i32 @llvm.smax.i32(i32 %t7, i32 %t6)
1148  %m3210 = tail call i32 @llvm.smax.i32(i32 %m32, i32 %m10)
1149  %m7654 = tail call i32 @llvm.smax.i32(i32 %m76, i32 %m54)
1150  %m = tail call i32 @llvm.smax.i32(i32 %m7654, i32 %m3210)
1151  ret i32 %m
1152}
1153
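; Same pairwise-tree shape with i16 elements and @llvm.smin; expected to
; become an <8 x i16> load feeding @llvm.vector.reduce.smin on every target.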
1154define i16 @smin_intrinsic_rdx_v8i16(ptr %p0) {
1155; CHECK-LABEL: @smin_intrinsic_rdx_v8i16(
1156; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 4
1157; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP1]])
1158; CHECK-NEXT:    ret i16 [[TMP2]]
1159;
1160  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
1161  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
1162  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
1163  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
1164  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
1165  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
1166  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
1167  %t0 = load i16, ptr %p0, align 4
1168  %t1 = load i16, ptr %p1, align 4
1169  %t2 = load i16, ptr %p2, align 4
1170  %t3 = load i16, ptr %p3, align 4
1171  %t4 = load i16, ptr %p4, align 4
1172  %t5 = load i16, ptr %p5, align 4
1173  %t6 = load i16, ptr %p6, align 4
1174  %t7 = load i16, ptr %p7, align 4
1175  %m10 = tail call i16 @llvm.smin.i16(i16 %t1, i16 %t0)
1176  %m32 = tail call i16 @llvm.smin.i16(i16 %t3, i16 %t2)
1177  %m54 = tail call i16 @llvm.smin.i16(i16 %t5, i16 %t4)
1178  %m76 = tail call i16 @llvm.smin.i16(i16 %t7, i16 %t6)
1179  %m3210 = tail call i16 @llvm.smin.i16(i16 %m32, i16 %m10)
1180  %m7654 = tail call i16 @llvm.smin.i16(i16 %m76, i16 %m54)
1181  %m = tail call i16 @llvm.smin.i16(i16 %m7654, i16 %m3210)
1182  ret i16 %m
1183}
1184
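; Four-element i64 umax tree.  Under the default cost model this is not
; considered profitable and the DEFAULT checks keep the scalar calls; the
; THRESH run forces it into a <4 x i64> load plus @llvm.vector.reduce.umax.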
1185define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
1186; DEFAULT-LABEL: @umax_intrinsic_rdx_v4i64(
1187; DEFAULT-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
1188; DEFAULT-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
1189; DEFAULT-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
1190; DEFAULT-NEXT:    [[T0:%.*]] = load i64, ptr [[P0]], align 4
1191; DEFAULT-NEXT:    [[T1:%.*]] = load i64, ptr [[P1]], align 4
1192; DEFAULT-NEXT:    [[T2:%.*]] = load i64, ptr [[P2]], align 4
1193; DEFAULT-NEXT:    [[T3:%.*]] = load i64, ptr [[P3]], align 4
1194; DEFAULT-NEXT:    [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
1195; DEFAULT-NEXT:    [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
1196; DEFAULT-NEXT:    [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
1197; DEFAULT-NEXT:    ret i64 [[M]]
1198;
1199; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
1200; THRESH-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
1201; THRESH-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
1202; THRESH-NEXT:    ret i64 [[TMP2]]
1203;
1204  %p1 = getelementptr inbounds i64, ptr %p0, i64 1
1205  %p2 = getelementptr inbounds i64, ptr %p0, i64 2
1206  %p3 = getelementptr inbounds i64, ptr %p0, i64 3
1207  %t0 = load i64, ptr %p0, align 4
1208  %t1 = load i64, ptr %p1, align 4
1209  %t2 = load i64, ptr %p2, align 4
1210  %t3 = load i64, ptr %p3, align 4
1211  %m10 = tail call i64 @llvm.umax.i64(i64 %t1, i64 %t0)
1212  %m32 = tail call i64 @llvm.umax.i64(i64 %t3, i64 %t2)
1213  %m = tail call i64 @llvm.umax.i64(i64 %m32, i64 %m10)
1214  ret i64 %m
1215}
1216
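; Sixteen-element i8 umin tree; expected to become a single <16 x i8> load
; plus @llvm.vector.reduce.umin on every target.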
1217define i8 @umin_intrinsic_rdx_v16i8(ptr %p0) {
1218; CHECK-LABEL: @umin_intrinsic_rdx_v16i8(
1219; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 4
1220; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP1]])
1221; CHECK-NEXT:    ret i8 [[TMP2]]
1222;
1223  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
1224  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
1225  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
1226  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
1227  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
1228  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
1229  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
1230  %p8 = getelementptr inbounds i8, ptr %p0, i64 8
1231  %p9 = getelementptr inbounds i8, ptr %p0, i64 9
1232  %pa = getelementptr inbounds i8, ptr %p0, i64 10
1233  %pb = getelementptr inbounds i8, ptr %p0, i64 11
1234  %pc = getelementptr inbounds i8, ptr %p0, i64 12
1235  %pd = getelementptr inbounds i8, ptr %p0, i64 13
1236  %pe = getelementptr inbounds i8, ptr %p0, i64 14
1237  %pf = getelementptr inbounds i8, ptr %p0, i64 15
1238  %t0 = load i8, ptr %p0, align 4
1239  %t1 = load i8, ptr %p1, align 4
1240  %t2 = load i8, ptr %p2, align 4
1241  %t3 = load i8, ptr %p3, align 4
1242  %t4 = load i8, ptr %p4, align 4
1243  %t5 = load i8, ptr %p5, align 4
1244  %t6 = load i8, ptr %p6, align 4
1245  %t7 = load i8, ptr %p7, align 4
1246  %t8 = load i8, ptr %p8, align 4
1247  %t9 = load i8, ptr %p9, align 4
1248  %ta = load i8, ptr %pa, align 4
1249  %tb = load i8, ptr %pb, align 4
1250  %tc = load i8, ptr %pc, align 4
1251  %td = load i8, ptr %pd, align 4
1252  %te = load i8, ptr %pe, align 4
1253  %tf = load i8, ptr %pf, align 4
1254  %m10 = tail call i8 @llvm.umin.i8(i8 %t1, i8 %t0)
1255  %m32 = tail call i8 @llvm.umin.i8(i8 %t3, i8 %t2)
1256  %m54 = tail call i8 @llvm.umin.i8(i8 %t5, i8 %t4)
1257  %m76 = tail call i8 @llvm.umin.i8(i8 %t7, i8 %t6)
1258  %m98 = tail call i8 @llvm.umin.i8(i8 %t9, i8 %t8)
1259  %mba = tail call i8 @llvm.umin.i8(i8 %tb, i8 %ta)
1260  %mdc = tail call i8 @llvm.umin.i8(i8 %td, i8 %tc)
1261  %mfe = tail call i8 @llvm.umin.i8(i8 %tf, i8 %te)
1262  %m3210 = tail call i8 @llvm.umin.i8(i8 %m32, i8 %m10)
1263  %m7654 = tail call i8 @llvm.umin.i8(i8 %m76, i8 %m54)
1264  %mdc98 = tail call i8 @llvm.umin.i8(i8 %mdc, i8 %m98)
1265  %mfeba = tail call i8 @llvm.umin.i8(i8 %mfe, i8 %mba)
1266  %ml = tail call i8 @llvm.umin.i8(i8 %m3210, i8 %m7654)
1267  %mh = tail call i8 @llvm.umin.i8(i8 %mfeba, i8 %mdc98)
1268  %m = tail call i8 @llvm.umin.i8(i8 %mh, i8 %ml)
1269  ret i8 %m
1270}
1271
1272; This should not crash.
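; Regression test for PR49730: a umin reduction whose operands are
; (undef - smin(undef, C)) expressions and whose result is unused, which
; presumably crashed the SLP min/max reduction matching at some point.  The
; CHECK lines only pin down the current, partially vectorized output.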
1273
1274define void @PR49730() {
1275; CHECK-LABEL: @PR49730(
1276; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> <i32 2, i32 2, i32 1, i32 undef>, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>)
1277; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]]
1278; CHECK-NEXT:    [[T12:%.*]] = sub nsw i32 undef, undef
1279; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]])
1280; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]])
1281; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP4]], i32 undef)
1282; CHECK-NEXT:    [[T14:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP5]], i32 93)
1283; CHECK-NEXT:    ret void
1284;
1285  %t = call i32 @llvm.smin.i32(i32 undef, i32 2)
1286  %t1 = sub nsw i32 undef, %t
1287  %t2 = call i32 @llvm.umin.i32(i32 undef, i32 %t1)
1288  %t3 = call i32 @llvm.smin.i32(i32 undef, i32 2)
1289  %t4 = sub nsw i32 undef, %t3
1290  %t5 = call i32 @llvm.umin.i32(i32 %t2, i32 %t4)
1291  %t6 = call i32 @llvm.smin.i32(i32 undef, i32 1)
1292  %t7 = sub nuw nsw i32 undef, %t6
1293  %t8 = call i32 @llvm.umin.i32(i32 %t5, i32 %t7)
1294  %t9 = call i32 @llvm.smin.i32(i32 undef, i32 1)
1295  %t10 = sub nsw i32 undef, %t9
1296  %t11 = call i32 @llvm.umin.i32(i32 %t8, i32 %t10)
1297  %t12 = sub nsw i32 undef, undef
1298  %t13 = call i32 @llvm.umin.i32(i32 %t11, i32 %t12)
1299  %t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
1300  ret void
1301}
1302