xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s
3; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s
4
; Module-wide target settings shared by every function in this file.
; The "i64:64" component of the layout string sets the ABI alignment of i64 to
; 64 bits; the triple selects a 64-bit Arm Apple iOS target.
5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6target triple = "arm64-apple-ios5.0.0"
7
; Unsigned-min pattern (icmp ult + select) on eight consecutive i16 elements
; starting at %ptr. Each element is loaded, replaced by 16383 when it is not
; below 16383, and stored back to the same address. The autogenerated
; assertions expect a single <8 x i16> load/icmp/select/store sequence.
; The %x argument is not used by the body.
8define void @select_umin_8xi16(ptr %ptr, i16 %x) {
9; CHECK-LABEL: @select_umin_8xi16(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
12; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], splat (i16 16383)
13; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
14; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
15; CHECK-NEXT:    ret void
16;
17entry:
18  %l.0 = load i16, ptr %ptr
19  %cmp.0 = icmp ult i16 %l.0, 16383
20  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
21  store i16 %s.0, ptr %ptr, align 2
22
23  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
24  %l.1 = load i16, ptr %gep.1
25  %cmp.1 = icmp ult i16 %l.1, 16383
26  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
27  store i16 %s.1, ptr %gep.1, align 2
28
29  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
30  %l.2 = load i16, ptr %gep.2
31  %cmp.2 = icmp ult i16 %l.2, 16383
32  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
33  store i16 %s.2, ptr %gep.2, align 2
34
35  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
36  %l.3 = load i16, ptr %gep.3
37  %cmp.3 = icmp ult i16 %l.3, 16383
38  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
39  store i16 %s.3, ptr %gep.3, align 2
40
41  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
42  %l.4 = load i16, ptr %gep.4
43  %cmp.4 = icmp ult i16 %l.4, 16383
44  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
45  store i16 %s.4, ptr %gep.4, align 2
46
47  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
48  %l.5 = load i16, ptr %gep.5
49  %cmp.5 = icmp ult i16 %l.5, 16383
50  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
51  store i16 %s.5, ptr %gep.5, align 2
52
53  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
54  %l.6 = load i16, ptr %gep.6
55  %cmp.6 = icmp ult i16 %l.6, 16383
56  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
57  store i16 %s.6, ptr %gep.6, align 2
58
59  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
60  %l.7 = load i16, ptr %gep.7
61  %cmp.7 = icmp ult i16 %l.7, 16383
62  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
63  store i16 %s.7, ptr %gep.7, align 2
64  ret void
65}
66
; Unsigned-min pattern (icmp ult + select) on four consecutive i32 elements
; starting at %ptr, each compared against the constant 16383 and stored back
; in place. The autogenerated assertions expect a single <4 x i32>
; load/icmp/select/store sequence. The %x argument is not used by the body.
67define void @select_umin_4xi32(ptr %ptr, i32 %x) {
68; CHECK-LABEL: @select_umin_4xi32(
69; CHECK-NEXT:  entry:
70; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
71; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
72; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
73; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
74; CHECK-NEXT:    ret void
75;
76entry:
77  %l.0 = load i32, ptr %ptr
78  %cmp.0 = icmp ult i32 %l.0, 16383
79  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
80  store i32 %s.0, ptr %ptr, align 4
81
82  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
83  %l.1 = load i32, ptr %gep.1
84  %cmp.1 = icmp ult i32 %l.1, 16383
85  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
86  store i32 %s.1, ptr %gep.1, align 4
87
88  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
89  %l.2 = load i32, ptr %gep.2
90  %cmp.2 = icmp ult i32 %l.2, 16383
91  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
92  store i32 %s.2, ptr %gep.2, align 4
93
94  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
95  %l.3 = load i32, ptr %gep.3
96  %cmp.3 = icmp ult i32 %l.3, 16383
97  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
98  store i32 %s.3, ptr %gep.3, align 4
99
100  ret void
101}
102
; Mixed-predicate case on four consecutive i32 elements: elements 0 and 2 use
; icmp ult while elements 1 and 3 use icmp ugt, all against 16383.
; Note: despite "ule" in the function name, the compares in the body are
; ult and ugt; no ule compare appears.
; The autogenerated assertions expect both a vector ult and a vector ugt
; compare, blended with a shufflevector (mask <0, 5, 2, 7>) that feeds a single
; vector select. The %x argument is not used by the body.
103define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) {
104; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
105; CHECK-NEXT:  entry:
106; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
107; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
108; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
109; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
110; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
111; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[PTR]], align 4
112; CHECK-NEXT:    ret void
113;
114entry:
115  %l.0 = load i32, ptr %ptr
116  %cmp.0 = icmp ult i32 %l.0, 16383
117  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
118  store i32 %s.0, ptr %ptr, align 4
119
120  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
121  %l.1 = load i32, ptr %gep.1
122  %cmp.1 = icmp ugt i32 %l.1, 16383
123  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
124  store i32 %s.1, ptr %gep.1, align 4
125
126  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
127  %l.2 = load i32, ptr %gep.2
128  %cmp.2 = icmp ult i32 %l.2, 16383
129  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
130  store i32 %s.2, ptr %gep.2, align 4
131
132  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
133  %l.3 = load i32, ptr %gep.3
134  %cmp.3 = icmp ugt i32 %l.3, 16383
135  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
136  store i32 %s.3, ptr %gep.3, align 4
137
138  ret void
139}
140
141; There is no <2 x i64> version of umin, but we can efficiently lower
142; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared ult against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
143define void @select_umin_2xi64(ptr %ptr, i64 %x) {
144; CHECK-LABEL: @select_umin_2xi64(
145; CHECK-NEXT:  entry:
146; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
147; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 16383)
148; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
149; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
150; CHECK-NEXT:    ret void
151;
152entry:
153  %l.0 = load i64, ptr %ptr
154  %cmp.0 = icmp ult i64 %l.0, 16383
155  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
156  store i64 %s.0, ptr %ptr, align 4
157
158  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
159  %l.1 = load i64, ptr %gep.1
160  %cmp.1 = icmp ult i64 %l.1, 16383
161  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
162  store i64 %s.1, ptr %gep.1, align 4
163
164  ret void
165}
166
167
; Unsigned-min pattern using the non-strict predicate (icmp ule + select) on
; eight consecutive i16 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <8 x i16> load/icmp/select/store sequence. The %x argument is not used by
; the body.
168define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) {
169; CHECK-LABEL: @select_umin_ule_8xi16(
170; CHECK-NEXT:  entry:
171; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
172; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], splat (i16 16383)
173; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
174; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
175; CHECK-NEXT:    ret void
176;
177entry:
178  %l.0 = load i16, ptr %ptr
179  %cmp.0 = icmp ule i16 %l.0, 16383
180  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
181  store i16 %s.0, ptr %ptr, align 2
182
183  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
184  %l.1 = load i16, ptr %gep.1
185  %cmp.1 = icmp ule i16 %l.1, 16383
186  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
187  store i16 %s.1, ptr %gep.1, align 2
188
189  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
190  %l.2 = load i16, ptr %gep.2
191  %cmp.2 = icmp ule i16 %l.2, 16383
192  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
193  store i16 %s.2, ptr %gep.2, align 2
194
195  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
196  %l.3 = load i16, ptr %gep.3
197  %cmp.3 = icmp ule i16 %l.3, 16383
198  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
199  store i16 %s.3, ptr %gep.3, align 2
200
201  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
202  %l.4 = load i16, ptr %gep.4
203  %cmp.4 = icmp ule i16 %l.4, 16383
204  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
205  store i16 %s.4, ptr %gep.4, align 2
206
207  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
208  %l.5 = load i16, ptr %gep.5
209  %cmp.5 = icmp ule i16 %l.5, 16383
210  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
211  store i16 %s.5, ptr %gep.5, align 2
212
213  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
214  %l.6 = load i16, ptr %gep.6
215  %cmp.6 = icmp ule i16 %l.6, 16383
216  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
217  store i16 %s.6, ptr %gep.6, align 2
218
219  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
220  %l.7 = load i16, ptr %gep.7
221  %cmp.7 = icmp ule i16 %l.7, 16383
222  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
223  store i16 %s.7, ptr %gep.7, align 2
224  ret void
225}
226
; Unsigned-min pattern using the non-strict predicate (icmp ule + select) on
; four consecutive i32 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <4 x i32> load/icmp/select/store sequence. The %x argument is not used by
; the body.
227define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) {
228; CHECK-LABEL: @select_umin_ule_4xi32(
229; CHECK-NEXT:  entry:
230; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
231; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], splat (i32 16383)
232; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
233; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
234; CHECK-NEXT:    ret void
235;
236entry:
237  %l.0 = load i32, ptr %ptr
238  %cmp.0 = icmp ule i32 %l.0, 16383
239  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
240  store i32 %s.0, ptr %ptr, align 4
241
242  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
243  %l.1 = load i32, ptr %gep.1
244  %cmp.1 = icmp ule i32 %l.1, 16383
245  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
246  store i32 %s.1, ptr %gep.1, align 4
247
248  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
249  %l.2 = load i32, ptr %gep.2
250  %cmp.2 = icmp ule i32 %l.2, 16383
251  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
252  store i32 %s.2, ptr %gep.2, align 4
253
254  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
255  %l.3 = load i32, ptr %gep.3
256  %cmp.3 = icmp ule i32 %l.3, 16383
257  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
258  store i32 %s.3, ptr %gep.3, align 4
259
260  ret void
261}
262
263; There is no <2 x i64> version of umin, but we can efficiently lower
264; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared ule against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
265define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) {
266; CHECK-LABEL: @select_umin_ule_2xi64(
267; CHECK-NEXT:  entry:
268; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
269; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], splat (i64 16383)
270; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
271; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
272; CHECK-NEXT:    ret void
273;
274entry:
275  %l.0 = load i64, ptr %ptr
276  %cmp.0 = icmp ule i64 %l.0, 16383
277  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
278  store i64 %s.0, ptr %ptr, align 4
279
280  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
281  %l.1 = load i64, ptr %gep.1
282  %cmp.1 = icmp ule i64 %l.1, 16383
283  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
284  store i64 %s.1, ptr %gep.1, align 4
285
286  ret void
287}
288
; Signed-min pattern (icmp slt + select) on eight consecutive i16 elements
; starting at %ptr, each compared against 16383 and stored back in place.
; The autogenerated assertions expect a single <8 x i16>
; load/icmp/select/store sequence. The %x argument is not used by the body.
289define void @select_smin_8xi16(ptr %ptr, i16 %x) {
290; CHECK-LABEL: @select_smin_8xi16(
291; CHECK-NEXT:  entry:
292; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
293; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], splat (i16 16383)
294; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
295; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
296; CHECK-NEXT:    ret void
297;
298entry:
299  %l.0 = load i16, ptr %ptr
300  %cmp.0 = icmp slt i16 %l.0, 16383
301  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
302  store i16 %s.0, ptr %ptr, align 2
303
304  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
305  %l.1 = load i16, ptr %gep.1
306  %cmp.1 = icmp slt i16 %l.1, 16383
307  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
308  store i16 %s.1, ptr %gep.1, align 2
309
310  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
311  %l.2 = load i16, ptr %gep.2
312  %cmp.2 = icmp slt i16 %l.2, 16383
313  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
314  store i16 %s.2, ptr %gep.2, align 2
315
316  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
317  %l.3 = load i16, ptr %gep.3
318  %cmp.3 = icmp slt i16 %l.3, 16383
319  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
320  store i16 %s.3, ptr %gep.3, align 2
321
322  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
323  %l.4 = load i16, ptr %gep.4
324  %cmp.4 = icmp slt i16 %l.4, 16383
325  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
326  store i16 %s.4, ptr %gep.4, align 2
327
328  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
329  %l.5 = load i16, ptr %gep.5
330  %cmp.5 = icmp slt i16 %l.5, 16383
331  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
332  store i16 %s.5, ptr %gep.5, align 2
333
334  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
335  %l.6 = load i16, ptr %gep.6
336  %cmp.6 = icmp slt i16 %l.6, 16383
337  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
338  store i16 %s.6, ptr %gep.6, align 2
339
340  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
341  %l.7 = load i16, ptr %gep.7
342  %cmp.7 = icmp slt i16 %l.7, 16383
343  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
344  store i16 %s.7, ptr %gep.7, align 2
345  ret void
346}
347
; Signed-min pattern (icmp slt + select) on four consecutive i32 elements
; starting at %ptr, each compared against 16383 and stored back in place.
; The autogenerated assertions expect a single <4 x i32>
; load/icmp/select/store sequence. The %x argument is not used by the body.
348define void @select_smin_4xi32(ptr %ptr, i32 %x) {
349; CHECK-LABEL: @select_smin_4xi32(
350; CHECK-NEXT:  entry:
351; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
352; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], splat (i32 16383)
353; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
354; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
355; CHECK-NEXT:    ret void
356;
357entry:
358  %l.0 = load i32, ptr %ptr
359  %cmp.0 = icmp slt i32 %l.0, 16383
360  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
361  store i32 %s.0, ptr %ptr, align 4
362
363  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
364  %l.1 = load i32, ptr %gep.1
365  %cmp.1 = icmp slt i32 %l.1, 16383
366  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
367  store i32 %s.1, ptr %gep.1, align 4
368
369  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
370  %l.2 = load i32, ptr %gep.2
371  %cmp.2 = icmp slt i32 %l.2, 16383
372  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
373  store i32 %s.2, ptr %gep.2, align 4
374
375  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
376  %l.3 = load i32, ptr %gep.3
377  %cmp.3 = icmp slt i32 %l.3, 16383
378  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
379  store i32 %s.3, ptr %gep.3, align 4
380
381  ret void
382}
383
384; There is no <2 x i64> version of smin, but we can efficiently lower
385; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared slt against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
386define void @select_smin_2xi64(ptr %ptr, i64 %x) {
387; CHECK-LABEL: @select_smin_2xi64(
388; CHECK-NEXT:  entry:
389; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
390; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], splat (i64 16383)
391; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
392; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
393; CHECK-NEXT:    ret void
394;
395entry:
396  %l.0 = load i64, ptr %ptr
397  %cmp.0 = icmp slt i64 %l.0, 16383
398  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
399  store i64 %s.0, ptr %ptr, align 4
400
401  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
402  %l.1 = load i64, ptr %gep.1
403  %cmp.1 = icmp slt i64 %l.1, 16383
404  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
405  store i64 %s.1, ptr %gep.1, align 4
406
407  ret void
408}
409
; Signed-min pattern using the non-strict predicate (icmp sle + select) on
; eight consecutive i16 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <8 x i16> load/icmp/select/store sequence. The %x argument is not used by
; the body.
410define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) {
411; CHECK-LABEL: @select_smin_sle_8xi16(
412; CHECK-NEXT:  entry:
413; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
414; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], splat (i16 16383)
415; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
416; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
417; CHECK-NEXT:    ret void
418;
419entry:
420  %l.0 = load i16, ptr %ptr
421  %cmp.0 = icmp sle i16 %l.0, 16383
422  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
423  store i16 %s.0, ptr %ptr, align 2
424
425  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
426  %l.1 = load i16, ptr %gep.1
427  %cmp.1 = icmp sle i16 %l.1, 16383
428  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
429  store i16 %s.1, ptr %gep.1, align 2
430
431  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
432  %l.2 = load i16, ptr %gep.2
433  %cmp.2 = icmp sle i16 %l.2, 16383
434  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
435  store i16 %s.2, ptr %gep.2, align 2
436
437  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
438  %l.3 = load i16, ptr %gep.3
439  %cmp.3 = icmp sle i16 %l.3, 16383
440  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
441  store i16 %s.3, ptr %gep.3, align 2
442
443  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
444  %l.4 = load i16, ptr %gep.4
445  %cmp.4 = icmp sle i16 %l.4, 16383
446  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
447  store i16 %s.4, ptr %gep.4, align 2
448
449  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
450  %l.5 = load i16, ptr %gep.5
451  %cmp.5 = icmp sle i16 %l.5, 16383
452  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
453  store i16 %s.5, ptr %gep.5, align 2
454
455  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
456  %l.6 = load i16, ptr %gep.6
457  %cmp.6 = icmp sle i16 %l.6, 16383
458  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
459  store i16 %s.6, ptr %gep.6, align 2
460
461  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
462  %l.7 = load i16, ptr %gep.7
463  %cmp.7 = icmp sle i16 %l.7, 16383
464  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
465  store i16 %s.7, ptr %gep.7, align 2
466  ret void
467}
468
; Signed-min pattern using the non-strict predicate (icmp sle + select) on
; four consecutive i32 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <4 x i32> load/icmp/select/store sequence. The %x argument is not used by
; the body.
469define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) {
470; CHECK-LABEL: @select_smin_sle_4xi32(
471; CHECK-NEXT:  entry:
472; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
473; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], splat (i32 16383)
474; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
475; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
476; CHECK-NEXT:    ret void
477;
478entry:
479  %l.0 = load i32, ptr %ptr
480  %cmp.0 = icmp sle i32 %l.0, 16383
481  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
482  store i32 %s.0, ptr %ptr, align 4
483
484  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
485  %l.1 = load i32, ptr %gep.1
486  %cmp.1 = icmp sle i32 %l.1, 16383
487  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
488  store i32 %s.1, ptr %gep.1, align 4
489
490  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
491  %l.2 = load i32, ptr %gep.2
492  %cmp.2 = icmp sle i32 %l.2, 16383
493  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
494  store i32 %s.2, ptr %gep.2, align 4
495
496  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
497  %l.3 = load i32, ptr %gep.3
498  %cmp.3 = icmp sle i32 %l.3, 16383
499  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
500  store i32 %s.3, ptr %gep.3, align 4
501
502  ret void
503}
504
505; There is no <2 x i64> version of smin, but we can efficiently lower
506; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared sle against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
507define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) {
508; CHECK-LABEL: @select_smin_sle_2xi64(
509; CHECK-NEXT:  entry:
510; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
511; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], splat (i64 16383)
512; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
513; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
514; CHECK-NEXT:    ret void
515;
516entry:
517  %l.0 = load i64, ptr %ptr
518  %cmp.0 = icmp sle i64 %l.0, 16383
519  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
520  store i64 %s.0, ptr %ptr, align 4
521
522  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
523  %l.1 = load i64, ptr %gep.1
524  %cmp.1 = icmp sle i64 %l.1, 16383
525  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
526  store i64 %s.1, ptr %gep.1, align 4
527
528  ret void
529}
; Unsigned-max pattern (icmp ugt + select) on eight consecutive i16 elements
; starting at %ptr. Each element is loaded, replaced by 16383 when it is not
; above 16383, and stored back to the same address. The autogenerated
; assertions expect a single <8 x i16> load/icmp/select/store sequence.
; The %x argument is not used by the body.
530define void @select_umax_8xi16(ptr %ptr, i16 %x) {
531; CHECK-LABEL: @select_umax_8xi16(
532; CHECK-NEXT:  entry:
533; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
534; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], splat (i16 16383)
535; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
536; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
537; CHECK-NEXT:    ret void
538;
539entry:
540  %l.0 = load i16, ptr %ptr
541  %cmp.0 = icmp ugt i16 %l.0, 16383
542  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
543  store i16 %s.0, ptr %ptr, align 2
544
545  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
546  %l.1 = load i16, ptr %gep.1
547  %cmp.1 = icmp ugt i16 %l.1, 16383
548  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
549  store i16 %s.1, ptr %gep.1, align 2
550
551  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
552  %l.2 = load i16, ptr %gep.2
553  %cmp.2 = icmp ugt i16 %l.2, 16383
554  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
555  store i16 %s.2, ptr %gep.2, align 2
556
557  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
558  %l.3 = load i16, ptr %gep.3
559  %cmp.3 = icmp ugt i16 %l.3, 16383
560  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
561  store i16 %s.3, ptr %gep.3, align 2
562
563  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
564  %l.4 = load i16, ptr %gep.4
565  %cmp.4 = icmp ugt i16 %l.4, 16383
566  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
567  store i16 %s.4, ptr %gep.4, align 2
568
569  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
570  %l.5 = load i16, ptr %gep.5
571  %cmp.5 = icmp ugt i16 %l.5, 16383
572  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
573  store i16 %s.5, ptr %gep.5, align 2
574
575  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
576  %l.6 = load i16, ptr %gep.6
577  %cmp.6 = icmp ugt i16 %l.6, 16383
578  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
579  store i16 %s.6, ptr %gep.6, align 2
580
581  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
582  %l.7 = load i16, ptr %gep.7
583  %cmp.7 = icmp ugt i16 %l.7, 16383
584  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
585  store i16 %s.7, ptr %gep.7, align 2
586  ret void
587}
588
; Unsigned-max pattern (icmp ugt + select) on four consecutive i32 elements
; starting at %ptr, each compared against 16383 and stored back in place.
; The autogenerated assertions expect a single <4 x i32>
; load/icmp/select/store sequence. The %x argument is not used by the body.
589define void @select_umax_4xi32(ptr %ptr, i32 %x) {
590; CHECK-LABEL: @select_umax_4xi32(
591; CHECK-NEXT:  entry:
592; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
593; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
594; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
595; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
596; CHECK-NEXT:    ret void
597;
598entry:
599  %l.0 = load i32, ptr %ptr
600  %cmp.0 = icmp ugt i32 %l.0, 16383
601  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
602  store i32 %s.0, ptr %ptr, align 4
603
604  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
605  %l.1 = load i32, ptr %gep.1
606  %cmp.1 = icmp ugt i32 %l.1, 16383
607  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
608  store i32 %s.1, ptr %gep.1, align 4
609
610  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
611  %l.2 = load i32, ptr %gep.2
612  %cmp.2 = icmp ugt i32 %l.2, 16383
613  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
614  store i32 %s.2, ptr %gep.2, align 4
615
616  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
617  %l.3 = load i32, ptr %gep.3
618  %cmp.3 = icmp ugt i32 %l.3, 16383
619  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
620  store i32 %s.3, ptr %gep.3, align 4
621
622  ret void
623}
624
625; There is no <2 x i64> version of umax, but we can efficiently lower
626; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared ugt against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
627define void @select_umax_2xi64(ptr %ptr, i64 %x) {
628; CHECK-LABEL: @select_umax_2xi64(
629; CHECK-NEXT:  entry:
630; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
631; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], splat (i64 16383)
632; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
633; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
634; CHECK-NEXT:    ret void
635;
636entry:
637  %l.0 = load i64, ptr %ptr
638  %cmp.0 = icmp ugt i64 %l.0, 16383
639  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
640  store i64 %s.0, ptr %ptr, align 4
641
642  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
643  %l.1 = load i64, ptr %gep.1
644  %cmp.1 = icmp ugt i64 %l.1, 16383
645  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
646  store i64 %s.1, ptr %gep.1, align 4
647
648  ret void
649}
650
; Unsigned-max pattern using the non-strict predicate (icmp uge + select) on
; eight consecutive i16 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <8 x i16> load/icmp/select/store sequence. The %x argument is not used by
; the body.
651define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) {
652; CHECK-LABEL: @select_umax_uge_8xi16(
653; CHECK-NEXT:  entry:
654; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
655; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], splat (i16 16383)
656; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
657; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
658; CHECK-NEXT:    ret void
659;
660entry:
661  %l.0 = load i16, ptr %ptr
662  %cmp.0 = icmp uge i16 %l.0, 16383
663  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
664  store i16 %s.0, ptr %ptr, align 2
665
666  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
667  %l.1 = load i16, ptr %gep.1
668  %cmp.1 = icmp uge i16 %l.1, 16383
669  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
670  store i16 %s.1, ptr %gep.1, align 2
671
672  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
673  %l.2 = load i16, ptr %gep.2
674  %cmp.2 = icmp uge i16 %l.2, 16383
675  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
676  store i16 %s.2, ptr %gep.2, align 2
677
678  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
679  %l.3 = load i16, ptr %gep.3
680  %cmp.3 = icmp uge i16 %l.3, 16383
681  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
682  store i16 %s.3, ptr %gep.3, align 2
683
684  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
685  %l.4 = load i16, ptr %gep.4
686  %cmp.4 = icmp uge i16 %l.4, 16383
687  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
688  store i16 %s.4, ptr %gep.4, align 2
689
690  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
691  %l.5 = load i16, ptr %gep.5
692  %cmp.5 = icmp uge i16 %l.5, 16383
693  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
694  store i16 %s.5, ptr %gep.5, align 2
695
696  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
697  %l.6 = load i16, ptr %gep.6
698  %cmp.6 = icmp uge i16 %l.6, 16383
699  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
700  store i16 %s.6, ptr %gep.6, align 2
701
702  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
703  %l.7 = load i16, ptr %gep.7
704  %cmp.7 = icmp uge i16 %l.7, 16383
705  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
706  store i16 %s.7, ptr %gep.7, align 2
707  ret void
708}
709
; Unsigned-max pattern using the non-strict predicate (icmp uge + select) on
; four consecutive i32 elements starting at %ptr, each compared against 16383
; and stored back in place. The autogenerated assertions expect a single
; <4 x i32> load/icmp/select/store sequence. The %x argument is not used by
; the body.
710define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) {
711; CHECK-LABEL: @select_umax_uge_4xi32(
712; CHECK-NEXT:  entry:
713; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
714; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], splat (i32 16383)
715; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
716; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
717; CHECK-NEXT:    ret void
718;
719entry:
720  %l.0 = load i32, ptr %ptr
721  %cmp.0 = icmp uge i32 %l.0, 16383
722  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
723  store i32 %s.0, ptr %ptr, align 4
724
725  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
726  %l.1 = load i32, ptr %gep.1
727  %cmp.1 = icmp uge i32 %l.1, 16383
728  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
729  store i32 %s.1, ptr %gep.1, align 4
730
731  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
732  %l.2 = load i32, ptr %gep.2
733  %cmp.2 = icmp uge i32 %l.2, 16383
734  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
735  store i32 %s.2, ptr %gep.2, align 4
736
737  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
738  %l.3 = load i32, ptr %gep.3
739  %cmp.3 = icmp uge i32 %l.3, 16383
740  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
741  store i32 %s.3, ptr %gep.3, align 4
742
743  ret void
744}
745
746; There is no <2 x i64> version of umax, but we can efficiently lower
747; compare/select pairs with uniform predicates.
; Two consecutive i64 elements at %ptr, each compared uge against 16383 and
; stored back in place. The scalar stores are only "align 4", and the
; autogenerated assertions expect the vector store to keep align 4 while the
; vector load is align 8. The %x argument is not used by the body.
748define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) {
749; CHECK-LABEL: @select_umax_uge_2xi64(
750; CHECK-NEXT:  entry:
751; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
752; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], splat (i64 16383)
753; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
754; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
755; CHECK-NEXT:    ret void
756;
757entry:
758  %l.0 = load i64, ptr %ptr
759  %cmp.0 = icmp uge i64 %l.0, 16383
760  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
761  store i64 %s.0, ptr %ptr, align 4
762
763  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
764  %l.1 = load i64, ptr %gep.1
765  %cmp.1 = icmp uge i64 %l.1, 16383
766  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
767  store i64 %s.1, ptr %gep.1, align 4
768
769  ret void
770}
771
; Signed-max pattern (icmp sgt + select) on eight consecutive i16 elements
; starting at %ptr, each compared against 16383 and stored back in place.
; The autogenerated assertions expect a single <8 x i16>
; load/icmp/select/store sequence. The %x argument is not used by the body.
772define void @select_smax_8xi16(ptr %ptr, i16 %x) {
773; CHECK-LABEL: @select_smax_8xi16(
774; CHECK-NEXT:  entry:
775; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
776; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], splat (i16 16383)
777; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
778; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
779; CHECK-NEXT:    ret void
780;
781entry:
782  %l.0 = load i16, ptr %ptr
783  %cmp.0 = icmp sgt i16 %l.0, 16383
784  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
785  store i16 %s.0, ptr %ptr, align 2
786
787  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
788  %l.1 = load i16, ptr %gep.1
789  %cmp.1 = icmp sgt i16 %l.1, 16383
790  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
791  store i16 %s.1, ptr %gep.1, align 2
792
793  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
794  %l.2 = load i16, ptr %gep.2
795  %cmp.2 = icmp sgt i16 %l.2, 16383
796  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
797  store i16 %s.2, ptr %gep.2, align 2
798
799  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
800  %l.3 = load i16, ptr %gep.3
801  %cmp.3 = icmp sgt i16 %l.3, 16383
802  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
803  store i16 %s.3, ptr %gep.3, align 2
804
805  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
806  %l.4 = load i16, ptr %gep.4
807  %cmp.4 = icmp sgt i16 %l.4, 16383
808  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
809  store i16 %s.4, ptr %gep.4, align 2
810
811  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
812  %l.5 = load i16, ptr %gep.5
813  %cmp.5 = icmp sgt i16 %l.5, 16383
814  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
815  store i16 %s.5, ptr %gep.5, align 2
816
817  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
818  %l.6 = load i16, ptr %gep.6
819  %cmp.6 = icmp sgt i16 %l.6, 16383
820  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
821  store i16 %s.6, ptr %gep.6, align 2
822
823  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
824  %l.7 = load i16, ptr %gep.7
825  %cmp.7 = icmp sgt i16 %l.7, 16383
826  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
827  store i16 %s.7, ptr %gep.7, align 2
828  ret void
829}
830
; Signed max against a constant over 4 consecutive i32 elements starting at
; %ptr: each element is loaded, compared with `icmp sgt` against 16383, and the
; select keeps the loaded value when the compare is true, otherwise 16383. The
; result is stored back to the address it was loaded from.
; The autogenerated assertions below expect the four scalar chains to be
; replaced by one <4 x i32> load / icmp sgt / select / store sequence using
; splat (i32 16383). %x is not referenced by the body.
831define void @select_smax_4xi32(ptr %ptr, i32 %x) {
832; CHECK-LABEL: @select_smax_4xi32(
833; CHECK-NEXT:  entry:
834; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
835; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], splat (i32 16383)
836; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
837; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
838; CHECK-NEXT:    ret void
839;
840entry:
; Element 0 is accessed through %ptr directly; elements 1-3 go through a GEP.
841  %l.0 = load i32, ptr %ptr
842  %cmp.0 = icmp sgt i32 %l.0, 16383
843  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
844  store i32 %s.0, ptr %ptr, align 4
845
846  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
847  %l.1 = load i32, ptr %gep.1
848  %cmp.1 = icmp sgt i32 %l.1, 16383
849  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
850  store i32 %s.1, ptr %gep.1, align 4
851
852  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
853  %l.2 = load i32, ptr %gep.2
854  %cmp.2 = icmp sgt i32 %l.2, 16383
855  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
856  store i32 %s.2, ptr %gep.2, align 4
857
858  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
859  %l.3 = load i32, ptr %gep.3
860  %cmp.3 = icmp sgt i32 %l.3, 16383
861  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
862  store i32 %s.3, ptr %gep.3, align 4
863
864  ret void
865}
866
867; There is no <2 x i64> version of smax, but we can efficiently lower
868; compare/select pairs with uniform predicates.
; Two consecutive i64 elements starting at %ptr are each loaded, compared with
; `icmp sgt` against 16383, and the select keeps the loaded value when the
; compare is true, otherwise 16383; the result is stored back in place.
; The autogenerated assertions below expect a single <2 x i64>
; load / icmp sgt / select / store sequence. The scalar loads carry no explicit
; alignment and the expected vector load is `align 8`, while the scalar stores
; and the expected vector store both use `align 4`. %x is not referenced by the
; body.
869define void @select_smax_2xi64(ptr %ptr, i64 %x) {
870; CHECK-LABEL: @select_smax_2xi64(
871; CHECK-NEXT:  entry:
872; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
873; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 16383)
874; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
875; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
876; CHECK-NEXT:    ret void
877;
878entry:
879  %l.0 = load i64, ptr %ptr
880  %cmp.0 = icmp sgt i64 %l.0, 16383
881  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
882  store i64 %s.0, ptr %ptr, align 4
883
884  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
885  %l.1 = load i64, ptr %gep.1
886  %cmp.1 = icmp sgt i64 %l.1, 16383
887  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
888  store i64 %s.1, ptr %gep.1, align 4
889
890  ret void
891}
892
893
; Same compare/select shape as a signed max against a constant, but using the
; non-strict predicate: 8 consecutive i16 elements starting at %ptr are each
; loaded, compared with `icmp sge` against 16383, and the select keeps the
; loaded value when the compare is true, otherwise 16383. The result is stored
; back to the address it was loaded from with `align 2`.
; The autogenerated assertions below expect the eight scalar chains to be
; replaced by one <8 x i16> load / icmp sge / select / store sequence using
; splat (i16 16383). %x is not referenced by the body.
894define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) {
895; CHECK-LABEL: @select_smax_sge_8xi16(
896; CHECK-NEXT:  entry:
897; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
898; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], splat (i16 16383)
899; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
900; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
901; CHECK-NEXT:    ret void
902;
903entry:
; Element 0 is accessed through %ptr directly; elements 1-7 go through a GEP.
904  %l.0 = load i16, ptr %ptr
905  %cmp.0 = icmp sge i16 %l.0, 16383
906  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
907  store i16 %s.0, ptr %ptr, align 2
908
909  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
910  %l.1 = load i16, ptr %gep.1
911  %cmp.1 = icmp sge i16 %l.1, 16383
912  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
913  store i16 %s.1, ptr %gep.1, align 2
914
915  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
916  %l.2 = load i16, ptr %gep.2
917  %cmp.2 = icmp sge i16 %l.2, 16383
918  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
919  store i16 %s.2, ptr %gep.2, align 2
920
921  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
922  %l.3 = load i16, ptr %gep.3
923  %cmp.3 = icmp sge i16 %l.3, 16383
924  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
925  store i16 %s.3, ptr %gep.3, align 2
926
927  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
928  %l.4 = load i16, ptr %gep.4
929  %cmp.4 = icmp sge i16 %l.4, 16383
930  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
931  store i16 %s.4, ptr %gep.4, align 2
932
933  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
934  %l.5 = load i16, ptr %gep.5
935  %cmp.5 = icmp sge i16 %l.5, 16383
936  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
937  store i16 %s.5, ptr %gep.5, align 2
938
939  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
940  %l.6 = load i16, ptr %gep.6
941  %cmp.6 = icmp sge i16 %l.6, 16383
942  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
943  store i16 %s.6, ptr %gep.6, align 2
944
945  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
946  %l.7 = load i16, ptr %gep.7
947  %cmp.7 = icmp sge i16 %l.7, 16383
948  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
949  store i16 %s.7, ptr %gep.7, align 2
950  ret void
951}
952
; Non-strict (`icmp sge`) compare/select against a constant over 4 consecutive
; i32 elements starting at %ptr: each element is loaded, compared against
; 16383, and the select keeps the loaded value when the compare is true,
; otherwise 16383. The result is stored back in place with `align 4`.
; The autogenerated assertions below expect the four scalar chains to be
; replaced by one <4 x i32> load / icmp sge / select / store sequence using
; splat (i32 16383). %x is not referenced by the body.
953define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) {
954; CHECK-LABEL: @select_smax_sge_4xi32(
955; CHECK-NEXT:  entry:
956; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
957; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], splat (i32 16383)
958; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
959; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
960; CHECK-NEXT:    ret void
961;
962entry:
; Element 0 is accessed through %ptr directly; elements 1-3 go through a GEP.
963  %l.0 = load i32, ptr %ptr
964  %cmp.0 = icmp sge i32 %l.0, 16383
965  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
966  store i32 %s.0, ptr %ptr, align 4
967
968  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
969  %l.1 = load i32, ptr %gep.1
970  %cmp.1 = icmp sge i32 %l.1, 16383
971  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
972  store i32 %s.1, ptr %gep.1, align 4
973
974  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
975  %l.2 = load i32, ptr %gep.2
976  %cmp.2 = icmp sge i32 %l.2, 16383
977  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
978  store i32 %s.2, ptr %gep.2, align 4
979
980  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
981  %l.3 = load i32, ptr %gep.3
982  %cmp.3 = icmp sge i32 %l.3, 16383
983  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
984  store i32 %s.3, ptr %gep.3, align 4
985
986  ret void
987}
988
989; There is no <2 x i64> version of smax, but we can efficiently lower
990; compare/select pairs with uniform predicates.
; Two consecutive i64 elements starting at %ptr are each loaded, compared with
; `icmp sge` against 16383, and the select keeps the loaded value when the
; compare is true, otherwise 16383; the result is stored back in place.
; The autogenerated assertions below expect a single <2 x i64>
; load / icmp sge / select / store sequence. The scalar loads carry no explicit
; alignment and the expected vector load is `align 8`, while the scalar stores
; and the expected vector store both use `align 4`. %x is not referenced by the
; body.
991define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) {
992; CHECK-LABEL: @select_smax_sge_2xi64(
993; CHECK-NEXT:  entry:
994; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
995; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], splat (i64 16383)
996; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
997; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
998; CHECK-NEXT:    ret void
999;
1000entry:
1001  %l.0 = load i64, ptr %ptr
1002  %cmp.0 = icmp sge i64 %l.0, 16383
1003  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
1004  store i64 %s.0, ptr %ptr, align 4
1005
1006  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
1007  %l.1 = load i64, ptr %gep.1
1008  %cmp.1 = icmp sge i64 %l.1, 16383
1009  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
1010  store i64 %s.1, ptr %gep.1, align 4
1011
1012  ret void
1013}
1014